blender/intern/cycles/kernel/kernel_compat_cpu.h

/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef __KERNEL_COMPAT_CPU_H__
#define __KERNEL_COMPAT_CPU_H__

#define __KERNEL_CPU__

#include "util_debug.h"
#include "util_math.h"
#include "util_types.h"

CCL_NAMESPACE_BEGIN

/* Assertions inside the kernel only work for the CPU device, so we wrap them
   in a macro which is empty for other devices. In this CPU header the macro
   simply forwards its condition to assert(). */

#define kernel_assert(cond) assert(cond)

/* Texture types to be compatible with CUDA textures. These are really just
   simple arrays and after inlining fetch hopefully revert to being a simple
   pointer lookup. */

/* One-dimensional table: `data` points at `width` elements of type T. The
   struct only reads through the pointer; it never allocates or frees it. */
template<typename T> struct texture {
	/* Return element `index` by value. The range check is a kernel_assert,
	   so it is only as strong as assert() is in the current build. */
	T fetch(int index)
	{
		kernel_assert(index >= 0 && index < width);
		return data[index];
	}

	/* SSE variants of fetch, currently disabled. Kept for reference:

	__m128 fetch_m128(int index)
	{
		kernel_assert(index >= 0 && index < width);
		return ((__m128*)data)[index];
	}

	__m128i fetch_m128i(int index)
	{
		kernel_assert(index >= 0 && index < width);
		return ((__m128i*)data)[index];
	}*/

	/* Linearly interpolated lookup. x is clamped to [0, 1] and scaled by
	   width; both sample indices are capped at the last element, so the top
	   1/width of the range resolves to the final entry. `size` is only
	   compared against width by the assertion. The result is a float, so
	   this is presumably only meant for scalar tables. */
	float interp(float x, int size)
	{
		kernel_assert(size == width);

		const int last = width-1;
		const float scaled = clamp(x, 0.0f, 1.0f)*width;

		/* Lower sample, its right-hand neighbour and the blend weight. */
		const int lo = min(static_cast<int>(scaled), last);
		const int hi = min(lo+1, last);
		const float weight = scaled - lo;

		return (1.0f - weight)*data[lo] + weight*data[hi];
	}

	T *data;
	int width;
};

/* Two-dimensional image with bilinear filtering. Texel (ix, iy) is read from
   data[ix + iy*width], i.e. rows are stored one after another. The struct
   only reads through `data`; it never allocates or frees it. */
template<typename T> struct texture_image  {
	/* Convert a stored texel to float4. float4 texels pass through as is. */
	float4 read(float4 r)
	{
		return r;
	}

	/* Convert a uchar4 texel to float4 by scaling every channel by 1/255. */
	float4 read(uchar4 r)
	{
		float f = 1.0f/255.0f;
		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
	}

	/* Repeat addressing: map x into [0, width). The remainder of a negative
	   x is negative or zero, so it is shifted up by one period. width must
	   be positive. */
	int wrap_periodic(int x, int width)
	{
		x %= width;
		if(x < 0)
			x += width;
		return x;
	}

	/* Clamp addressing: limit x to the valid index range [0, width-1]. */
	int wrap_clamp(int x, int width)
	{
		return clamp(x, 0, width-1);
	}

	/* Split x into floor(x), written to *ix, and the remainder x - floor(x),
	   which is returned. The remainder is nominally in [0, 1); float rounding
	   can still produce 1.0f for tiny negative x. */
	float frac(float x, int *ix)
	{
		/* The cast truncates toward zero, so step down once when that rounded
		   a negative value up. Comparing against the truncated value rather
		   than testing x < 0 keeps exact negative integers such as -1.0f at
		   index -1 with remainder 0, instead of index -2 with remainder 1. */
		int i = (int)x;
		if(x < (float)i)
			i--;
		*ix = i;
		return x - (float)i;
	}

	/* Bilinearly filtered lookup. x and y are scaled by width and height, so
	   [0, 1) spans the image and texel ix is hit exactly at x = ix/width (no
	   half-texel offset is applied). Outside that range the coordinates
	   repeat when `periodic` is true and stick to the edge texels otherwise.
	   Returns an all-zero float4 when there is no usable image. */
	float4 interp(float x, float y, bool periodic = true)
	{
		/* No data, or a degenerate size that would make wrap_periodic divide
		   by zero and wrap_clamp produce index -1. */
		if(!data || width <= 0 || height <= 0)
			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);

		int ix, iy, nix, niy;
		float tx = frac(x*width, &ix);
		float ty = frac(y*height, &iy);

		if(periodic) {
			ix = wrap_periodic(ix, width);
			iy = wrap_periodic(iy, height);

			nix = wrap_periodic(ix+1, width);
			niy = wrap_periodic(iy+1, height);
		}
		else {
			/* The neighbours must come from the unclamped indices. Deriving
			   them from the clamped ones turns every sample left of or below
			   the image into a blend of texel 0 and texel 1 instead of the
			   edge texel alone. */
			nix = wrap_clamp(ix+1, width);
			niy = wrap_clamp(iy+1, height);

			ix = wrap_clamp(ix, width);
			iy = wrap_clamp(iy, height);
		}

		/* Weighted sum of the four surrounding texels. */
		float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
		r += (1.0f - ty)*tx*read(data[nix + iy*width]);
		r += ty*(1.0f - tx)*read(data[ix + niy*width]);
		r += ty*tx*read(data[nix + niy*width]);

		return r;
	}

	T *data;
	int width, height;
};

/* Shorthand names for the instantiations of the two texture templates:
   one-dimensional tables per element type, and two-dimensional images with
   float4 or uchar4 texels. */
typedef texture<float4> texture_float4;
typedef texture<float> texture_float;
typedef texture<uint> texture_uint;
typedef texture<int> texture_int;
typedef texture<uint4> texture_uint4;
typedef texture_image<float4> texture_image_float4;
typedef texture_image<uchar4> texture_image_uchar4;

/* Macros to handle different memory storage on different devices.

   In this CPU header each macro expands to a member access through `kg`, so
   a pointer-like variable with exactly that name must be in scope wherever
   they are used. `tex` is pasted in as the member name. */

#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
/* NOTE(review): texture::fetch_m128 and texture::fetch_m128i are commented
   out in this file, so these two will not compile if used on a texture<T>. */
#define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index))
#define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index))
#define kernel_tex_interp(tex, t, size) (kg->tex.interp(t, size))
/* Only x and y are forwarded; for a texture_image this means the `periodic`
   argument keeps its default of true. */
#define kernel_tex_image_interp(tex, x, y) (kg->tex.interp(x, y))

/* Access to the `__data` member of `kg`. */
#define kernel_data (kg->__data)

CCL_NAMESPACE_END

#endif /* __KERNEL_COMPAT_CPU_H__ */
Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later. Cycles uses code from some great open source projects, many thanks them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php 2011-04-27 11:58:34 +00:00			`/*`
			`* Copyright 2011, Blender Foundation.`
			`*`
			`* This program is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU General Public License`
			`* as published by the Free Software Foundation; either version 2`
			`* of the License, or (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software Foundation,`
			`* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.`
			`*/`

			`#ifndef __KERNEL_COMPAT_CPU_H__`
			`#define __KERNEL_COMPAT_CPU_H__`

			`#define __KERNEL_CPU__`

			`#include "util_debug.h"`
			`#include "util_math.h"`
			`#include "util_types.h"`

			`CCL_NAMESPACE_BEGIN`

			`/* Assertions inside the kernel only work for the CPU device, so we wrap it in`
			`a macro which is empty for other devices */`

			`#define kernel_assert(cond) assert(cond)`

			`/* Texture types to be compatible with CUDA textures. These are really just`
			`simple arrays and after inlining fetch hopefully revert to being a simple`
			`pointer lookup. */`

			`template<typename T> struct texture {`
			`T fetch(int index)`
			`{`
			`kernel_assert(index >= 0 && index < width);`
			`return data[index];`
			`}`

Cycles: * Fix excessive fireflies in Velvet BSDF (patch by David). * Disable some unused SSE code * Remove RTTI disabling flags for now, this is giving some compile issues and was only needed of OSL which we're not using yet. 2011-11-10 14:32:16 +00:00			`/*__m128 fetch_m128(int index)`
Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later. Cycles uses code from some great open source projects, many thanks them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php 2011-04-27 11:58:34 +00:00			`{`
			`kernel_assert(index >= 0 && index < width);`
			`return ((__m128*)data)[index];`
			`}`

			`__m128i fetch_m128i(int index)`
			`{`
			`kernel_assert(index >= 0 && index < width);`
			`return ((__m128i*)data)[index];`
Cycles: * Fix excessive fireflies in Velvet BSDF (patch by David). * Disable some unused SSE code * Remove RTTI disabling flags for now, this is giving some compile issues and was only needed of OSL which we're not using yet. 2011-11-10 14:32:16 +00:00			`}*/`
Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later. Cycles uses code from some great open source projects, many thanks them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php 2011-04-27 11:58:34 +00:00
Cycles: OpenCL tweaks * Reduce kernel arguments size, helps compile for apple nvidia. * Fix use of unitialized variable in displace kernel. * Use build flags in opencl kernel md5 hash. * Reorganize code for kernel feature #defines a bit. 2011-11-22 13:15:19 +00:00			`float interp(float x, int size)`
Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later. Cycles uses code from some great open source projects, many thanks them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php 2011-04-27 11:58:34 +00:00			`{`
Cycles: OpenCL tweaks * Reduce kernel arguments size, helps compile for apple nvidia. * Fix use of unitialized variable in displace kernel. * Use build flags in opencl kernel md5 hash. * Reorganize code for kernel feature #defines a bit. 2011-11-22 13:15:19 +00:00			`kernel_assert(size == width);`

Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later. Cycles uses code from some great open source projects, many thanks them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php 2011-04-27 11:58:34 +00:00			`x = clamp(x, 0.0f, 1.0f)*width;`

			`int index = min((int)x, width-1);`
			`int nindex = min(index+1, width-1);`
			`float t = x - index;`

			`return (1.0f - t)*data[index] + t*data[nindex];`
			`}`

			`T *data;`
			`int width;`
			`};`

			`template<typename T> struct texture_image {`
			`float4 read(float4 r)`
			`{`
			`return r;`
			`}`

			`float4 read(uchar4 r)`
			`{`
			`float f = 1.0f/255.0f;`
			`return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);`
			`}`

			`int wrap_periodic(int x, int width)`
			`{`
			`x %= width;`
			`if(x < 0)`
			`x += width;`
			`return x;`
			`}`

			`int wrap_clamp(int x, int width)`
			`{`
			`return clamp(x, 0, width-1);`
			`}`

			`float frac(float x, int *ix)`
			`{`
			`int i = (int)x - ((x < 0.0f)? 1: 0);`
			`*ix = i;`
			`return x - (float)i;`
			`}`

			`float4 interp(float x, float y, bool periodic = true)`
			`{`
			`if(!data)`
			`return make_float4(0.0f, 0.0f, 0.0f, 0.0f);`

			`int ix, iy, nix, niy;`
			`float tx = frac(x*width, &ix);`
			`float ty = frac(y*height, &iy);`

			`if(periodic) {`
			`ix = wrap_periodic(ix, width);`
			`iy = wrap_periodic(iy, height);`

			`nix = wrap_periodic(ix+1, width);`
			`niy = wrap_periodic(iy+1, height);`
			`}`
			`else {`
			`ix = wrap_clamp(ix, width);`
			`iy = wrap_clamp(iy, height);`

			`nix = wrap_clamp(ix+1, width);`
			`niy = wrap_clamp(iy+1, height);`
			`}`

			`float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);`
			`r += (1.0f - ty)*tx*read(data[nix + iy*width]);`
			`r += ty*(1.0f - tx)*read(data[ix + niy*width]);`
			`r += ty*tx*read(data[nix + niy*width]);`

			`return r;`
			`}`

			`T *data;`
			`int width, height;`
			`};`

			`typedef texture<float4> texture_float4;`
			`typedef texture<float> texture_float;`
			`typedef texture<uint> texture_uint;`
			`typedef texture<int> texture_int;`
			`typedef texture<uint4> texture_uint4;`
			`typedef texture_image<float4> texture_image_float4;`
			`typedef texture_image<uchar4> texture_image_uchar4;`

			`/* Macros to handle different memory storage on different devices */`

			`#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))`
			`#define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index))`
			`#define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index))`
Cycles: OpenCL tweaks * Reduce kernel arguments size, helps compile for apple nvidia. * Fix use of unitialized variable in displace kernel. * Use build flags in opencl kernel md5 hash. * Reorganize code for kernel feature #defines a bit. 2011-11-22 13:15:19 +00:00			`#define kernel_tex_interp(tex, t, size) (kg->tex.interp(t, size))`
Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later. Cycles uses code from some great open source projects, many thanks them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php 2011-04-27 11:58:34 +00:00			`#define kernel_tex_image_interp(tex, x, y) (kg->tex.interp(x, y))`

			`#define kernel_data (kg->__data)`

			`CCL_NAMESPACE_END`

			`#endif /* __KERNEL_COMPAT_CPU_H__ */`