2011-04-27 11:58:34 +00:00
|
|
|
/*
|
2013-08-18 14:16:15 +00:00
|
|
|
* Copyright 2011-2013 Blender Foundation
|
2011-04-27 11:58:34 +00:00
|
|
|
*
|
2013-08-18 14:16:15 +00:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
2011-04-27 11:58:34 +00:00
|
|
|
*
|
2013-08-18 14:16:15 +00:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2011-04-27 11:58:34 +00:00
|
|
|
*
|
2013-08-18 14:16:15 +00:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
2014-12-25 01:50:24 +00:00
|
|
|
* limitations under the License.
|
2011-04-27 11:58:34 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __UTIL_MATH_H__
|
|
|
|
#define __UTIL_MATH_H__
|
|
|
|
|
|
|
|
/* Math
|
|
|
|
*
|
|
|
|
* Basic math functions on scalar and vector types. This header is used by
|
|
|
|
* both the kernel code when compiled as C++, and other C++ non-kernel code. */
|
|
|
|
|
2017-01-20 10:55:48 +00:00
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
# include <cmath>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
|
|
|
#include <float.h>
|
|
|
|
#include <math.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "util_types.h"
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
2012-04-11 09:07:28 +00:00
|
|
|
/* Float Pi variations */
|
|
|
|
|
2013-05-12 14:13:29 +00:00
|
|
|
/* Division */
|
2011-09-02 00:10:03 +00:00
|
|
|
#ifndef M_PI_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_PI_F (3.1415926535897932f) /* pi, as a single-precision literal */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
#ifndef M_PI_2_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_PI_2_F (1.5707963267948966f) /* pi/2, as a single-precision literal */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
#ifndef M_PI_4_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_PI_4_F (0.7853981633974483f) /* pi/4, as a single-precision literal */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
#ifndef M_1_PI_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_1_PI_F (0.3183098861837067f) /* 1/pi, as a single-precision literal */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
#ifndef M_2_PI_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_2_PI_F (0.6366197723675813f) /* 2/pi, as a single-precision literal */
|
2011-08-09 18:53:54 +00:00
|
|
|
#endif
|
2013-05-12 14:13:29 +00:00
|
|
|
|
|
|
|
/* Multiplication */
|
|
|
|
#ifndef M_2PI_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_2PI_F (6.2831853071795864f) /* 2*pi, as a single-precision literal */
|
2013-05-12 14:13:29 +00:00
|
|
|
#endif
|
|
|
|
#ifndef M_4PI_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_4PI_F (12.566370614359172f) /* 4*pi, as a single-precision literal */
|
2013-05-12 14:13:29 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Float sqrt and logarithm variations */
|
|
|
|
|
2012-04-30 12:49:26 +00:00
|
|
|
#ifndef M_SQRT2_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_SQRT2_F (1.4142135623730950f) /* sqrt(2), as a single-precision literal */
|
2012-04-30 12:49:26 +00:00
|
|
|
#endif
|
|
|
|
|
2015-02-06 10:40:07 +00:00
|
|
|
#ifndef M_LN2_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_LN2_F (0.6931471805599453f) /* ln(2), natural logarithm of 2 */
|
2015-02-06 10:40:07 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef M_LN10_F
|
2017-03-10 23:55:23 +00:00
|
|
|
#define M_LN10_F (2.3025850929940457f) /* ln(10), natural logarithm of 10 */
|
2015-02-06 10:40:07 +00:00
|
|
|
#endif
|
2011-08-09 18:53:54 +00:00
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Scalar */
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
2012-04-11 09:07:28 +00:00
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Fallback fmaxf() used on Windows builds: yields a only when a > b holds
 * and b in every other case, which includes comparisons that are false
 * because an operand is NaN. */
ccl_device_inline float fmaxf(float a, float b)
{
	if(a > b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Fallback fminf() used on Windows builds: yields a only when a < b holds
 * and b in every other case, which includes comparisons that are false
 * because an operand is NaN. */
ccl_device_inline float fminf(float a, float b)
{
	if(a < b) {
		return a;
	}
	return b;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2012-04-11 09:07:28 +00:00
|
|
|
#endif
|
|
|
|
|
2011-04-27 17:23:37 +00:00
|
|
|
#ifndef __KERNEL_GPU__
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2017-01-20 10:55:48 +00:00
|
|
|
using std::isfinite;
|
|
|
|
using std::isnan;
|
|
|
|
|
2016-04-15 13:29:12 +00:00
|
|
|
/* Absolute value of an integer: strictly positive inputs pass through,
 * everything else is negated. */
ccl_device_inline int abs(int x)
{
	if(x > 0) {
		return x;
	}
	return -x;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Larger of two integers; b is returned when the values are equal. */
ccl_device_inline int max(int a, int b)
{
	if(a > b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Smaller of two integers; b is returned when the values are equal. */
ccl_device_inline int min(int a, int b)
{
	if(a < b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Larger of two floats. b is returned whenever a > b does not hold, so
 * equal values and unordered (NaN) comparisons both yield b. */
ccl_device_inline float max(float a, float b)
{
	if(a > b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Smaller of two floats. b is returned whenever a < b does not hold, so
 * equal values and unordered (NaN) comparisons both yield b. */
ccl_device_inline float min(float a, float b)
{
	if(a < b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Larger of two doubles. b is returned whenever a > b does not hold, so
 * equal values and unordered (NaN) comparisons both yield b. */
ccl_device_inline double max(double a, double b)
{
	if(a > b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Smaller of two doubles. b is returned whenever a < b does not hold, so
 * equal values and unordered (NaN) comparisons both yield b. */
ccl_device_inline double min(double a, double b)
{
	if(a < b) {
		return a;
	}
	return b;
}
|
|
|
|
|
2014-12-16 15:27:44 +00:00
|
|
|
/* These two functions are templated for use with register data types.
|
|
|
|
*
|
|
|
|
* NOTE: Since these are CPU-only functions it is ok to use references here.
|
|
|
|
* But for other devices we'll need to be careful about this.
|
|
|
|
*/
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d)
|
|
|
|
{
|
|
|
|
return min(min(a,b),min(c,d));
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
|
|
|
|
{
|
|
|
|
return max(max(a,b),max(c,d));
|
|
|
|
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Minimum of four floats, reduced pairwise with min(). */
ccl_device_inline float min4(float a, float b, float c, float d)
{
	const float lower_ab = min(a, b);
	const float lower_cd = min(c, d);
	return min(lower_ab, lower_cd);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Maximum of four floats, reduced pairwise with max(). */
ccl_device_inline float max4(float a, float b, float c, float d)
{
	const float upper_ab = max(a, b);
	const float upper_cd = max(c, d);
	return max(upper_ab, upper_cd);
}
|
|
|
|
|
2016-10-29 21:47:30 +00:00
|
|
|
/* Largest of the x, y and z components of a. */
ccl_device_inline float max3(float3 a)
{
	const float upper_xy = max(a.x, a.y);
	return max(upper_xy, a.z);
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Limits a to the range [mn, mx]: the lower bound is applied first, then
 * the upper bound. */
ccl_device_inline int clamp(int a, int mn, int mx)
{
	const int raised = max(a, mn);
	return min(raised, mx);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Limits a to the range [mn, mx]: the lower bound is applied first, then
 * the upper bound. */
ccl_device_inline float clamp(float a, float mn, float mx)
{
	const float raised = max(a, mn);
	return min(raised, mx);
}
|
|
|
|
|
2016-07-16 23:42:28 +00:00
|
|
|
/* Linear interpolation between a and b: t = 0 gives a, t = 1 gives b.
 * t is not clamped, so values outside [0, 1] extrapolate. */
ccl_device_inline float mix(float a, float b, float t)
{
	const float span = b - a;
	return a + t*span;
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#endif
|
|
|
|
|
2015-04-27 19:13:03 +00:00
|
|
|
#ifndef __KERNEL_CUDA__
|
|
|
|
|
|
|
|
/* Clamps a to the unit interval [0, 1]. Only defined when not building
 * for CUDA (see the surrounding __KERNEL_CUDA__ guard). */
ccl_device_inline float saturate(float a)
{
	const float lower = 0.0f;
	const float upper = 1.0f;
	return clamp(a, lower, upper);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Converts a float to int with a plain cast, i.e. the fractional part is
 * discarded (truncation toward zero). */
ccl_device_inline int float_to_int(float f)
{
	const int truncated = (int)f;
	return truncated;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Rounds f down with floorf() and converts the result to int. */
ccl_device_inline int floor_to_int(float f)
{
	const float floored = floorf(f);
	return float_to_int(floored);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Rounds f up with ceilf() and converts the result to int. */
ccl_device_inline int ceil_to_int(float f)
{
	const float raised = ceilf(f);
	return float_to_int(raised);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Sign of f as a float: -1 for strictly negative values, +1 otherwise
 * (zero therefore maps to +1). */
ccl_device_inline float signf(float f)
{
	if(f < 0.0f) {
		return -1.0f;
	}
	return 1.0f;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Keeps f away from zero: when its magnitude is below eps the result is
 * eps carrying the sign reported by signf(f); otherwise f is returned
 * unchanged. */
ccl_device_inline float nonzerof(float f, float eps)
{
	if(fabsf(f) < eps) {
		return signf(f)*eps;
	}

	return f;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Cubic smoothstep polynomial 3*f^2 - 2*f^3. The input is used as given;
 * no clamping is applied here. */
ccl_device_inline float smoothstepf(float f)
{
	const float squared = f*f;
	return (3.0f*squared - 2.0f*squared*f);
}
|
|
|
|
|
2016-07-16 23:42:28 +00:00
|
|
|
/* Modulo that wraps into the range of the divisor: the result is either
 * zero or carries the sign of m, so for positive m it lies in [0, m).
 *
 * Implemented by correcting the truncated remainder rather than evaluating
 * (x % m + m) % m, because that intermediate sum overflows int once the
 * remainder plus m no longer fits (e.g. x = INT_MAX - 1, m = INT_MAX).
 *
 * m must be non-zero. */
ccl_device_inline int mod(int x, int m)
{
	int r = x % m;

	/* The built-in remainder takes the sign of x; shift it by one period
	 * when that disagrees with the sign of the divisor. Since |r| < |m| and
	 * the signs differ, this addition cannot overflow. */
	if(r != 0 && ((r < 0) != (m < 0))) {
		r += m;
	}

	return r;
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Float2 Vector */
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* True only when both components of a compare equal to zero. */
ccl_device_inline bool is_zero(const float2& a)
{
	if(a.x != 0.0f) {
		return false;
	}
	return (a.y == 0.0f);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Arithmetic mean of the two components of a. */
ccl_device_inline float average(const float2& a)
{
	const float sum = a.x + a.y;
	return sum*0.5f;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Unary minus: negates each component of a. */
ccl_device_inline float2 operator-(const float2& a)
{
	const float neg_x = -a.x;
	const float neg_y = -a.y;
	return make_float2(neg_x, neg_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise product of two float2 values. */
ccl_device_inline float2 operator*(const float2& a, const float2& b)
{
	const float prod_x = a.x*b.x;
	const float prod_y = a.y*b.y;
	return make_float2(prod_x, prod_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Scales both components of a by the scalar f (vector on the left). */
ccl_device_inline float2 operator*(const float2& a, float f)
{
	const float scaled_x = a.x*f;
	const float scaled_y = a.y*f;
	return make_float2(scaled_x, scaled_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Scales both components of a by the scalar f (scalar on the left). */
ccl_device_inline float2 operator*(float f, const float2& a)
{
	const float scaled_x = a.x*f;
	const float scaled_y = a.y*f;
	return make_float2(scaled_x, scaled_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Divides the scalar f by each component of a, giving (f/a.x, f/a.y). */
ccl_device_inline float2 operator/(float f, const float2& a)
{
	const float quot_x = f/a.x;
	const float quot_y = f/a.y;
	return make_float2(quot_x, quot_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Divide both components of a by the scalar f. The reciprocal of f is
 * computed once and each component is multiplied by it. */
ccl_device_inline float2 operator/(const float2& a, float f)
{
	const float reciprocal = 1.0f/f;
	const float2 quotient = make_float2(a.x*reciprocal, a.y*reciprocal);
	return quotient;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise division: each component of a is divided by the
 * matching component of b. */
ccl_device_inline float2 operator/(const float2& a, const float2& b)
{
	const float qx = a.x/b.x;
	const float qy = a.y/b.y;
	return make_float2(qx, qy);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise sum of a and b. */
ccl_device_inline float2 operator+(const float2& a, const float2& b)
{
	const float sx = a.x+b.x;
	const float sy = a.y+b.y;
	return make_float2(sx, sy);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise difference: b subtracted from a. */
ccl_device_inline float2 operator-(const float2& a, const float2& b)
{
	const float dx = a.x-b.x;
	const float dy = a.y-b.y;
	return make_float2(dx, dy);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Add b to a in place; the updated value of a is returned by value. */
ccl_device_inline float2 operator+=(float2& a, const float2& b)
{
	a = a + b;
	return a;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Multiply a by b in place using the float2 * float2 operator; the
 * updated value of a is returned by value. */
ccl_device_inline float2 operator*=(float2& a, const float2& b)
{
	a = a * b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Scale a by the scalar f in place; the updated value of a is returned
 * by value. */
ccl_device_inline float2 operator*=(float2& a, float f)
{
	a = a * f;
	return a;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Divide a by b in place using the float2 / float2 operator; the
 * updated value of a is returned by value. */
ccl_device_inline float2 operator/=(float2& a, const float2& b)
{
	a = a / b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Divide a by the scalar f in place. This is done by multiplying a with
 * the reciprocal of f; the updated value of a is returned by value. */
ccl_device_inline float2 operator/=(float2& a, float f)
{
	const float reciprocal = 1.0f/f;
	a = a * reciprocal;
	return a;
}
|
|
|
|
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Dot product of a and b: sum of the products of matching components. */
ccl_device_inline float dot(const float2& a, const float2& b)
{
	const float product = a.x*b.x + a.y*b.y;
	return product;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Scalar 2D cross product of a and b: a.x*b.y - a.y*b.x. */
ccl_device_inline float cross(const float2& a, const float2& b)
{
	const float product = a.x*b.y - a.y*b.x;
	return product;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Two int2 values are equal when both their x and y components match. */
ccl_device_inline bool operator==(const int2 a, const int2 b)
{
	if(a.x != b.x) {
		return false;
	}
	return a.y == b.y;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Length of a: square root of the dot product of a with itself. */
ccl_device_inline float len(const float2& a)
{
	const float squared_length = dot(a, a);
	return sqrtf(squared_length);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Return a divided by its own length. No check is made for a zero
 * length; see safe_normalize() for a guarded variant. */
ccl_device_inline float2 normalize(const float2& a)
{
	const float length = len(a);
	return a/length;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Return a divided by its own length, and store that length in *t.
 * No check is made for a zero length. */
ccl_device_inline float2 normalize_len(const float2& a, float *t)
{
	const float length = len(a);
	*t = length;
	return a/(*t);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Return a divided by its own length, unless that length is exactly
 * zero, in which case a is returned unchanged. */
ccl_device_inline float2 safe_normalize(const float2& a)
{
	const float length = len(a);
	if(length == 0.0f) {
		return a;
	}
	return a/length;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Exact component-wise equality of two float2 values (no tolerance). */
ccl_device_inline bool operator==(const float2& a, const float2& b)
{
	if(a.x != b.x) {
		return false;
	}
	return a.y == b.y;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Inequality of two float2 values, defined as the negation of operator==. */
ccl_device_inline bool operator!=(const float2& a, const float2& b)
{
	const bool is_equal = (a == b);
	return !is_equal;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise minimum of two float2 values. */
ccl_device_inline float2 min(const float2& a, const float2& b)
{
	const float min_x = min(a.x, b.x);
	const float min_y = min(a.y, b.y);
	return make_float2(min_x, min_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise maximum of two float2 values. */
ccl_device_inline float2 max(const float2& a, const float2& b)
{
	const float max_x = max(a.x, b.x);
	const float max_y = max(a.y, b.y);
	return make_float2(max_x, max_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise clamp: first raise a to at least mn, then cap the
 * result at mx. */
ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx)
{
	const float2 lower_bounded = max(a, mn);
	return min(lower_bounded, mx);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise absolute value of a float2. */
ccl_device_inline float2 fabs(const float2& a)
{
	const float abs_x = fabsf(a.x);
	const float abs_y = fabsf(a.y);
	return make_float2(abs_x, abs_y);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Build a float2 from the x and y components of a float4; the
 * remaining components are not used. */
ccl_device_inline float2 as_float2(const float4& a)
{
	const float x = a.x;
	const float y = a.y;
	return make_float2(x, y);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Print "<label>: <x> <y>" followed by a newline to stdout, with 8 digits
 * after the decimal point. Components are widened to double for printf. */
ccl_device_inline void print_float2(const char *label, const float2& a)
{
	const double x = (double)a.x;
	const double y = (double)a.y;
	printf("%s: %.8f %.8f\n", label, x, y);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Linear interpolation between a and b: yields a at t = 0 and
 * a + (b - a) at t = 1. t is not clamped. */
ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
{
	const float2 delta = b - a;
	return a + t*delta;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Float3 Vector */
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Unary negation of a float3. */
ccl_device_inline float3 operator-(const float3& a)
{
#ifdef __KERNEL_SSE__
	/* XOR with a mask that has only the sign bit set in every lane. */
	const __m128 sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
	return float3(_mm_xor_ps(a.m128, sign_mask));
#else
	return make_float3(-a.x, -a.y, -a.z);
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Component-wise product of two float3 values. */
ccl_device_inline float3 operator*(const float3& a, const float3& b)
{
#ifndef __KERNEL_SSE__
	return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
#else
	return float3(_mm_mul_ps(a.m128, b.m128));
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Scale every component of a float3 by the scalar f. */
ccl_device_inline float3 operator*(const float3& a, const float f)
{
#ifdef __KERNEL_SSE__
	/* Broadcast f to all lanes, then multiply lane by lane. */
	const __m128 scale = _mm_set1_ps(f);
	return float3(_mm_mul_ps(a.m128, scale));
#else
	return make_float3(a.x*f, a.y*f, a.z*f);
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Scalar-on-the-left variant of float3 scaling. Only the per-component
 * branch is compiled; the SSE branch is switched off by the "&& 0". */
ccl_device_inline float3 operator*(const float f, const float3& a)
{
	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
	const __m128 scale = _mm_set1_ps(f);
	return float3(_mm_mul_ps(scale, a.m128));
#else
	return make_float3(a.x*f, a.y*f, a.z*f);
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Divide the scalar f by each component of a. Only the per-component
 * branch is compiled; the SSE branch is switched off by the "&& 0". */
ccl_device_inline float3 operator/(const float f, const float3& a)
{
	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
	const __m128 reciprocal = _mm_rcp_ps(a.m128);
	const __m128 numerator = _mm_set1_ps(f);
	return float3(_mm_mul_ps(numerator, reciprocal));
#else
	return make_float3(f / a.x, f / a.y, f / a.z);
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Divide a float3 by a scalar. The reciprocal of f is computed once and
 * the vector is then multiplied by it. */
ccl_device_inline float3 operator/(const float3& a, const float f)
{
	return a * (1.0f/f);
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Component-wise division of two float3 values. Only the per-component
 * branch is compiled; the SSE branch is switched off by the "&& 0". */
ccl_device_inline float3 operator/(const float3& a, const float3& b)
{
	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
#if defined(__KERNEL_SSE__) && 0
	__m128 rc = _mm_rcp_ps(b.m128);
	/* Pass the raw SSE register explicitly, matching the other float3
	 * operators, instead of relying on an implicit float3 conversion. */
	return float3(_mm_mul_ps(a.m128, rc));
#else
	return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Component-wise sum of two float3 values. */
ccl_device_inline float3 operator+(const float3& a, const float3& b)
{
#ifndef __KERNEL_SSE__
	return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
#else
	return float3(_mm_add_ps(a.m128, b.m128));
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Component-wise difference a - b of two float3 values. */
ccl_device_inline float3 operator-(const float3& a, const float3& b)
{
#ifndef __KERNEL_SSE__
	return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
#else
	return float3(_mm_sub_ps(a.m128, b.m128));
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* In-place component-wise addition.
 *
 * Stores a + b into a. The result is returned by value (a copy of the
 * updated a), not as a reference. */
ccl_device_inline float3 operator+=(float3& a, const float3& b)
{
	a = a + b;
	return a;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* In-place vector-by-vector multiplication.
 *
 * Stores a * b into a. The result is returned by value (a copy of the
 * updated a), not as a reference. */
ccl_device_inline float3 operator*=(float3& a, const float3& b)
{
	a = a * b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* In-place scaling of a vector by a scalar.
 *
 * Stores a * f into a. The result is returned by value (a copy of the
 * updated a), not as a reference. */
ccl_device_inline float3 operator*=(float3& a, float f)
{
	a = a * f;
	return a;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* In-place vector-by-vector division.
 *
 * Stores a / b into a. The result is returned by value (a copy of the
 * updated a), not as a reference. */
ccl_device_inline float3 operator/=(float3& a, const float3& b)
{
	a = a / b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* In-place division of a vector by a scalar.
 *
 * The reciprocal of f is computed once and a is multiplied by it. The
 * result is returned by value (a copy of the updated a), not as a
 * reference. */
ccl_device_inline float3 operator/=(float3& a, float f)
{
	const float reciprocal = 1.0f/f;
	a = a * reciprocal;
	return a;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Three-component dot product: a.x*b.x + a.y*b.y + a.z*b.z. */
ccl_device_inline float dot(const float3& a, const float3& b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* Mask 0x7F: multiply only the three low lanes (high nibble 0111) and
	 * write the sum to every lane (low nibble 1111); lane 0 is extracted. */
	const __m128 sum = _mm_dp_ps(a, b, 0x7F);
	return _mm_cvtss_f32(sum);
#else
	return a.x*b.x + a.y*b.y + a.z*b.z;
#endif
}
|
|
|
|
|
2016-10-12 12:23:29 +00:00
|
|
|
/* Dot product restricted to the x and y components: a.x*b.x + a.y*b.y. */
ccl_device_inline float dot_xy(const float3& a, const float3& b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* The horizontal add puts (lane0 + lane1) of the products into lane 0.
	 * The second operand only feeds the upper result lanes, which are
	 * discarded when lane 0 is extracted. */
	const __m128 products = _mm_mul_ps(a, b);
	return _mm_cvtss_f32(_mm_hadd_ps(products, b));
#else
	return a.x*b.x + a.y*b.y;
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Four-component dot product. */
ccl_device_inline float dot(const float4& a, const float4& b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* Mask 0xFF: multiply all four lanes and write the sum to every lane;
	 * lane 0 is then extracted. */
	const __m128 sum = _mm_dp_ps(a, b, 0xFF);
	return _mm_cvtss_f32(sum);
#else
	/* The products are summed pairwise: (x + y) + (z + w). */
	return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w);
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Cross product a x b of two three-component vectors. */
ccl_device_inline float3 cross(const float3& a, const float3& b)
{
	return make_float3(a.y*b.z - a.z*b.y,
	                   a.z*b.x - a.x*b.z,
	                   a.x*b.y - a.y*b.x);
}
|
|
|
|
|
2013-06-04 17:20:00 +00:00
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Euclidean length of a vector: the square root of dot(a, a). */
ccl_device_inline float len(const float3 a)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* Mask 0x7F restricts the dot product to the three low lanes; only the
	 * lowest lane goes through the scalar square root and is returned. */
	const __m128 squared = _mm_dp_ps(a.m128, a.m128, 0x7F);
	return _mm_cvtss_f32(_mm_sqrt_ss(squared));
#else
	return sqrtf(dot(a, a));
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Squared length of a three-component vector, i.e. dot(a, a).
 * No square root is taken. */
ccl_device_inline float len_squared(const float3 a)
{
	const float squared = dot(a, a);
	return squared;
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Squared length of a four-component vector, i.e. dot(a, a).
 * No square root is taken. */
ccl_device_inline float len_squared(const float4& a)
{
	const float squared = dot(a, a);
	return squared;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Scale a vector to unit length by dividing it by its length.
 *
 * There is no guard against a zero-length input. */
ccl_device_inline float3 normalize(const float3& a)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* Mask 0x7F: dot product over the three low lanes, broadcast to every
	 * lane, so that one packed divide scales all components. */
	const __m128 squared = _mm_dp_ps(a.m128, a.m128, 0x7F);
	const __m128 length = _mm_sqrt_ps(squared);
	return float3(_mm_div_ps(a.m128, length));
#else
	return a/len(a);
#endif
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practice, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evaluation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
2016-06-23 20:56:43 +00:00
|
|
|
/* Apply the scalar saturate() to each of the x, y and z components and
 * return the results as a new vector. */
ccl_device_inline float3 saturate3(float3 a)
{
	return make_float3(saturate(a.x),
	                   saturate(a.y),
	                   saturate(a.z));
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Normalize a vector and also report its original length.
 *
 * The length of a is written to *t, and a scaled by 1/length is returned.
 * There is no guard against a zero-length input. */
ccl_device_inline float3 normalize_len(const float3 a, float *t)
{
	const float length = len(a);
	*t = length;
	const float scale = 1.0f / length;
	return a*scale;
}
|
|
|
|
|
2014-11-08 12:35:21 +00:00
|
|
|
/* Normalize a vector, tolerating zero length.
 *
 * When the length is exactly zero the input is returned unchanged;
 * otherwise it is scaled by the reciprocal of its length. */
ccl_device_inline float3 safe_normalize(const float3 a)
{
	const float length = len(a);
	if(length == 0.0f) {
		return a;
	}
	return a * (1.0f/length);
}
|
|
|
|
|
2016-08-09 10:20:08 +00:00
|
|
|
/* Normalize a vector, tolerating zero length, and report its length.
 *
 * The length of a is always written to *t. When that length is exactly
 * zero the input is returned unchanged; otherwise a divided by its length
 * is returned. */
ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
{
	const float length = len(a);
	*t = length;
	if(length != 0.0f) {
		return a/length;
	}
	return a;
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Exact equality of two vectors: true only when x, y and z all compare
 * equal. */
ccl_device_inline bool operator==(const float3& a, const float3& b)
{
#if defined(__KERNEL_SSE__)
	/* One sign bit per lane of the packed compare; only the three low bits
	 * are tested, so the fourth lane never affects the result. */
	const int lane_bits = _mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128));
	return (lane_bits & 7) == 7;
#else
	return (a.x == b.x && a.y == b.y && a.z == b.z);
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Inequality of two vectors, defined as the negation of operator==. */
ccl_device_inline bool operator!=(const float3& a, const float3& b)
{
	const bool same = (a == b);
	return !same;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise minimum of two vectors. */
ccl_device_inline float3 min(const float3& a, const float3& b)
{
#if defined(__KERNEL_SSE__)
	/* Per-lane minimum with a single packed instruction. */
	return float3(_mm_min_ps(a.m128, b.m128));
#else
	/* Use the scalar min() on each component. */
	return make_float3(min(a.x, b.x),
	                   min(a.y, b.y),
	                   min(a.z, b.z));
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise maximum of two float3 vectors. */
ccl_device_inline float3 max(const float3& a, const float3& b)
{
#ifndef __KERNEL_SSE__
	/* Scalar path: pick the larger value of each component pair. */
	const float hi_x = max(a.x, b.x);
	const float hi_y = max(a.y, b.y);
	const float hi_z = max(a.z, b.z);
	return make_float3(hi_x, hi_y, hi_z);
#else
	/* SSE path: a single packed max over the m128 members. */
	return _mm_max_ps(a.m128, b.m128);
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Clamp a between mn and mx using the float3 min()/max() overloads. */
ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx)
{
	/* Raise to the lower bound first, then cap at the upper bound. */
	const float3 raised = max(a, mn);
	return min(raised, mx);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise absolute value of a float3. */
ccl_device_inline float3 fabs(const float3& a)
{
#ifndef __KERNEL_SSE__
	/* Scalar path: fabsf() on each component. */
	return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
#else
	/* 0x7fffffff has every bit set except the top (sign) bit of each 32-bit
	 * lane, so AND-ing with it clears the sign and leaves the rest intact. */
	const __m128 sign_cleared = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
	return _mm_and_ps(a.m128, sign_cleared);
#endif
}
|
|
|
|
|
2011-05-20 12:26:01 +00:00
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Widen a float2 to a float3: x and y are copied, z is set to zero. */
ccl_device_inline float3 float2_to_float3(const float2 a)
{
	const float3 widened = make_float3(a.x, a.y, 0.0f);
	return widened;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Narrow a float4 to a float3: x, y and z are copied, w is discarded. */
ccl_device_inline float3 float4_to_float3(const float4 a)
{
	const float3 narrowed = make_float3(a.x, a.y, a.z);
	return narrowed;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Widen a float3 to a float4: x, y and z are copied, w is set to one. */
ccl_device_inline float4 float3_to_float4(const float3 a)
{
	const float4 widened = make_float4(a.x, a.y, a.z, 1.0f);
	return widened;
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Debug helper: print "label: x y z" followed by a newline via printf(),
 * with each component shown to 8 decimal places. */
ccl_device_inline void print_float3(const char *label, const float3& a)
{
	/* Convert explicitly to double, the type printf's %f expects. */
	const double x = static_cast<double>(a.x);
	const double y = static_cast<double>(a.y);
	const double z = static_cast<double>(a.z);
	printf("%s: %.8f %.8f %.8f\n", label, x, y, z);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise reciprocal (1/a) of a float3. */
ccl_device_inline float3 rcp(const float3& a)
{
#ifndef __KERNEL_SSE__
	/* Scalar path: plain division per component. */
	return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z);
#else
	/* Start from the hardware reciprocal estimate, then refine it once with
	 * a Newton-Raphson step: r' = 2*r - r*r*a. */
	float4 r = _mm_rcp_ps(a.m128);
	const __m128 two_r = _mm_add_ps(r, r);
	const __m128 r_squared = _mm_mul_ps(r, r);
	return _mm_sub_ps(two_r, _mm_mul_ps(r_squared, a));
#endif
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Linear interpolation between a and b: yields a for t = 0 and moves
 * towards b as t approaches 1. t is not clamped. */
ccl_device_inline float3 interp(float3 a, float3 b, float t)
{
	const float3 delta = b - a;
	return a + t*delta;
}
|
|
|
|
|
2016-07-16 23:42:28 +00:00
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Blend from a towards b by factor t, computed as a + t*(b - a).
 * t is not clamped. */
ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
{
	const float3 offset = b - a;
	return a + t*offset;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Test whether a float3 is exactly zero. */
ccl_device_inline bool is_zero(const float3 a)
{
#ifndef __KERNEL_SSE__
	/* Scalar path: every component must compare equal to zero. */
	const bool x_zero = (a.x == 0.0f);
	const bool y_zero = (a.y == 0.0f);
	const bool z_zero = (a.z == 0.0f);
	return (x_zero && y_zero && z_zero);
#else
	/* SSE path: defer to the float3 equality operator. */
	return a == make_float3(0.0f);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Sum of the x, y and z components of a float3. */
ccl_device_inline float reduce_add(const float3 a)
{
	/* Keep the left-to-right order (x + y first, then + z). */
	const float xy = a.x + a.y;
	return (xy + a.z);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Arithmetic mean of the three components of a float3. */
ccl_device_inline float average(const float3 a)
{
	/* Multiply by one third instead of dividing by three. */
	const float one_third = (1.0f/3.0f);
	return reduce_add(a)*one_third;
}
|
|
|
|
|
2016-07-25 01:03:23 +00:00
|
|
|
/* Equality test for two float3 values that yields a single bool on both
 * OpenCL and non-OpenCL builds. */
ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
{
#ifndef __KERNEL_OPENCL__
	/* Non-OpenCL builds: use the float3 equality operator directly. */
	return a == b;
#else
	/* OpenCL: reduce the result of the comparison to one bool with all(). */
	return all(a == b);
#endif
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Float4 Vector */
|
|
|
|
|
Cycles: merging features from tomato branch.
=== BVH build time optimizations ===
* BVH building was multithreaded. Not all building is multithreaded, packing
and the initial bounding/splitting is still single threaded, but recursive
splitting is, which was the main bottleneck.
* Object splitting now uses binning rather than sorting of all elements, using
code from the Embree raytracer from Intel.
http://software.intel.com/en-us/articles/embree-photo-realistic-ray-tracing-kernels/
* Other small changes to avoid allocations, pack memory more tightly, avoid
some unnecessary operations, ...
These optimizations do not work yet when Spatial Splits are enabled, for that
more work is needed. There's also other optimizations still needed, in
particular for the case of many low poly objects, the packing step and node
memory allocation.
BVH raytracing time should remain about the same, but BVH build time should be
significantly reduced; tests here show a speedup of about 5x to 10x on a dual core
and 5x to 25x on an 8-core machine, depending on the scene.
=== Threads ===
Centralized task scheduler for multithreading, which is basically the
CPU device threading code wrapped into something reusable.
Basic idea is that there is a single TaskScheduler that keeps a pool of threads,
one for each core. Other places in the code can then create a TaskPool that they
can drop Tasks in to be executed by the scheduler, and wait for them to complete
or cancel them early.
=== Normal ===
Added a Normal output to the texture coordinate node. This currently
gives the object space normal, which is the same under object animation.
In the future this might become a "generated" normal so it's also stable for
deforming objects, but for now it's already useful for non-deforming objects.
=== Render Layers ===
Per render layer Samples control, leaving it to 0 will use the common scene
setting.
Environment pass will now render environment even if film is set to transparent.
"Exclude Layers" added. Scene layers (all objects that influence the render,
directly or indirectly) are shared between all render layers. However sometimes
it's useful to leave out some object influence for a particular render layer.
That's what this option allows you to do.
=== Filter Glossy ===
When using a value higher than 0.0, this will blur glossy reflections after
blurry bounces, to reduce noise at the cost of accuracy. 1.0 is a good
starting value to tweak.
Some light paths have a low probability of being found while contributing much
light to the pixel. As a result these light paths will be found in some pixels
and not in others, causing fireflies. An example of such a difficult path might
be a small light that is causing a small specular highlight on a sharp glossy
material, which we are seeing through a rough glossy material. With path tracing
it is difficult to find the specular highlight, but if we increase the roughness
on the material the highlight gets bigger and softer, and so easier to find.
Often this blurring will be hardly noticeable, because we are seeing it through
a blurry material anyway, but there are also cases where this will lead to a
loss of detail in lighting.
2012-04-28 08:53:59 +00:00
|
|
|
#ifdef __KERNEL_SSE__
|
2011-04-27 11:58:34 +00:00
|
|
|
|
Cycles: merging features from tomato branch.
=== BVH build time optimizations ===
* BVH building was multithreaded. Not all building is multithreaded, packing
and the initial bounding/splitting is still single threaded, but recursive
splitting is, which was the main bottleneck.
* Object splitting now uses binning rather than sorting of all elements, using
code from the Embree raytracer from Intel.
http://software.intel.com/en-us/articles/embree-photo-realistic-ray-tracing-kernels/
* Other small changes to avoid allocations, pack memory more tightly, avoid
some unnecessary operations, ...
These optimizations do not work yet when Spatial Splits are enabled, for that
more work is needed. There's also other optimizations still needed, in
particular for the case of many low poly objects, the packing step and node
memory allocation.
BVH raytracing time should remain about the same, but BVH build time should be
significantly reduced; tests here show a speedup of about 5x to 10x on a dual core
and 5x to 25x on an 8-core machine, depending on the scene.
=== Threads ===
Centralized task scheduler for multithreading, which is basically the
CPU device threading code wrapped into something reusable.
Basic idea is that there is a single TaskScheduler that keeps a pool of threads,
one for each core. Other places in the code can then create a TaskPool that they
can drop Tasks in to be executed by the scheduler, and wait for them to complete
or cancel them early.
=== Normal ===
Added a Normal output to the texture coordinate node. This currently
gives the object space normal, which is the same under object animation.
In the future this might become a "generated" normal so it's also stable for
deforming objects, but for now it's already useful for non-deforming objects.
=== Render Layers ===
Per render layer Samples control, leaving it to 0 will use the common scene
setting.
Environment pass will now render environment even if film is set to transparent.
"Exclude Layers" added. Scene layers (all objects that influence the render,
directly or indirectly) are shared between all render layers. However sometimes
it's useful to leave out some object influence for a particular render layer.
That's what this option allows you to do.
=== Filter Glossy ===
When using a value higher than 0.0, this will blur glossy reflections after
blurry bounces, to reduce noise at the cost of accuracy. 1.0 is a good
starting value to tweak.
Some light paths have a low probability of being found while contributing much
light to the pixel. As a result these light paths will be found in some pixels
and not in others, causing fireflies. An example of such a difficult path might
be a small light that is causing a small specular highlight on a sharp glossy
material, which we are seeing through a rough glossy material. With path tracing
it is difficult to find the specular highlight, but if we increase the roughness
on the material the highlight gets bigger and softer, and so easier to find.
Often this blurring will be hardly noticeable, because we are seeing it through
a blurry material anyway, but there are also cases where this will lead to a
loss of detail in lighting.
2012-04-28 08:53:59 +00:00
|
|
|
/* Permute the four lanes of a float4. Result lane i is taken from source
 * lane index_i, for i = 0..3. */
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4& b)
{
	/* The shuffle is done with the integer instruction, so reinterpret the
	 * float lanes as integers, permute, and reinterpret back. The casts only
	 * change the type, not the bits. */
	const __m128i as_int = _mm_castps_si128(b);
	const __m128i permuted = _mm_shuffle_epi32(as_int, _MM_SHUFFLE(index_3, index_2, index_1, index_0));
	return _mm_castsi128_ps(permuted);
}
|
|
|
|
|
2017-03-27 14:08:39 +00:00
|
|
|
#if defined(__KERNEL_SSE3__)
|
Cycles: merging features from tomato branch.
=== BVH build time optimizations ===
* BVH building was multithreaded. Not all building is multithreaded, packing
and the initial bounding/splitting is still single threaded, but recursive
splitting is, which was the main bottleneck.
* Object splitting now uses binning rather than sorting of all elements, using
code from the Embree raytracer from Intel.
http://software.intel.com/en-us/articles/embree-photo-realistic-ray-tracing-kernels/
* Other small changes to avoid allocations, pack memory more tightly, avoid
some unnecessary operations, ...
These optimizations do not work yet when Spatial Splits are enabled, for that
more work is needed. There's also other optimizations still needed, in
particular for the case of many low poly objects, the packing step and node
memory allocation.
BVH raytracing time should remain about the same, but BVH build time should be
significantly reduced; tests here show a speedup of about 5x to 10x on a dual core
and 5x to 25x on an 8-core machine, depending on the scene.
=== Threads ===
Centralized task scheduler for multithreading, which is basically the
CPU device threading code wrapped into something reusable.
Basic idea is that there is a single TaskScheduler that keeps a pool of threads,
one for each core. Other places in the code can then create a TaskPool that they
can drop Tasks in to be executed by the scheduler, and wait for them to complete
or cancel them early.
=== Normal ===
Added a Normal output to the texture coordinate node. This currently
gives the object space normal, which is the same under object animation.
In the future this might become a "generated" normal so it's also stable for
deforming objects, but for now it's already useful for non-deforming objects.
=== Render Layers ===
Per render layer Samples control, leaving it to 0 will use the common scene
setting.
Environment pass will now render environment even if film is set to transparent.
"Exclude Layers" added. Scene layers (all objects that influence the render,
directly or indirectly) are shared between all render layers. However sometimes
it's useful to leave out some object influence for a particular render layer.
That's what this option allows you to do.
=== Filter Glossy ===
When using a value higher than 0.0, this will blur glossy reflections after
blurry bounces, to reduce noise at the cost of accuracy. 1.0 is a good
starting value to tweak.
Some light paths have a low probability of being found while contributing much
light to the pixel. As a result these light paths will be found in some pixels
and not in others, causing fireflies. An example of such a difficult path might
be a small light that is causing a small specular highlight on a sharp glossy
material, which we are seeing through a rough glossy material. With path tracing
it is difficult to find the specular highlight, but if we increase the roughness
on the material the highlight gets bigger and softer, and so easier to find.
Often this blurring will be hardly noticeable, because we are seeing it through
a blurry material anyway, but there are also cases where this will lead to a
loss of detail in lighting.
2012-04-28 08:53:59 +00:00
|
|
|
/* Specialization of shuffle for the (0, 0, 2, 2) lane pattern.
 *
 * The pattern maps directly onto _mm_moveldup_ps, so a single SSE3
 * intrinsic produces the result. */
template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b)
{
	const float4 even_lanes = _mm_moveldup_ps(b);
	return even_lanes;
}
|
|
|
|
|
Cycles: merging features from tomato branch.
=== BVH build time optimizations ===
* BVH building was multithreaded. Not all building is multithreaded, packing
and the initial bounding/splitting is still single threaded, but recursive
splitting is, which was the main bottleneck.
* Object splitting now uses binning rather than sorting of all elements, using
code from the Embree raytracer from Intel.
http://software.intel.com/en-us/articles/embree-photo-realistic-ray-tracing-kernels/
* Other small changes to avoid allocations, pack memory more tightly, avoid
some unnecessary operations, ...
These optimizations do not work yet when Spatial Splits are enabled, for that
more work is needed. There's also other optimizations still needed, in
particular for the case of many low poly objects, the packing step and node
memory allocation.
BVH raytracing time should remain about the same, but BVH build time should be
significantly reduced; tests here show a speedup of about 5x to 10x on a dual core
and 5x to 25x on an 8-core machine, depending on the scene.
=== Threads ===
Centralized task scheduler for multithreading, which is basically the
CPU device threading code wrapped into something reusable.
Basic idea is that there is a single TaskScheduler that keeps a pool of threads,
one for each core. Other places in the code can then create a TaskPool that they
can drop Tasks in to be executed by the scheduler, and wait for them to complete
or cancel them early.
=== Normal ===
Added a Normal output to the texture coordinate node. This currently
gives the object space normal, which is the same under object animation.
In the future this might become a "generated" normal so it's also stable for
deforming objects, but for now it's already useful for non-deforming objects.
=== Render Layers ===
Per render layer Samples control, leaving it to 0 will use the common scene
setting.
Environment pass will now render environment even if film is set to transparent.
"Exclude Layers" added. Scene layers (all objects that influence the render,
directly or indirectly) are shared between all render layers. However sometimes
it's useful to leave out some object influence for a particular render layer.
That's what this option allows you to do.
=== Filter Glossy ===
When using a value higher than 0.0, this will blur glossy reflections after
blurry bounces, to reduce noise at the cost of accuracy. 1.0 is a good
starting value to tweak.
Some light paths have a low probability of being found while contributing much
light to the pixel. As a result these light paths will be found in some pixels
and not in others, causing fireflies. An example of such a difficult path might
be a small light that is causing a small specular highlight on a sharp glossy
material, which we are seeing through a rough glossy material. With path tracing
it is difficult to find the specular highlight, but if we increase the roughness
on the material the highlight gets bigger and softer, and so easier to find.
Often this blurring will be hardly noticeable, because we are seeing it through
a blurry material anyway, but there are also cases where this will lead to a
loss of detail in lighting.
2012-04-28 08:53:59 +00:00
|
|
|
/* Specialization of shuffle for the (1, 1, 3, 3) lane pattern.
 *
 * The pattern maps directly onto _mm_movehdup_ps, so a single SSE3
 * intrinsic produces the result. */
template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b)
{
	const float4 odd_lanes = _mm_movehdup_ps(b);
	return odd_lanes;
}
|
2017-03-27 14:08:39 +00:00
|
|
|
#endif
|
Cycles: merging features from tomato branch.
=== BVH build time optimizations ===
* BVH building was multithreaded. Not all building is multithreaded, packing
and the initial bounding/splitting is still single threaded, but recursive
splitting is, which was the main bottleneck.
* Object splitting now uses binning rather than sorting of all elements, using
code from the Embree raytracer from Intel.
http://software.intel.com/en-us/articles/embree-photo-realistic-ray-tracing-kernels/
* Other small changes to avoid allocations, pack memory more tightly, avoid
some unnecessary operations, ...
These optimizations do not work yet when Spatial Splits are enabled, for that
more work is needed. There's also other optimizations still needed, in
particular for the case of many low poly objects, the packing step and node
memory allocation.
BVH raytracing time should remain about the same, but BVH build time should be
significantly reduced; tests here show a speedup of about 5x to 10x on a dual core
and 5x to 25x on an 8-core machine, depending on the scene.
=== Threads ===
Centralized task scheduler for multithreading, which is basically the
CPU device threading code wrapped into something reusable.
Basic idea is that there is a single TaskScheduler that keeps a pool of threads,
one for each core. Other places in the code can then create a TaskPool that they
can drop Tasks in to be executed by the scheduler, and wait for them to complete
or cancel them early.
=== Normal ===
Added a Normal output to the texture coordinate node. This currently
gives the object space normal, which is the same under object animation.
In the future this might become a "generated" normal so it's also stable for
deforming objects, but for now it's already useful for non-deforming objects.
=== Render Layers ===
Per render layer Samples control, leaving it to 0 will use the common scene
setting.
Environment pass will now render environment even if film is set to transparent.
"Exclude Layers" added. Scene layers (all objects that influence the render,
directly or indirectly) are shared between all render layers. However sometimes
it's useful to leave out some object influence for a particular render layer.
That's what this option allows you to do.
=== Filter Glossy ===
When using a value higher than 0.0, this will blur glossy reflections after
blurry bounces, to reduce noise at the cost of accuracy. 1.0 is a good
starting value to tweak.
Some light paths have a low probability of being found while contributing much
light to the pixel. As a result these light paths will be found in some pixels
and not in others, causing fireflies. An example of such a difficult path might
be a small light that is causing a small specular highlight on a sharp glossy
material, which we are seeing through a rough glossy material. With path tracing
it is difficult to find the specular highlight, but if we increase the roughness
on the material the highlight gets bigger and softer, and so easier to find.
Often this blurring will be hardly noticeable, because we are seeing it through
a blurry material anyway, but there are also cases where this will lead to a
loss of detail in lighting.
2012-04-28 08:53:59 +00:00
|
|
|
|
|
|
|
/* Specialization of shuffle for the (0, 1, 0, 1) lane pattern: the low two
 * lanes of b are copied into both halves of the result.
 *
 * Implemented with _mm_movelh_ps, which only requires SSE. This
 * specialization sits outside the __KERNEL_SSE3__ guard, so it must not
 * depend on SSE3-only intrinsics such as _mm_movedup_pd. */
template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b)
{
	/* movelh(x, y) yields (x0, x1, y0, y1); with x == y this is (b0, b1, b0, b1). */
	return _mm_movelh_ps(b, b);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Unary minus: negate each of the four components of a. */
ccl_device_inline float4 operator-(const float4& a)
{
#if defined(__KERNEL_SSE__)
	/* XOR every lane with 0x80000000, i.e. toggle the IEEE sign bit. */
	const __m128 sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
	return _mm_xor_ps(a.m128, sign_bits);
#else
	return make_float4(-a.x, -a.y, -a.z, -a.w);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise product of two float4 values. */
ccl_device_inline float4 operator*(const float4& a, const float4& b)
{
#ifndef __KERNEL_SSE__
	return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
#else
	/* All four lanes are multiplied by one packed instruction. */
	return _mm_mul_ps(a.m128, b.m128);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Scale every component of a by the scalar f. */
ccl_device_inline float4 operator*(const float4& a, float f)
{
#ifndef __KERNEL_SSE__
	return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
#else
	/* Build a float4 from f (make_float4(float) is defined elsewhere;
	 * presumably it fills every lane with f) and reuse float4 * float4. */
	const float4 factor = make_float4(f);
	return a * factor;
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Scalar-on-the-left multiplication: evaluates the same product as
 * float4 * float with the operands swapped. */
ccl_device_inline float4 operator*(float f, const float4& a)
{
	const float4 scaled = a * f;
	return scaled;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise reciprocal of a.
 *
 * SSE path: takes the _mm_rcp_ps estimate r and refines it as
 * 2*r - r*r*a (one Newton-Raphson step).
 * Scalar path: plain 1.0f / component division. */
ccl_device_inline float4 rcp(const float4& a)
{
#if defined(__KERNEL_SSE__)
	const float4 estimate = _mm_rcp_ps(a.m128);
	const __m128 doubled = _mm_add_ps(estimate, estimate);
	const __m128 correction = _mm_mul_ps(_mm_mul_ps(estimate, estimate), a);
	return _mm_sub_ps(doubled, correction);
#else
	return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Divide every component of a by the scalar f.
 *
 * The reciprocal of f is computed once and the vector is then scaled by it,
 * instead of dividing each component separately. */
ccl_device_inline float4 operator/(const float4& a, float f)
{
	const float inv_f = 1.0f/f;
	return a * inv_f;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise division of a by b.
 *
 * Returns (a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w).
 *
 * The SSE path performs a real packed division rather than multiplying by a
 * refined reciprocal estimate, so that both code paths produce the same
 * IEEE result, including for zero or infinite divisors where the
 * reciprocal refinement would yield NaN. */
ccl_device_inline float4 operator/(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	return _mm_div_ps(a.m128, b.m128);
#else
	return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise sum of two float4 values.
 *
 * Uses a single packed add when SSE is available, otherwise adds the four
 * components one by one. */
ccl_device_inline float4 operator+(const float4& a, const float4& b)
{
#ifndef __KERNEL_SSE__
	return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
#else
	return _mm_add_ps(a.m128, b.m128);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise difference a - b of two float4 values.
 *
 * Uses a single packed subtract when SSE is available, otherwise subtracts
 * the four components one by one. */
ccl_device_inline float4 operator-(const float4& a, const float4& b)
{
#ifndef __KERNEL_SSE__
	return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
#else
	return _mm_sub_ps(a.m128, b.m128);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Add b to a in place.
 *
 * The updated value is returned by value (a copy), not by reference. */
ccl_device_inline float4 operator+=(float4& a, const float4& b)
{
	a = a + b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Multiply a by b in place, using the float4 * float4 operator.
 *
 * The updated value is returned by value (a copy), not by reference. */
ccl_device_inline float4 operator*=(float4& a, const float4& b)
{
	a = a * b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Divide a by the scalar f in place, using the float4 / float operator.
 *
 * The updated value is returned by value (a copy), not by reference. */
ccl_device_inline float4 operator/=(float4& a, float f)
{
	a = a / f;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Per-component "less than" comparison of two float4 values.
 *
 * Returns an int4 with one lane per component; a lane is zero where the
 * comparison is false.
 *
 * NOTE(review): the scalar path stores 0/1 per lane, while the SSE path runs
 * the comparison mask through _mm_cvtps_epi32, which is a numeric
 * float-to-int conversion rather than a bit cast. The lane values for "true"
 * therefore presumably differ between the two paths -- confirm that callers
 * only rely on zero versus non-zero before changing this. */
ccl_device_inline int4 operator<(const float4& a, const float4& b)
{
#ifndef __KERNEL_SSE__
	return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
#else
	return _mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128)); /* todo: avoid cvt */
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Per-component "greater than or equal" comparison of two float4 values.
 *
 * Returns an int4 with one lane per component; a lane is zero where the
 * comparison is false.
 *
 * NOTE(review): the scalar path stores 0/1 per lane, while the SSE path runs
 * the comparison mask through _mm_cvtps_epi32, which is a numeric
 * float-to-int conversion rather than a bit cast. The lane values for "true"
 * therefore presumably differ between the two paths -- confirm that callers
 * only rely on zero versus non-zero before changing this. */
ccl_device_inline int4 operator>=(const float4& a, const float4& b)
{
#ifndef __KERNEL_SSE__
	return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
#else
	return _mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)); /* todo: avoid cvt */
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise "less than or equal" test of two float4 values.
 *
 * Each lane of the returned int4 is non-zero where a <= b holds for that
 * component and zero where it does not. */
ccl_device_inline int4 operator<=(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	/* NOTE(review): the comparison mask is converted to integers numerically
	 * rather than bit-cast, so a true lane is presumably not an all-ones
	 * pattern - confirm callers only test the lanes for non-zero. */
	const __m128 lane_mask = _mm_cmple_ps(a.m128, b.m128);
	return _mm_cvtps_epi32(lane_mask); /* todo: avoid cvt */
#else
	const int le_x = (a.x <= b.x);
	const int le_y = (a.y <= b.y);
	const int le_z = (a.z <= b.z);
	const int le_w = (a.w <= b.w);
	return make_int4(le_x, le_y, le_z, le_w);
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Exact equality of two float4 values.
 *
 * Returns true only when all four components of a compare equal to the
 * matching components of b. */
ccl_device_inline bool operator==(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	/* Collect one bit per lane from the comparison; all four must be set. */
	const int equal_lanes = _mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15;
	return equal_lanes == 15;
#else
	if(a.x != b.x) {
		return false;
	}
	if(a.y != b.y) {
		return false;
	}
	if(a.z != b.z) {
		return false;
	}
	return !(a.w != b.w);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Cross product of the xyz parts of a and b, returned as a float4.
 *
 * The scalar path stores 0.0f in the w component. */
ccl_device_inline float4 cross(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	/* Same formula as the scalar path, presumably expressed through lane
	 * permutations of both operands. */
	const float4 positive_terms = shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b);
	const float4 negative_terms = shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b);
	return positive_terms - negative_terms;
#else
	const float cx = a.y*b.z - a.z*b.y;
	const float cy = a.z*b.x - a.x*b.z;
	const float cz = a.x*b.y - a.y*b.x;
	return make_float4(cx, cy, cz, 0.0f);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Test whether every component of a compares equal to 0.0f. */
ccl_device_inline bool is_zero(const float4& a)
{
#ifdef __KERNEL_SSE__
	const float4 zero = make_float4(0.0f);
	return a == zero;
#else
	if(a.x != 0.0f || a.y != 0.0f) {
		return false;
	}
	if(a.z != 0.0f || a.w != 0.0f) {
		return false;
	}
	return true;
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Horizontal sum: adds the four components of a and returns the total.
 *
 * The scalar path groups the additions as (x + y) + (z + w). */
ccl_device_inline float reduce_add(const float4& a)
{
#ifdef __KERNEL_SSE__
	/* Two shuffle-and-add steps, then read the first lane. */
	const float4 pair_sums = shuffle<1,0,3,2>(a) + a;
	const float4 totals = shuffle<2,3,0,1>(pair_sums) + pair_sums;
	return _mm_cvtss_f32(totals); /* todo: efficiency? */
#else
	const float sum_xy = a.x + a.y;
	const float sum_zw = a.z + a.w;
	return (sum_xy + sum_zw);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Arithmetic mean of the four components of a. */
ccl_device_inline float average(const float4& a)
{
	const float component_sum = reduce_add(a);
	return component_sum * 0.25f;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Length of a: the square root of dot(a, a). */
ccl_device_inline float len(const float4& a)
{
	const float length_squared = dot(a, a);
	return sqrtf(length_squared);
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Divide a by its own length.
 *
 * No check is made for a zero length; see safe_normalize() for a variant
 * that guards against it. */
ccl_device_inline float4 normalize(const float4& a)
{
	const float length = len(a);
	return a/length;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Divide a by its own length, unless that length compares equal to zero,
 * in which case a is returned unchanged. */
ccl_device_inline float4 safe_normalize(const float4& a)
{
	const float length = len(a);
	if(length != 0.0f) {
		return a/length;
	}
	return a;
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise minimum of a and b. */
ccl_device_inline float4 min(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	const __m128 lane_min = _mm_min_ps(a.m128, b.m128);
	return lane_min;
#else
	const float min_x = min(a.x, b.x);
	const float min_y = min(a.y, b.y);
	const float min_z = min(a.z, b.z);
	const float min_w = min(a.w, b.w);
	return make_float4(min_x, min_y, min_z, min_w);
#endif
}
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Component-wise maximum of a and b. */
ccl_device_inline float4 max(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	const __m128 lane_max = _mm_max_ps(a.m128, b.m128);
	return lane_max;
#else
	const float max_x = max(a.x, b.x);
	const float max_y = max(a.y, b.y);
	const float max_z = max(a.z, b.z);
	const float max_w = max(a.w, b.w);
	return make_float4(max_x, max_y, max_z, max_w);
#endif
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Per-component select.
 *
 * For each lane the result takes the component of a where the matching
 * lane of mask is non-zero, and the component of b where it is zero.
 * Both code paths follow this rule, whatever non-zero value the mask
 * uses to encode "true". */
ccl_device_inline float4 select(const int4& mask, const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
	/* Build a real bit mask (all ones / all zeros per lane) from the integer
	 * mask. Converting the integers to floats numerically does not give a
	 * usable bit pattern for the and/andnot blend below. */
	const __m128 take_b = _mm_castsi128_ps(_mm_cmpeq_epi32(mask, _mm_setzero_si128()));
	/* Lanes where mask == 0 come from b, every other lane comes from a. */
	return _mm_or_ps(_mm_and_ps(take_b, b), _mm_andnot_ps(take_b, a));
#else
	return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Horizontal minimum over the four components of a.
 *
 * The result is returned as a float4 rather than a scalar; the scalar path
 * builds it with the single-argument make_float4() from the smallest
 * component. */
ccl_device_inline float4 reduce_min(const float4& a)
{
#ifdef __KERNEL_SSE__
	/* Two shuffle-and-min steps. */
	const float4 pair_min = min(shuffle<1,0,3,2>(a), a);
	const float4 all_min = min(shuffle<2,3,0,1>(pair_min), pair_min);
	return all_min;
#else
	const float min_xy = min(a.x, a.y);
	const float min_zw = min(a.z, a.w);
	return make_float4(min(min_xy, min_zw));
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Horizontal maximum over the four components of `a`, returned as a float4.
 *
 * The SSE path combines the vector with two shuffled copies of itself; the
 * scalar path takes the maximum of the four components and hands that single
 * value to make_float4().
 * NOTE(review): presumably every lane of the result holds the maximum --
 * confirm against shuffle<>() and the single-argument make_float4(). */
ccl_device_inline float4 reduce_max(const float4& a)
{
#ifdef __KERNEL_SSE__
	const float4 pairwise = max(shuffle<1,0,3,2>(a), a);
	const float4 crossed = shuffle<2,3,0,1>(pairwise);
	return max(crossed, pairwise);
#else
	const float max_xy = max(a.x, a.y);
	const float max_zw = max(a.z, a.w);
	return make_float4(max(max_xy, max_zw));
#endif
}
|
|
|
|
|
|
|
|
#if 0
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Horizontal sum over the four components of `a`, returned as a float4.
 *
 * Currently compiled out by the surrounding `#if 0`.
 * NOTE(review): presumably every lane of the result holds the sum -- confirm
 * against shuffle<>() and the single-argument make_float4(). */
ccl_device_inline float4 reduce_add(const float4& a)
{
#ifdef __KERNEL_SSE__
	const float4 pairwise = shuffle<1,0,3,2>(a) + a;
	const float4 crossed = shuffle<2,3,0,1>(pairwise);
	return crossed + pairwise;
#else
	/* Keep the (x + y) + (z + w) grouping so rounding is unchanged. */
	const float total = (a.x + a.y) + (a.z + a.w);
	return make_float4(total);
#endif
}
|
|
|
|
#endif
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Debug helper: prints `label` followed by the x, y, z and w components of
 * `a` on one line of stdout, each with 8 digits after the decimal point. */
ccl_device_inline void print_float4(const char *label, const float4& a)
{
	/* Widen explicitly to double, the type printf's %f conversion expects. */
	const double x = (double)a.x;
	const double y = (double)a.y;
	const double z = (double)a.z;
	const double w = (double)a.w;

	printf("%s: %.8f %.8f %.8f %.8f\n", label, x, y, z, w);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
Cycles: Adding Hilbert Spiral as a tile order for rendering
This patch adds the "Hilbert Spiral", a custom-designed continuous space-filling curve, as a tile order for rendering in Cycles.
It essentially works by dividing the tiles into tile blocks which are processed in a spiral outwards from the center. Inside each
block, the tiles are processed in a regular Hilbert curve pattern. By rotating that pattern according to the spiral direction,
a continuous curve is obtained, which helps with cache coherency and therefore rendering speed.
The curve is a compromise between the faster-rendering Bottom-to-Top etc. orders and the Center order, which is a bit slower,
but starts with the more important areas. The Hilbert Spiral also starts in the center (unless huge tiles are used) and is still
marginally slower than Bottom-to-Top, but noticeably faster than Center.
Reviewers: sergey, #cycles, dingto
Reviewed By: #cycles, dingto
Subscribers: iscream, gregzaal, sergey, mib2berlin
Differential Revision: https://developer.blender.org/D1166
2016-01-09 23:11:34 +00:00
|
|
|
/* Int2 */
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
|
|
|
/* Component-wise sum of two int2 values. */
ccl_device_inline int2 operator+(const int2 &a, const int2 &b)
{
	const int sum_x = a.x + b.x;
	const int sum_y = a.y + b.y;
	return make_int2(sum_x, sum_y);
}
|
|
|
|
|
|
|
|
/* Adds `b` to `a` component-wise, storing the result in `a`.
 * Returns the updated value as a copy (by value, not by reference). */
ccl_device_inline int2 operator+=(int2 &a, const int2 &b)
{
	a = a + b;
	return a;
}
|
|
|
|
|
|
|
|
/* Component-wise difference `a - b` of two int2 values. */
ccl_device_inline int2 operator-(const int2 &a, const int2 &b)
{
	const int diff_x = a.x - b.x;
	const int diff_y = a.y - b.y;
	return make_int2(diff_x, diff_y);
}
|
|
|
|
|
|
|
|
/* Component-wise product of two int2 values. */
ccl_device_inline int2 operator*(const int2 &a, const int2 &b)
{
	const int prod_x = a.x * b.x;
	const int prod_y = a.y * b.y;
	return make_int2(prod_x, prod_y);
}
|
|
|
|
|
|
|
|
/* Component-wise integer quotient `a / b` of two int2 values.
 * No check is made for zero components in `b`; callers must ensure the
 * divisor is non-zero in both x and y. */
ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
{
	const int quot_x = a.x / b.x;
	const int quot_y = a.y / b.y;
	return make_int2(quot_x, quot_y);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Int3 */
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise minimum of two int3 values. */
ccl_device_inline int3 min(int3 a, int3 b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* _mm_min_epi32 is an SSE4.1 intrinsic, hence the additional guard. */
	return _mm_min_epi32(a.m128, b.m128);
#else
	const int lo_x = min(a.x, b.x);
	const int lo_y = min(a.y, b.y);
	const int lo_z = min(a.z, b.z);
	return make_int3(lo_x, lo_y, lo_z);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise maximum of two int3 values. */
ccl_device_inline int3 max(int3 a, int3 b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
	/* _mm_max_epi32 is an SSE4.1 intrinsic, hence the additional guard. */
	return _mm_max_epi32(a.m128, b.m128);
#else
	const int hi_x = max(a.x, b.x);
	const int hi_y = max(a.y, b.y);
	const int hi_z = max(a.z, b.z);
	return make_int3(hi_x, hi_y, hi_z);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Clamps `a` against the scalar bounds `mn` (lower) and `mx` (upper).
 *
 * The scalar path clamps x, y and z individually; the SSE path builds int3
 * bounds with make_int3() and applies the int3 max() and min().
 * NOTE(review): the SSE path assumes single-argument make_int3() fills all
 * components with the given value -- confirm. */
ccl_device_inline int3 clamp(const int3& a, int mn, int mx)
{
#ifdef __KERNEL_SSE__
	const int3 lower = make_int3(mn);
	const int3 upper = make_int3(mx);
	return min(max(a, lower), upper);
#else
	const int cx = clamp(a.x, mn, mx);
	const int cy = clamp(a.y, mn, mx);
	const int cz = clamp(a.z, mn, mx);
	return make_int3(cx, cy, cz);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Clamps `a` against a per-component lower bound `mn` and a scalar upper
 * bound `mx`.
 *
 * `mn` is only read, so it is taken by const reference; this lets callers
 * pass const objects and temporaries (e.g. the result of make_int3()),
 * while existing callers passing a mutable lvalue keep working.
 *
 * The scalar path clamps x, y and z individually against (mn.x, mx),
 * (mn.y, mx) and (mn.z, mx); the SSE path applies the int3 max() and min().
 * NOTE(review): the SSE path assumes single-argument make_int3() fills all
 * components with the given value -- confirm. */
ccl_device_inline int3 clamp(const int3& a, const int3& mn, int mx)
{
#ifdef __KERNEL_SSE__
	return min(max(a, mn), make_int3(mx));
#else
	return make_int3(clamp(a.x, mn.x, mx),
	                 clamp(a.y, mn.y, mx),
	                 clamp(a.z, mn.z, mx));
#endif
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Debug helper: prints `label` followed by the x, y and z components of `a`
 * on one line of stdout. */
ccl_device_inline void print_int3(const char *label, const int3& a)
{
	const int x = a.x;
	const int y = a.y;
	const int z = a.z;

	printf("%s: %d %d %d\n", label, x, y, z);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Int4 */
|
|
|
|
|
Cycles: merging features from tomato branch.
=== BVH build time optimizations ===
* BVH building was multithreaded. Not all building is multithreaded, packing
and the initial bounding/splitting is still single threaded, but recursive
splitting is, which was the main bottleneck.
* Object splitting now uses binning rather than sorting of all elements, using
code from the Embree raytracer from Intel.
http://software.intel.com/en-us/articles/embree-photo-realistic-ray-tracing-kernels/
* Other small changes to avoid allocations, pack memory more tightly, avoid
some unnecessary operations, ...
These optimizations do not work yet when Spatial Splits are enabled; more work
is needed for that. There are also other optimizations still needed, in
particular for the case of many low poly objects, the packing step and node
memory allocation.
BVH raytracing time should remain about the same, but BVH build time should be
significantly reduced; tests here show a speedup of about 5x to 10x on a dual core
and 5x to 25x on an 8-core machine, depending on the scene.
=== Threads ===
Centralized task scheduler for multithreading, which is basically the
CPU device threading code wrapped into something reusable.
Basic idea is that there is a single TaskScheduler that keeps a pool of threads,
one for each core. Other places in the code can then create a TaskPool that they
can drop Tasks in to be executed by the scheduler, and wait for them to complete
or cancel them early.
=== Normal ===
Added a Normal output to the texture coordinate node. This currently
gives the object space normal, which is the same under object animation.
In the future this might become a "generated" normal so it's also stable for
deforming objects, but for now it's already useful for non-deforming objects.
=== Render Layers ===
Per render layer Samples control, leaving it to 0 will use the common scene
setting.
Environment pass will now render environment even if film is set to transparent.
"Exclude Layers" added. Scene layers (all objects that influence the render,
directly or indirectly) are shared between all render layers. However sometimes
it's useful to leave out some object influence for a particular render layer.
That's what this option allows you to do.
=== Filter Glossy ===
When using a value higher than 0.0, this will blur glossy reflections after
blurry bounces, to reduce noise at the cost of accuracy. 1.0 is a good
starting value to tweak.
Some light paths have a low probability of being found while contributing much
light to the pixel. As a result these light paths will be found in some pixels
and not in others, causing fireflies. An example of such a difficult path might
be a small light that is causing a small specular highlight on a sharp glossy
material, which we are seeing through a rough glossy material. With path tracing
it is difficult to find the specular highlight, but if we increase the roughness
on the material the highlight gets bigger and softer, and so easier to find.
Often this blurring will be hardly noticeable, because we are seeing it through
a blurry material anyway, but there are also cases where this will lead to a
loss of detail in lighting.
2012-04-28 08:53:59 +00:00
|
|
|
#ifndef __KERNEL_GPU__
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise sum of two int4 values.
 * The SSE path adds the underlying m128 members with _mm_add_epi32; the
 * scalar path adds x, y, z and w individually. */
ccl_device_inline int4 operator+(const int4& a, const int4& b)
{
#ifdef __KERNEL_SSE__
	return _mm_add_epi32(a.m128, b.m128);
#else
	const int sum_x = a.x + b.x;
	const int sum_y = a.y + b.y;
	const int sum_z = a.z + b.z;
	const int sum_w = a.w + b.w;
	return make_int4(sum_x, sum_y, sum_z, sum_w);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* In-place component-wise addition: stores a + b into a, then returns the
 * updated value (by value, not by reference). */
ccl_device_inline int4 operator+=(int4& a, const int4& b)
{
	a = a + b;
	return a;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Shift every component of a right by i bits.
 *
 * Without SSE the built-in >> is applied to each int component; with SSE the
 * whole vector is shifted at once with _mm_srai_epi32 (arithmetic shift). */
ccl_device_inline int4 operator>>(const int4& a, int i)
{
#ifndef __KERNEL_SSE__
	return make_int4(a.x >> i,
	                 a.y >> i,
	                 a.z >> i,
	                 a.w >> i);
#else
	return _mm_srai_epi32(a.m128, i);
#endif
}
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise minimum of two int4 values.
 *
 * _mm_min_epi32 is only used when both the SSE and SSE4.1 kernel defines are
 * set; otherwise the scalar min() is applied to each component. */
ccl_device_inline int4 min(int4 a, int4 b)
{
#if !(defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__))
	return make_int4(min(a.x, b.x),
	                 min(a.y, b.y),
	                 min(a.z, b.z),
	                 min(a.w, b.w));
#else
	return _mm_min_epi32(a.m128, b.m128);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise maximum of two int4 values.
 *
 * _mm_max_epi32 is only used when both the SSE and SSE4.1 kernel defines are
 * set; otherwise the scalar max() is applied to each component. */
ccl_device_inline int4 max(int4 a, int4 b)
{
#if !(defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__))
	return make_int4(max(a.x, b.x),
	                 max(a.y, b.y),
	                 max(a.z, b.z),
	                 max(a.w, b.w));
#else
	return _mm_max_epi32(a.m128, b.m128);
#endif
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Component-wise clamp: a is first raised to at least mn, and that result is
 * then lowered to at most mx. */
ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx)
{
	const int4 lower_bounded = max(a, mn);
	return min(lower_bounded, mx);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Per-component select between a and b, driven by mask.
 *
 * SSE path: bitwise blend (mask & a) | (~mask & b), so each mask component is
 * expected to be either all ones (-1) or zero, e.g. the result of an SSE
 * comparison.
 * Fallback path: the component of a is taken whenever the matching mask
 * component is non-zero, otherwise the component of b.
 *
 * The mask must be used as raw bits. Integer bitwise intrinsics are used on
 * purpose: passing the mask through _mm_cvtepi32_ps() would convert it
 * numerically (-1 becomes -1.0f, bit pattern 0xBF800000), so an all-ones mask
 * would no longer select every bit of a. */
ccl_device_inline int4 select(const int4& mask, const int4& a, const int4& b)
{
#ifdef __KERNEL_SSE__
	/* _mm_andnot_si128(m, v) computes (~m) & v. */
	return _mm_or_si128(_mm_and_si128(mask.m128, a.m128),
	                    _mm_andnot_si128(mask.m128, b.m128));
#else
	return make_int4((mask.x)? a.x: b.x,
	                 (mask.y)? a.y: b.y,
	                 (mask.z)? a.z: b.z,
	                 (mask.w)? a.w: b.w);
#endif
}
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Debug helper: prints label followed by the four components of a, on one
 * line, using printf. */
ccl_device_inline void print_int4(const char *label, const int4& a)
{
	printf("%s: %d %d %d %d\n",
	       label,
	       a.x, a.y, a.z, a.w);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Int/Float conversion */
|
|
|
|
|
|
|
|
#ifndef __KERNEL_OPENCL__
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Reinterpret the bits of an unsigned integer as a signed integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline int as_int(uint i)
{
	union { uint as_unsigned; int as_signed; } bits;

	bits.as_unsigned = i;
	return bits.as_signed;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Reinterpret the bits of a signed integer as an unsigned integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline uint as_uint(int i)
{
	union { uint as_unsigned; int as_signed; } bits;

	bits.as_signed = i;
	return bits.as_unsigned;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Return the raw bit pattern of a float as an unsigned integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline uint as_uint(float f)
{
	union { uint as_bits; float as_float; } pun;

	pun.as_float = f;
	return pun.as_bits;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Return the raw bit pattern of a float as a signed integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline int __float_as_int(float f)
{
	union { int as_bits; float as_float; } pun;

	pun.as_float = f;
	return pun.as_bits;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Build a float from the raw bit pattern stored in a signed integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline float __int_as_float(int i)
{
	union { int as_bits; float as_float; } pun;

	pun.as_bits = i;
	return pun.as_float;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Return the raw bit pattern of a float as an unsigned integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline uint __float_as_uint(float f)
{
	union { uint as_bits; float as_float; } pun;

	pun.as_float = f;
	return pun.as_bits;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Build a float from the raw bit pattern stored in an unsigned integer.
 * The value is passed through a union, no numeric conversion is applied. */
ccl_device_inline float __uint_as_float(uint i)
{
	union { uint as_bits; float as_float; } pun;

	pun.as_bits = i;
	return pun.as_float;
}
|
|
|
|
|
2017-01-19 13:41:04 +00:00
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Interpolation */
|
|
|
|
|
2011-11-10 14:53:56 +00:00
|
|
|
/* Linear interpolation between `a` (at t == 0) and `b` (at t == 1).
 * The weights are computed in type B and the blended value is converted
 * back to type A. */
template<class A, class B> A lerp(const A& a, const A& b, const B& t)
{
	const B weight_a = static_cast<B>(1) - t;

	return static_cast<A>(a * weight_a + b * t);
}
|
|
|
|
|
|
|
|
/* Triangle */
|
|
|
|
|
2016-10-12 12:48:59 +00:00
|
|
|
/* Area of the triangle (v1, v2, v3): half the length of the cross product
 * of the two edges that start at v2. */
ccl_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3)
{
	const float3 edge_to_v3 = v3 - v2;
	const float3 edge_to_v1 = v1 - v2;

	return len(cross(edge_to_v3, edge_to_v1))*0.5f;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2017-03-24 14:22:27 +00:00
|
|
|
/* Versions of functions which are safe for fast math. */
|
|
|
|
/* NaN test that works on the raw bit pattern instead of a float comparison. */
ccl_device_inline bool isnan_safe(float f)
{
	/* Drop the sign bit. A NaN has all exponent bits set and a non-zero
	 * mantissa, so its remaining bits are greater than those of infinity. */
	const unsigned int magnitude = __float_as_uint(f) & 0x7fffffffu;

	return magnitude > 0x7f800000u;
}
|
|
|
|
|
|
|
|
/* Test whether `f` is finite (neither infinity nor NaN).
 *
 * The check is done purely on the IEEE 754 bit pattern, so it does not rely
 * on float comparisons that a fast-math build may optimize away.
 *
 * A single precision value is non-finite exactly when all eight exponent bits
 * are set. Every other pattern is finite, including denormals and both signed
 * zeros (note that -0.0f has only the sign bit set and must count as finite). */
ccl_device_inline bool isfinite_safe(float f)
{
	const unsigned int x = __float_as_uint(f);

	return (x & 0x7f800000u) != 0x7f800000u;
}
|
|
|
|
|
2011-09-27 20:37:24 +00:00
|
|
|
/* Orthonormal vectors */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Compute two vectors perpendicular to N and to each other.
 *
 * `a` is the normalized cross product of a fixed helper vector with N, and
 * `b` is cross(N, a). NOTE(review): `b` is only unit length when N is,
 * callers presumably pass a normalized N - confirm. */
ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
{
	float3 tangent;

	if(N.x == N.y && N.x == N.z) {
		/* All components equal, so (1,1,1) x N would be zero:
		 * use (-1,1,1) x N instead. */
		tangent = make_float3(N.z-N.y, N.x+N.z, -N.y-N.x);
	}
	else {
		/* (1,1,1) x N */
		tangent = make_float3(N.z-N.y, N.x-N.z, N.y-N.x);
	}

	tangent = normalize(tangent);

	*a = tangent;
	*b = cross(N, tangent);
}
|
|
|
|
|
2012-03-28 12:18:12 +00:00
|
|
|
/* Color division */
|
|
|
|
|
2014-03-29 12:03:50 +00:00
|
|
|
/* Per-channel reciprocal of a color. Channels that are exactly zero give
 * zero instead of dividing by zero. */
ccl_device_inline float3 safe_invert_color(float3 a)
{
	const float inv_x = (a.x == 0.0f)? 0.0f: 1.0f/a.x;
	const float inv_y = (a.y == 0.0f)? 0.0f: 1.0f/a.y;
	const float inv_z = (a.z == 0.0f)? 0.0f: 1.0f/a.z;

	return make_float3(inv_x, inv_y, inv_z);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Per-channel division a/b. Channels where the divisor is exactly zero
 * give zero instead of dividing by zero. */
ccl_device_inline float3 safe_divide_color(float3 a, float3 b)
{
	const float qx = (b.x == 0.0f)? 0.0f: a.x/b.x;
	const float qy = (b.y == 0.0f)? 0.0f: a.y/b.y;
	const float qz = (b.z == 0.0f)? 0.0f: a.z/b.z;

	return make_float3(qx, qy, qz);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Per-channel division a/b that fills channels with a zero divisor from the
 * remaining channels, to get a gray-ish result instead of a dropped channel.
 *
 * - one zero divisor: that channel becomes the average of the other two;
 * - two zero divisors: both take the value of the remaining channel;
 * - three zero divisors: the result is all zero. */
ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b)
{
	const bool x_zero = (b.x == 0.0f);
	const bool y_zero = (b.y == 0.0f);
	const bool z_zero = (b.z == 0.0f);

	float x = x_zero? 0.0f: a.x/b.x;
	float y = y_zero? 0.0f: a.y/b.y;
	float z = z_zero? 0.0f: a.z/b.z;

	/* try to get gray even if b is zero */
	if(x_zero) {
		if(y_zero) {
			/* z is the only candidate (and is itself 0 if b.z == 0) */
			x = z;
			y = z;
		}
		else if(z_zero) {
			x = y;
			z = y;
		}
		else {
			x = 0.5f*(y + z);
		}
	}
	else if(y_zero) {
		if(z_zero) {
			y = x;
			z = x;
		}
		else {
			y = 0.5f*(x + z);
		}
	}
	else if(z_zero) {
		z = 0.5f*(x + y);
	}

	return make_float3(x, y, z);
}
|
|
|
|
|
2012-11-04 22:31:32 +00:00
|
|
|
/* Rotation of point around axis and angle */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Rotate point `p` by `angle` (fed to cosf/sinf, so radians) around `axis`,
 * by applying the axis-angle rotation matrix row by row.
 * NOTE(review): the matrix form presumably expects a normalized axis - confirm. */
ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
{
	const float c = cosf(angle);
	const float s = sinf(angle);
	const float t = 1 - c;
	float3 r;

	r.x = ((c + t * axis.x * axis.x) * p.x) +
	      ((t * axis.x * axis.y - axis.z * s) * p.y) +
	      ((t * axis.x * axis.z + axis.y * s) * p.z);

	r.y = ((t * axis.x * axis.y + axis.z * s) * p.x) +
	      ((c + t * axis.y * axis.y) * p.y) +
	      ((t * axis.y * axis.z - axis.x * s) * p.z);

	r.z = ((t * axis.x * axis.z - axis.y * s) * p.x) +
	      ((t * axis.y * axis.z + axis.x * s) * p.y) +
	      ((c + t * axis.z * axis.z) * p.z);

	return r;
}
|
|
|
|
|
2012-12-19 21:17:16 +00:00
|
|
|
/* NaN-safe math ops */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Square root that treats negative input as zero. */
ccl_device_inline float safe_sqrtf(float f)
{
	const float non_negative = max(f, 0.0f);

	return sqrtf(non_negative);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Arc sine with the argument clamped to [-1, 1] first. */
ccl_device float safe_asinf(float a)
{
	const float in_domain = clamp(a, -1.0f, 1.0f);

	return asinf(in_domain);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Arc cosine with the argument clamped to [-1, 1] first. */
ccl_device float safe_acosf(float a)
{
	const float in_domain = clamp(a, -1.0f, 1.0f);

	return acosf(in_domain);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* powf() wrapper with manual special-case handling on GPU builds.
 * Without __KERNEL_GPU__ this is a plain powf() call. */
ccl_device float compatible_powf(float x, float y)
{
#ifdef __KERNEL_GPU__
	/* x^0 -> 1, including 0^0 */
	if(y == 0.0f)
		return 1.0f;

	if(x == 0.0f)
		return 0.0f;

	/* GPU pow doesn't accept negative x, do manual checks here */
	if(x < 0.0f) {
		const float magnitude = powf(-x, y);

		/* even exponent keeps the result positive, otherwise negate */
		return (fmodf(-y, 2.0f) == 0.0f)? magnitude: -magnitude;
	}
#endif

	return powf(x, y);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Power function that returns 0 for a negative base combined with a
 * non-integer exponent, and defers to compatible_powf() otherwise. */
ccl_device float safe_powf(float a, float b)
{
	if(UNLIKELY(a < 0.0f)) {
		/* exponent differs from its integer conversion: not a whole number */
		if(b != float_to_int(b))
			return 0.0f;
	}

	return compatible_powf(a, b);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Logarithm of `a` in base `b`, computed as logf(a)/logf(b).
 *
 * Returns 0 instead of an infinite or undefined result when:
 * - the value or the base is zero or negative (logarithm undefined or -inf);
 * - the base is such that logf(b) == 0 (base 1), which would divide by zero. */
ccl_device float safe_logf(float a, float b)
{
	if(UNLIKELY(a <= 0.0f || b <= 0.0f))
		return 0.0f;

	const float log_base = logf(b);

	return (log_base != 0.0f)? logf(a)/log_base: 0.0f;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Division a/b that returns 0 when the divisor is exactly zero. */
ccl_device float safe_divide(float a, float b)
{
	if(b == 0.0f)
		return 0.0f;

	return a/b;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Floating point remainder fmodf(a, b) that returns 0 when the divisor is
 * exactly zero. */
ccl_device float safe_modulo(float a, float b)
{
	if(b == 0.0f)
		return 0.0f;

	return fmodf(a, b);
}
|
|
|
|
|
Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practice, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evaluation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
2016-06-23 20:56:43 +00:00
|
|
|
/* Beta function, evaluated through log-gamma:
 * exp(lgamma(x) + lgamma(y) - lgamma(x + y)). */
ccl_device_inline float beta(float x, float y)
{
	/* OpenCL builds call lgamma(), other builds call lgammaf(). */
#ifndef __KERNEL_OPENCL__
	const float log_beta = lgammaf(x) + lgammaf(y) - lgammaf(x+y);
#else
	const float log_beta = lgamma(x) + lgamma(y) - lgamma(x+y);
#endif

	return expf(log_beta);
}
|
|
|
|
|
2017-03-23 11:55:51 +00:00
|
|
|
/* XOR the raw bits of float `x` with the integer mask `y` and return the
 * result reinterpreted as a float. */
ccl_device_inline float xor_signmask(float x, int y)
{
	const int toggled_bits = __float_as_int(x) ^ y;

	return __int_as_float(toggled_bits);
}
|
|
|
|
|
2014-07-29 10:07:05 +00:00
|
|
|
/* projections */
|
2015-02-19 07:52:48 +00:00
|
|
|
/* Tube (cylindrical) projection of `co` to (u, v).
 *
 * u is derived from the angle of (co.x, co.y) around the Z axis, v from
 * co.z remapped as (z + 1) / 2. Points on the Z axis, where the angle is
 * undefined, map to (0, 0). */
ccl_device_inline float2 map_to_tube(const float3 co)
{
	const float radius = sqrtf(co.x * co.x + co.y * co.y);

	if(!(radius > 0.0f)) {
		return make_float2(0.0f, 0.0f);
	}

	const float u = (1.0f - (atan2f(co.x / radius, co.y / radius) / M_PI_F)) * 0.5f;
	const float v = (co.z + 1.0f) * 0.5f;

	return make_float2(u, v);
}
|
|
|
|
|
2015-02-19 07:52:48 +00:00
|
|
|
/* Spherical projection of `co` to (u, v).
 *
 * u is derived from the angle of (co.x, co.y) around the Z axis, v from the
 * angle between `co` and the Z axis. A zero-length input maps to (0, 0). */
ccl_device_inline float2 map_to_sphere(const float3 co)
{
	const float l = len(co);

	if(!(l > 0.0f)) {
		return make_float2(0.0f, 0.0f);
	}

	float u;
	if(UNLIKELY(co.x == 0.0f && co.y == 0.0f)) {
		/* otherwise atan2f() would get a domain error */
		u = 0.0f;
	}
	else {
		u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f;
	}

	const float v = 1.0f - safe_acosf(co.z / l) / M_PI_F;

	return make_float2(u, v);
}
|
|
|
|
|
2014-12-15 16:18:01 +00:00
|
|
|
/* Index (0 = x, 1 = y, 2 = z) of the largest component of `vec`.
 * Comparisons are strict, so ties resolve towards the higher index. */
ccl_device_inline int util_max_axis(float3 vec)
{
#ifdef __KERNEL_SSE__
	/* Compare (x, x, y) against (y, z, z) in one go; the three low mask bits
	 * are then x>y, x>z and y>z, and select the axis from a lookup table. */
	__m128 lhs = shuffle<0,0,1,1>(vec.m128);
	__m128 rhs = shuffle<1,2,2,1>(vec.m128);
	int mask = _mm_movemask_ps(_mm_cmpgt_ps(lhs, rhs)) & 0x7;
	static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0};

	return tab[mask];
#else
	if(vec.x > vec.y)
		return (vec.x > vec.z)? 0: 2;

	return (vec.y > vec.z)? 1: 2;
#endif
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
#endif /* __UTIL_MATH_H__ */
|