2011-04-27 11:58:34 +00:00
|
|
|
/*
|
2013-08-18 14:16:15 +00:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
2011-04-27 11:58:34 +00:00
|
|
|
*
|
2013-08-18 14:16:15 +00:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2011-04-27 11:58:34 +00:00
|
|
|
*
|
2013-08-18 14:16:15 +00:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
2014-03-29 12:03:45 +00:00
|
|
|
* limitations under the License.
|
2011-04-27 11:58:34 +00:00
|
|
|
*/
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Object Primitive
|
|
|
|
*
|
|
|
|
* All mesh and curve primitives are part of an object. The same mesh and curves
|
|
|
|
* may be instanced multiple times by different objects.
|
|
|
|
*
|
|
|
|
* If the mesh is not instanced multiple times, the object will not be explicitly
|
|
|
|
* stored as a primitive in the BVH, rather the bare triangles or curves are
|
|
|
|
* directly primitives in the BVH with world space locations applied, and the object
|
|
|
|
* ID is looked up afterwards. */
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Object attributes, for now a fixed size and contents */
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Offsets, counted in float4 elements, into one object's record in the
 * __objects texture (records are OBJECT_SIZE elements apart).
 *
 * The *_MOTION_PRE/POST names alias the offsets of the regular and inverse
 * transforms: object_fetch_transform_motion() reads eight consecutive
 * float4s starting at OBJECT_TRANSFORM_MOTION_PRE as a decomposed motion
 * transform instead. */
enum ObjectTransform {
	/* three float4 rows are fetched from here by object_fetch_transform() */
	OBJECT_TRANSFORM = 0,
	OBJECT_TRANSFORM_MOTION_PRE = OBJECT_TRANSFORM,

	OBJECT_INVERSE_TRANSFORM = 4,
	OBJECT_TRANSFORM_MOTION_POST = OBJECT_INVERSE_TRANSFORM,

	/* x: surface area, y: pass id, z: random number, w: particle id bits */
	OBJECT_PROPERTIES = 8,

	/* xyz: dupli generated coordinate, w: numkeys bits;
	 * the following float4 holds dupli uv in xy, numsteps in z, numverts in w */
	OBJECT_DUPLI = 9
};
|
|
|
|
|
|
|
|
/* Offsets, counted in float4 elements, into one object's record in the
 * __objects_vector texture (records are OBJECT_VECTOR_SIZE elements apart).
 * object_fetch_vector_transform() fetches three rows starting at the
 * selected offset. */
enum ObjectVectorTransform {
	OBJECT_VECTOR_MOTION_PRE = 0,
	OBJECT_VECTOR_MOTION_POST = 3
};
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Object to world space transformation */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
2012-10-15 21:12:58 +00:00
|
|
|
int offset = object*OBJECT_SIZE + (int)type;
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
Transform tfm;
|
2012-10-15 21:12:58 +00:00
|
|
|
tfm.x = kernel_tex_fetch(__objects, offset + 0);
|
|
|
|
tfm.y = kernel_tex_fetch(__objects, offset + 1);
|
|
|
|
tfm.z = kernel_tex_fetch(__objects, offset + 2);
|
|
|
|
tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
|
|
|
|
|
|
|
|
return tfm;
|
|
|
|
}
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Object to world space transformation for motion vectors */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
|
2012-12-20 19:26:57 +00:00
|
|
|
{
|
|
|
|
int offset = object*OBJECT_VECTOR_SIZE + (int)type;
|
|
|
|
|
|
|
|
Transform tfm;
|
|
|
|
tfm.x = kernel_tex_fetch(__objects_vector, offset + 0);
|
|
|
|
tfm.y = kernel_tex_fetch(__objects_vector, offset + 1);
|
|
|
|
tfm.z = kernel_tex_fetch(__objects_vector, offset + 2);
|
|
|
|
tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
|
|
|
|
|
|
|
|
return tfm;
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Motion blurred object transformations */
|
|
|
|
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Read the object's decomposed motion transform (eight consecutive float4s
 * starting at OBJECT_TRANSFORM_MOTION_PRE) and interpolate it at 'time'. */
ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
{
	const int base = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
	DecompMotionTransform decomp;
	Transform result;

	/* float4s 0..3: the "mid" part of the decomposition */
	decomp.mid.x = kernel_tex_fetch(__objects, base + 0);
	decomp.mid.y = kernel_tex_fetch(__objects, base + 1);
	decomp.mid.z = kernel_tex_fetch(__objects, base + 2);
	decomp.mid.w = kernel_tex_fetch(__objects, base + 3);

	/* float4s 4..7: the "pre" and "post" parts */
	decomp.pre_x = kernel_tex_fetch(__objects, base + 4);
	decomp.pre_y = kernel_tex_fetch(__objects, base + 5);
	decomp.post_x = kernel_tex_fetch(__objects, base + 6);
	decomp.post_y = kernel_tex_fetch(__objects, base + 7);

	transform_motion_interpolate(&result, &decomp, time);

	return result;
}
|
2012-04-30 12:49:26 +00:00
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Return the object's transform, using the motion-interpolated one when the
 * object's flags contain SD_OBJECT_MOTION and the static one otherwise.
 *
 * If 'itfm' is non-NULL the matching inverse is written to it: computed with
 * transform_quick_inverse() in the motion case, fetched from
 * OBJECT_INVERSE_TRANSFORM in the static case. */
ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
{
	const int flags = kernel_tex_fetch(__object_flag, object);
	Transform result;

	if(flags & SD_OBJECT_MOTION) {
		/* motion blurred object */
		result = object_fetch_transform_motion(kg, object, time);

		if(itfm) {
			*itfm = transform_quick_inverse(result);
		}
	}
	else {
		result = object_fetch_transform(kg, object, OBJECT_TRANSFORM);

		if(itfm) {
			*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
		}
	}

	return result;
}
|
2012-10-15 21:12:58 +00:00
|
|
|
#endif
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform position from object to world space */
|
|
|
|
|
|
|
|
ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2015-05-14 13:35:12 +00:00
|
|
|
*P = transform_point_auto(&ccl_fetch(sd, ob_tfm), *P);
|
2012-04-30 12:49:26 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
|
2012-04-16 08:35:21 +00:00
|
|
|
*P = transform_point(&tfm, *P);
|
2012-04-30 12:49:26 +00:00
|
|
|
#endif
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform position from world to object space */
|
|
|
|
|
|
|
|
ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
|
2012-05-08 23:39:31 +00:00
|
|
|
{
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2015-05-14 13:35:12 +00:00
|
|
|
*P = transform_point_auto(&ccl_fetch(sd, ob_itfm), *P);
|
2012-05-08 23:39:31 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
|
2012-05-08 23:39:31 +00:00
|
|
|
*P = transform_point(&tfm, *P);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform normal from world to object space */
|
|
|
|
|
|
|
|
ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
|
2012-05-02 09:33:45 +00:00
|
|
|
{
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2015-05-14 13:35:12 +00:00
|
|
|
*N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N));
|
2012-05-02 09:33:45 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
|
2012-05-02 09:33:45 +00:00
|
|
|
*N = normalize(transform_direction_transposed(&tfm, *N));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform normal from object to world space */
|
|
|
|
|
|
|
|
ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2015-05-14 13:35:12 +00:00
|
|
|
*N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_itfm), *N));
|
2012-04-30 12:49:26 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
|
2012-04-30 12:49:26 +00:00
|
|
|
*N = normalize(transform_direction_transposed(&tfm, *N));
|
|
|
|
#endif
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform direction vector from object to world space */
|
|
|
|
|
|
|
|
ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2015-05-14 13:35:12 +00:00
|
|
|
*D = transform_direction_auto(&ccl_fetch(sd, ob_tfm), *D);
|
2012-04-30 12:49:26 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
|
2011-04-27 11:58:34 +00:00
|
|
|
*D = transform_direction(&tfm, *D);
|
2012-04-30 12:49:26 +00:00
|
|
|
#endif
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform direction vector from world to object space */
|
|
|
|
|
|
|
|
ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
|
2013-07-31 21:18:23 +00:00
|
|
|
{
|
|
|
|
#ifdef __OBJECT_MOTION__
|
2015-05-14 13:35:12 +00:00
|
|
|
*D = transform_direction_auto(&ccl_fetch(sd, ob_itfm), *D);
|
2013-07-31 21:18:23 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
|
2013-07-31 21:18:23 +00:00
|
|
|
*D = transform_direction(&tfm, *D);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Object center position */
|
|
|
|
|
|
|
|
ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd)
|
2012-05-21 12:52:28 +00:00
|
|
|
{
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
if(ccl_fetch(sd, object) == OBJECT_NONE)
|
2012-10-16 13:20:57 +00:00
|
|
|
return make_float3(0.0f, 0.0f, 0.0f);
|
|
|
|
|
2012-10-09 18:37:14 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
return make_float3(ccl_fetch(sd, ob_tfm).x.w, ccl_fetch(sd, ob_tfm).y.w, ccl_fetch(sd, ob_tfm).z.w);
|
2012-05-21 12:52:28 +00:00
|
|
|
#else
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
|
2012-05-21 12:52:28 +00:00
|
|
|
return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Total surface area of object */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset);
|
|
|
|
return f.x;
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Pass ID number of object */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
|
Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
2012-01-25 17:23:52 +00:00
|
|
|
{
|
2014-03-29 12:03:47 +00:00
|
|
|
if(object == OBJECT_NONE)
|
Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
2012-01-25 17:23:52 +00:00
|
|
|
return 0.0f;
|
|
|
|
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset);
|
|
|
|
return f.y;
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Per object random number for shader variation */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
|
2012-05-21 12:52:28 +00:00
|
|
|
{
|
2014-03-29 12:03:47 +00:00
|
|
|
if(object == OBJECT_NONE)
|
2012-05-21 12:52:28 +00:00
|
|
|
return 0.0f;
|
|
|
|
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset);
|
|
|
|
return f.z;
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Particle ID from which this object was generated */
|
|
|
|
|
2014-05-04 15:30:02 +00:00
|
|
|
ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
|
2012-06-08 16:17:57 +00:00
|
|
|
{
|
2014-03-29 12:03:47 +00:00
|
|
|
if(object == OBJECT_NONE)
|
2015-03-31 14:51:55 +00:00
|
|
|
return 0;
|
2012-06-08 16:17:57 +00:00
|
|
|
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset);
|
2013-06-07 16:06:17 +00:00
|
|
|
return __float_as_uint(f.w);
|
2012-06-08 16:17:57 +00:00
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Generated texture coordinate on surface from where object was instanced */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
|
2012-10-04 21:40:39 +00:00
|
|
|
{
|
2014-03-29 12:03:47 +00:00
|
|
|
if(object == OBJECT_NONE)
|
2012-10-04 21:40:39 +00:00
|
|
|
return make_float3(0.0f, 0.0f, 0.0f);
|
|
|
|
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset);
|
|
|
|
return make_float3(f.x, f.y, f.z);
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* UV texture coordinate on surface from where object was instanced */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object)
|
2012-10-04 21:40:39 +00:00
|
|
|
{
|
2014-03-29 12:03:47 +00:00
|
|
|
if(object == OBJECT_NONE)
|
2012-10-04 21:40:39 +00:00
|
|
|
return make_float3(0.0f, 0.0f, 0.0f);
|
|
|
|
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset + 1);
|
|
|
|
return make_float3(f.x, f.y, 0.0f);
|
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Information about mesh for motion blurred triangles and curves */
|
|
|
|
|
2014-03-29 12:03:46 +00:00
|
|
|
ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
|
|
|
|
{
|
|
|
|
int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
|
|
|
|
|
|
|
|
if(numkeys) {
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset);
|
|
|
|
*numkeys = __float_as_int(f.w);
|
|
|
|
}
|
|
|
|
|
|
|
|
float4 f = kernel_tex_fetch(__objects, offset + 1);
|
|
|
|
if(numsteps)
|
|
|
|
*numsteps = __float_as_int(f.z);
|
|
|
|
if(numverts)
|
|
|
|
*numverts = __float_as_int(f.w);
|
|
|
|
}
|
2012-10-04 21:40:39 +00:00
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Pass ID for shader */
|
|
|
|
|
|
|
|
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
|
2012-05-21 12:52:28 +00:00
|
|
|
{
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1);
|
2012-05-21 12:52:28 +00:00
|
|
|
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Particle data from which object was instanced */
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Particle index: x component of the first float4 of the particle's record
 * in __particles (records are PARTICLE_SIZE elements apart). */
ccl_device_inline float particle_index(KernelGlobals *kg, int particle)
{
	const int base = particle*PARTICLE_SIZE;
	float4 data0 = kernel_tex_fetch(__particles, base + 0);

	return data0.x;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Particle age: y component of the first float4 of the particle's record. */
ccl_device float particle_age(KernelGlobals *kg, int particle)
{
	const int base = particle*PARTICLE_SIZE;
	float4 data0 = kernel_tex_fetch(__particles, base + 0);

	return data0.y;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Particle lifetime: z component of the first float4 of the particle's
 * record. */
ccl_device float particle_lifetime(KernelGlobals *kg, int particle)
{
	const int base = particle*PARTICLE_SIZE;
	float4 data0 = kernel_tex_fetch(__particles, base + 0);

	return data0.z;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Read the first float4 of the particle's record and return its w component,
 * which this accessor exposes as the particle size. */
ccl_device float particle_size(KernelGlobals *kg, int particle)
{
	int base = particle*PARTICLE_SIZE;
	float4 data = kernel_tex_fetch(__particles, base);
	return data.w;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Return the second float4 (record offset 1) of the particle's record
 * unchanged; this accessor exposes it as the particle rotation. */
ccl_device float4 particle_rotation(KernelGlobals *kg, int particle)
{
	int base = particle*PARTICLE_SIZE;
	float4 rotation = kernel_tex_fetch(__particles, base + 1);
	return rotation;
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* Build the particle location from the x, y and z components of the third
 * float4 (record offset 2) of the particle's record. */
ccl_device float3 particle_location(KernelGlobals *kg, int particle)
{
	int base = particle*PARTICLE_SIZE;
	float4 data = kernel_tex_fetch(__particles, base + 2);
	return make_float3(data.x, data.y, data.z);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* The velocity straddles two entries of the particle's record: its first
 * component is the w of entry 2, the remaining two are x and y of entry 3. */
ccl_device float3 particle_velocity(KernelGlobals *kg, int particle)
{
	int base = particle*PARTICLE_SIZE;
	float4 entry2 = kernel_tex_fetch(__particles, base + 2);
	float4 entry3 = kernel_tex_fetch(__particles, base + 3);
	return make_float3(entry2.w, entry3.x, entry3.y);
}
|
|
|
|
|
2013-11-15 23:17:10 +00:00
|
|
|
/* The angular velocity straddles two entries of the particle's record: its
 * first two components are z and w of entry 3, the last is x of entry 4. */
ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
{
	int base = particle*PARTICLE_SIZE;
	float4 entry3 = kernel_tex_fetch(__particles, base + 3);
	float4 entry4 = kernel_tex_fetch(__particles, base + 4);
	return make_float3(entry3.z, entry3.w, entry4.x);
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Object intersection in BVH */
|
2014-03-29 12:03:45 +00:00
|
|
|
|
2014-04-03 18:08:53 +00:00
|
|
|
/* Push each component of dir away from zero so that a later reciprocal does
 * not divide by zero. Components whose magnitude is not above exp2f(-80.0f)
 * are replaced by that threshold carrying the component's original sign. */
ccl_device_inline float3 bvh_clamp_direction(float3 dir)
{
	/* exp2f(-80.0f) */
	float min_abs = 8.271806E-25f;

	float x = (fabsf(dir.x) > min_abs)? dir.x: copysignf(min_abs, dir.x);
	float y = (fabsf(dir.y) > min_abs)? dir.y: copysignf(min_abs, dir.y);
	float z = (fabsf(dir.z) > min_abs)? dir.z: copysignf(min_abs, dir.z);

	return make_float3(x, y, z);
}
|
2014-03-29 12:03:45 +00:00
|
|
|
|
2014-04-03 18:08:53 +00:00
|
|
|
/* Return the reciprocal of dir, computed with the float3 division operator
 * applied to the whole vector at once. */
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
{
	float3 inverse = 1.0f / dir;
	return inverse;
}
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform ray into object space to enter static object in BVH */
|
|
|
|
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
/* Apply the object's inverse transform to the ray origin and direction.
 *
 * Writes the transformed origin to *P, the normalized and clamped transformed
 * direction to *dir and its reciprocal to *idir. Unless *t is FLT_MAX it is
 * multiplied by the length the direction had before normalization. */
ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
{
	Transform inv_tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);

	*P = transform_point(&inv_tfm, ray->P);

	float scale;
	float3 local_dir = normalize_len(transform_direction(&inv_tfm, ray->D), &scale);
	*dir = bvh_clamp_direction(local_dir);
	*idir = bvh_inverse_direction(*dir);

	/* FLT_MAX is left untouched. */
	if(*t != FLT_MAX) {
		*t *= scale;
	}
}
|
|
|
|
|
2014-12-25 17:40:02 +00:00
|
|
|
#ifdef __QBVH__
/* QBVH variant of bvh_instance_push: performs the same inverse-transform of
 * the ray, but rescales two distances, *t and *t1.
 *
 * *t is skipped when it equals FLT_MAX, *t1 when it equals -FLT_MAX.
 *
 * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
 * so we can avoid having this duplication.
 */
ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
                                          int object,
                                          const Ray *ray,
                                          float3 *P,
                                          float3 *dir,
                                          float3 *idir,
                                          float *t,
                                          float *t1)
{
	Transform inv_tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);

	*P = transform_point(&inv_tfm, ray->P);

	float scale;
	float3 local_dir = normalize_len(transform_direction(&inv_tfm, ray->D), &scale);
	*dir = bvh_clamp_direction(local_dir);
	*idir = bvh_inverse_direction(*dir);

	if(*t != FLT_MAX) {
		*t *= scale;
	}

	if(*t1 != -FLT_MAX) {
		*t1 *= scale;
	}
}
#endif
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform ray to exit static object in BVH */
|
|
|
|
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
/* Restore the ray's own origin and direction after leaving an instance.
 *
 * Unless *t is FLT_MAX, it is divided by the length of ray->D transformed by
 * the object's inverse transform. *P, *dir and *idir are then reset from the
 * ray itself (direction clamped, then inverted). */
ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
{
	if(*t != FLT_MAX) {
		Transform inv_tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
		float scale = len(transform_direction(&inv_tfm, ray->D));
		*t /= scale;
	}

	*P = ray->P;
	*dir = bvh_clamp_direction(ray->D);
	*idir = bvh_inverse_direction(*dir);
}
|
|
|
|
|
2014-04-19 15:02:30 +00:00
|
|
|
/* Same as above, but returns scale factor to apply to multiple intersection distances */
|
|
|
|
|
|
|
|
/* Variant of bvh_instance_pop that does not touch a distance itself. It
 * stores in *t_fac the reciprocal of the length of ray->D transformed by the
 * object's inverse transform, then resets *P, *dir and *idir from the ray. */
ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac)
{
	Transform inv_tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
	float scale = len(transform_direction(&inv_tfm, ray->D));
	*t_fac = 1.0f / scale;

	*P = ray->P;
	*dir = bvh_clamp_direction(ray->D);
	*idir = bvh_inverse_direction(*dir);
}
|
|
|
|
|
|
|
|
|
2014-03-29 12:03:45 +00:00
|
|
|
#ifdef __OBJECT_MOTION__
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform ray into object space to enter motion blurred object in BVH */
|
|
|
|
|
2015-05-14 13:46:26 +00:00
|
|
|
/* Motion-blur counterpart of bvh_instance_push.
 *
 * The transform fetched for ray->time is stored in *tfm, and the inverse
 * returned alongside it is applied to the ray origin and direction. Outputs
 * follow the same rules as the static version: *dir is normalized and
 * clamped, *idir is its reciprocal, and *t is scaled by the pre-normalization
 * length unless it is FLT_MAX. */
ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm)
{
	Transform inv_tfm;
	*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &inv_tfm);

	*P = transform_point(&inv_tfm, ray->P);

	float scale;
	float3 local_dir = normalize_len(transform_direction(&inv_tfm, ray->D), &scale);
	*dir = bvh_clamp_direction(local_dir);
	*idir = bvh_inverse_direction(*dir);

	if(*t != FLT_MAX) {
		*t *= scale;
	}
}
|
|
|
|
|
2014-12-25 17:40:02 +00:00
|
|
|
#ifdef __QBVH__
/* QBVH variant of bvh_instance_motion_push: same ray transformation, but two
 * distances are rescaled. *t is skipped when it equals FLT_MAX, *t1 when it
 * equals -FLT_MAX.
 *
 * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
 * so we can avoid having this duplication.
 */
ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, float *t1, Transform *tfm)
{
	Transform inv_tfm;
	*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &inv_tfm);

	*P = transform_point(&inv_tfm, ray->P);

	float scale;
	float3 local_dir = normalize_len(transform_direction(&inv_tfm, ray->D), &scale);
	*dir = bvh_clamp_direction(local_dir);
	*idir = bvh_inverse_direction(*dir);

	if(*t != FLT_MAX) {
		*t *= scale;
	}

	if(*t1 != -FLT_MAX) {
		*t1 *= scale;
	}
}
#endif
|
|
|
|
|
2014-03-29 12:03:48 +00:00
|
|
|
/* Transform ray to exit motion blurred object in BVH */
|
|
|
|
|
2015-05-14 13:46:26 +00:00
|
|
|
/* Restore the ray's own origin and direction after leaving a motion-blurred
 * instance.
 *
 * Unless *t is FLT_MAX, it is multiplied by the length of the vector obtained
 * by applying *tfm to the reciprocal of the incoming *idir. *P, *dir and
 * *idir are then reset from the ray itself. */
ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm)
{
	if(*t != FLT_MAX) {
		/* Must run before *idir is overwritten below. */
		float scale = len(transform_direction(tfm, 1.0f/(*idir)));
		*t *= scale;
	}

	*P = ray->P;
	*dir = bvh_clamp_direction(ray->D);
	*idir = bvh_inverse_direction(*dir);
}
|
2014-04-19 15:02:30 +00:00
|
|
|
|
|
|
|
/* Same as above, but returns scale factor to apply to multiple intersection distances */
|
|
|
|
|
|
|
|
/* Variant of bvh_instance_motion_pop that reports the scale instead of
 * applying it. *t_fac receives the length of the vector obtained by applying
 * *tfm to the reciprocal of the incoming *idir; afterwards *P, *dir and *idir
 * are reset from the ray. */
ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac, Transform *tfm)
{
	/* Must run before *idir is overwritten below. */
	float scale = len(transform_direction(tfm, 1.0f/(*idir)));
	*t_fac = scale;

	*P = ray->P;
	*dir = bvh_clamp_direction(ray->D);
	*idir = bvh_inverse_direction(*dir);
}
|
|
|
|
|
2014-03-29 12:03:45 +00:00
|
|
|
#endif
|
|
|
|
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
/* TODO(sergey): This is only for until we've got OpenCL 2.0
|
|
|
|
* on all devices we consider supported. It'll be replaced with
|
|
|
|
* generic address space.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef __KERNEL_OPENCL__
|
|
|
|
ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg,
|
|
|
|
const ShaderData *sd,
|
|
|
|
ccl_addr_space float3 *D)
|
|
|
|
{
|
|
|
|
float3 private_D = *D;
|
|
|
|
object_dir_transform(kg, sd, &private_D);
|
|
|
|
*D = private_D;
|
|
|
|
}
|
|
|
|
|
|
|
|
ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg,
|
|
|
|
const ShaderData *sd,
|
|
|
|
ccl_addr_space float3 *N)
|
|
|
|
{
|
|
|
|
float3 private_N = *N;
|
2015-05-09 19:53:32 +00:00
|
|
|
object_normal_transform(kg, sd, &private_N);
|
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
2015-05-09 14:34:30 +00:00
|
|
|
*N = private_N;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Select the implementation behind the *_auto names: the address-space
 * wrappers when building for OpenCL, the plain functions otherwise. */
#ifdef __KERNEL_OPENCL__
#  define object_dir_transform_auto object_dir_transform_addrspace
#  define object_normal_transform_auto object_normal_transform_addrspace
#else
#  define object_dir_transform_auto object_dir_transform
#  define object_normal_transform_auto object_normal_transform
#endif
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|