forked from bartvdbraak/blender
Fix T75895: Unable to Compile Cycles on NAVI/Linux
This patch adds compiler hints to prevent unrolling of the nested for loops in the Voronoi node. Reviewed by: Brecht van Lommel. Differential Revision: https://developer.blender.org/D7574
This commit is contained in:
parent
36bf067ddc
commit
6121c28501
@ -71,6 +71,7 @@ __device__ half __float2half(const float f)
|
||||
#define ccl_may_alias
|
||||
#define ccl_addr_space
|
||||
#define ccl_restrict __restrict__
|
||||
#define ccl_loop_no_unroll
|
||||
/* TODO(sergey): In theory we might use references with CUDA, however
|
||||
* performance impact yet to be investigated.
|
||||
*/
|
||||
|
@ -43,6 +43,7 @@
|
||||
#define ccl_local __local
|
||||
#define ccl_local_param __local
|
||||
#define ccl_private __private
|
||||
#define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
|
||||
#define ccl_restrict restrict
|
||||
#define ccl_ref
|
||||
#define ccl_align(n) __attribute__((aligned(n)))
|
||||
|
@ -70,6 +70,7 @@ __device__ half __float2half(const float f)
|
||||
#define ccl_private
|
||||
#define ccl_may_alias
|
||||
#define ccl_addr_space
|
||||
#define ccl_loop_no_unroll
|
||||
#define ccl_restrict __restrict__
|
||||
#define ccl_ref
|
||||
#define ccl_align(n) __align__(n)
|
||||
|
@ -684,7 +684,8 @@ ccl_device void voronoi_f1_4d(float4 coord,
|
||||
float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
for (int u = -1; u <= 1; u++) {
|
||||
for (int k = -1; k <= 1; k++) {
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
|
||||
{
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
float4 cellOffset = make_float4(i, j, k, u);
|
||||
float4 pointPosition = cellOffset +
|
||||
@ -722,7 +723,8 @@ ccl_device void voronoi_smooth_f1_4d(float4 coord,
|
||||
float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
for (int u = -2; u <= 2; u++) {
|
||||
for (int k = -2; k <= 2; k++) {
|
||||
for (int j = -2; j <= 2; j++) {
|
||||
ccl_loop_no_unroll for (int j = -2; j <= 2; j++)
|
||||
{
|
||||
for (int i = -2; i <= 2; i++) {
|
||||
float4 cellOffset = make_float4(i, j, k, u);
|
||||
float4 pointPosition = cellOffset +
|
||||
@ -765,7 +767,8 @@ ccl_device void voronoi_f2_4d(float4 coord,
|
||||
float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
for (int u = -1; u <= 1; u++) {
|
||||
for (int k = -1; k <= 1; k++) {
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
|
||||
{
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
float4 cellOffset = make_float4(i, j, k, u);
|
||||
float4 pointPosition = cellOffset +
|
||||
@ -803,7 +806,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
|
||||
float minDistance = 8.0f;
|
||||
for (int u = -1; u <= 1; u++) {
|
||||
for (int k = -1; k <= 1; k++) {
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
|
||||
{
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
float4 cellOffset = make_float4(i, j, k, u);
|
||||
float4 vectorToPoint = cellOffset +
|
||||
@ -822,7 +826,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
|
||||
minDistance = 8.0f;
|
||||
for (int u = -1; u <= 1; u++) {
|
||||
for (int k = -1; k <= 1; k++) {
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
|
||||
{
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
float4 cellOffset = make_float4(i, j, k, u);
|
||||
float4 vectorToPoint = cellOffset +
|
||||
@ -851,7 +856,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
|
||||
float minDistance = 8.0f;
|
||||
for (int u = -1; u <= 1; u++) {
|
||||
for (int k = -1; k <= 1; k++) {
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
|
||||
{
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
float4 cellOffset = make_float4(i, j, k, u);
|
||||
float4 pointPosition = cellOffset +
|
||||
@ -871,7 +877,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
|
||||
float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
for (int u = -1; u <= 1; u++) {
|
||||
for (int k = -1; k <= 1; k++) {
|
||||
for (int j = -1; j <= 1; j++) {
|
||||
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
|
||||
{
|
||||
for (int i = -1; i <= 1; i++) {
|
||||
if (i == 0 && j == 0 && k == 0 && u == 0) {
|
||||
continue;
|
||||
|
@ -45,6 +45,7 @@
|
||||
# define ccl_restrict __restrict
|
||||
# define ccl_ref &
|
||||
# define ccl_optional_struct_init
|
||||
# define ccl_loop_no_unroll
|
||||
# define __KERNEL_WITH_SSE_ALIGN__
|
||||
|
||||
# if defined(_WIN32) && !defined(FREE_WINDOWS)
|
||||
|
Loading…
Reference in New Issue
Block a user