forked from bartvdbraak/blender
Code refactor: split defines into separate header, changes to SSE type headers.
I need to use some macros defined in util_simd.h for float3/float4, to emulate SSE4 instructions on SSE2. Due to issues with the order of header includes this was not possible, so this commit does some refactoring to make it work. Differential Revision: https://developer.blender.org/D2764
This commit is contained in:
parent
5e4bad2c00
commit
a8cc0d707e
@ -48,6 +48,7 @@
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_optimization.h"
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_system.h"
|
||||
#include "util/util_thread.h"
|
||||
|
@ -233,6 +233,7 @@ set(SRC_FILTER_HEADERS
|
||||
set(SRC_UTIL_HEADERS
|
||||
../util/util_atomic.h
|
||||
../util/util_color.h
|
||||
../util/util_defines.h
|
||||
../util/util_half.h
|
||||
../util/util_hash.h
|
||||
../util/util_math.h
|
||||
|
@ -38,6 +38,7 @@ set(SRC_HEADERS
|
||||
util_atomic.h
|
||||
util_boundbox.h
|
||||
util_debug.h
|
||||
util_defines.h
|
||||
util_guarded_allocator.cpp
|
||||
util_foreach.h
|
||||
util_function.h
|
||||
|
134
intern/cycles/util/util_defines.h
Normal file
134
intern/cycles/util/util_defines.h
Normal file
@ -0,0 +1,134 @@
|
||||
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __UTIL_DEFINES_H__
|
||||
#define __UTIL_DEFINES_H__
|
||||
|
||||
/* Bitness */
|
||||
|
||||
#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
|
||||
# define __KERNEL_64_BIT__
|
||||
#endif
|
||||
|
||||
/* Qualifiers for kernel code shared by CPU and GPU */
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# define ccl_device static inline
|
||||
# define ccl_device_noinline static
|
||||
# define ccl_global
|
||||
# define ccl_constant
|
||||
# define ccl_local
|
||||
# define ccl_local_param
|
||||
# define ccl_private
|
||||
# define ccl_restrict __restrict
|
||||
# define __KERNEL_WITH_SSE_ALIGN__
|
||||
|
||||
# if defined(_WIN32) && !defined(FREE_WINDOWS)
|
||||
# define ccl_device_inline static __forceinline
|
||||
# define ccl_device_forceinline static __forceinline
|
||||
# define ccl_align(...) __declspec(align(__VA_ARGS__))
|
||||
# ifdef __KERNEL_64_BIT__
|
||||
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
|
||||
# else /* __KERNEL_64_BIT__ */
|
||||
# undef __KERNEL_WITH_SSE_ALIGN__
|
||||
/* No support for function arguments (error C2719). */
|
||||
# define ccl_try_align(...)
|
||||
# endif /* __KERNEL_64_BIT__ */
|
||||
# define ccl_may_alias
|
||||
# define ccl_always_inline __forceinline
|
||||
# define ccl_never_inline __declspec(noinline)
|
||||
# define ccl_maybe_unused
|
||||
# else /* _WIN32 && !FREE_WINDOWS */
|
||||
# define ccl_device_inline static inline __attribute__((always_inline))
|
||||
# define ccl_device_forceinline static inline __attribute__((always_inline))
|
||||
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
# ifndef FREE_WINDOWS64
|
||||
# define __forceinline inline __attribute__((always_inline))
|
||||
# endif
|
||||
# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
# define ccl_may_alias __attribute__((__may_alias__))
|
||||
# define ccl_always_inline __attribute__((always_inline))
|
||||
# define ccl_never_inline __attribute__((noinline))
|
||||
/* Marks a declaration as possibly unused so the compiler does not warn about it.
 * `unused` only silences the warning; unlike `used` it does not force the symbol
 * to be emitted, matching the MSVC branch where this macro expands to nothing. */
# define ccl_maybe_unused __attribute__((unused))
|
||||
# endif /* _WIN32 && !FREE_WINDOWS */
|
||||
|
||||
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
|
||||
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
|
||||
# else
|
||||
# define ATTR_FALLTHROUGH ((void)0)
|
||||
# endif
|
||||
#endif /* __KERNEL_GPU__ */
|
||||
|
||||
/* macros */
|
||||
|
||||
/* hints for branch prediction, only use in code that runs a _lot_ */
|
||||
#if defined(__GNUC__) && defined(__KERNEL_CPU__)
|
||||
# define LIKELY(x) __builtin_expect(!!(x), 1)
|
||||
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
#else
|
||||
# define LIKELY(x) (x)
|
||||
# define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
|
||||
# define HAS_CPP11_FEATURES
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
# if defined(HAS_CPP11_FEATURES)
|
||||
/* Some magic to be sure we don't have reference in the type. */
|
||||
/* Identity function that takes and returns its argument by value. TYPEOF() wraps
 * its operand in a call to this so that decltype() yields the plain value type T
 * rather than a reference type. */
template<class T>
static inline T decltype_helper(T value)
{
	return value;
}
|
||||
# define TYPEOF(x) decltype(decltype_helper(x))
|
||||
# else
|
||||
# define TYPEOF(x) typeof(x)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Causes warning:
|
||||
* incompatible types when assigning to type 'Foo' from type 'Bar'
|
||||
* ... the compiler optimizes away the temp var */
|
||||
#ifdef __GNUC__
|
||||
#define CHECK_TYPE(var, type) { \
|
||||
TYPEOF(var) *__tmp; \
|
||||
__tmp = (type *)NULL; \
|
||||
(void)__tmp; \
|
||||
} (void)0
|
||||
|
||||
/* Compile-time check that var_a and var_b have the same type: a pointer to
 * TYPEOF(var_a) is assigned a null pointer to TYPEOF(var_b), so the compiler
 * reports mismatched types. Both operands go through TYPEOF so the macro does not
 * rely on the bare `typeof` keyword, which is unavailable in strict ISO modes. */
#define CHECK_TYPE_PAIR(var_a, var_b) { \
	TYPEOF(var_a) *__tmp; \
	__tmp = (TYPEOF(var_b) *)NULL; \
	(void)__tmp; \
} (void)0
|
||||
#else
|
||||
# define CHECK_TYPE(var, type)
|
||||
# define CHECK_TYPE_PAIR(var_a, var_b)
|
||||
#endif
|
||||
|
||||
/* can be used in simple macros */
|
||||
#define CHECK_TYPE_INLINE(val, type) \
|
||||
((void)(((type)0) != (val)))
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <cassert>
|
||||
# define util_assert(statement) assert(statement)
|
||||
#else
|
||||
# define util_assert(statement)
|
||||
#endif
|
||||
|
||||
#endif /* __UTIL_DEFINES_H__ */
|
||||
|
@ -19,16 +19,6 @@
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
/* quiet unused define warnings */
|
||||
#if defined(__KERNEL_SSE2__) || \
|
||||
defined(__KERNEL_SSE3__) || \
|
||||
defined(__KERNEL_SSSE3__) || \
|
||||
defined(__KERNEL_SSE41__) || \
|
||||
defined(__KERNEL_AVX__) || \
|
||||
defined(__KERNEL_AVX2__)
|
||||
/* do nothing */
|
||||
#endif
|
||||
|
||||
/* x86
|
||||
*
|
||||
* Compile a regular, SSE2 and SSE3 kernel. */
|
||||
@ -73,48 +63,6 @@
|
||||
|
||||
#endif /* defined(__x86_64__) || defined(_M_X64) */
|
||||
|
||||
/* SSE Experiment
|
||||
*
|
||||
* This is disabled code for an experiment to use SSE types globally for types
|
||||
* such as float3 and float4. Currently this gives an overall slowdown. */
|
||||
|
||||
#if 0
|
||||
# define __KERNEL_SSE__
|
||||
# ifndef __KERNEL_SSE2__
|
||||
# define __KERNEL_SSE2__
|
||||
# endif
|
||||
# ifndef __KERNEL_SSE3__
|
||||
# define __KERNEL_SSE3__
|
||||
# endif
|
||||
# ifndef __KERNEL_SSSE3__
|
||||
# define __KERNEL_SSSE3__
|
||||
# endif
|
||||
# ifndef __KERNEL_SSE4__
|
||||
# define __KERNEL_SSE4__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* SSE Intrinsics includes
|
||||
*
|
||||
* We assume __KERNEL_SSEX__ flags to have been defined at this point */
|
||||
|
||||
/* SSE intrinsics headers */
|
||||
#ifndef FREE_WINDOWS64
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#elif (defined(__x86_64__) || defined(__i386__))
|
||||
# include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
|
||||
* Since we can't avoid including <windows.h>, better only include that */
|
||||
#include "util/util_windows.h"
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* __UTIL_OPTIMIZATION_H__ */
|
||||
|
@ -18,19 +18,38 @@
|
||||
#ifndef __UTIL_SIMD_TYPES_H__
|
||||
#define __UTIL_SIMD_TYPES_H__
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "util/util_debug.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_defines.h"
|
||||
|
||||
/* SSE Intrinsics includes
|
||||
*
|
||||
* We assume __KERNEL_SSEX__ flags to have been defined at this point */
|
||||
|
||||
/* SSE intrinsics headers */
|
||||
#ifndef FREE_WINDOWS64
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#elif (defined(__x86_64__) || defined(__i386__))
|
||||
# include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
|
||||
* Since we can't avoid including <windows.h>, better only include that */
|
||||
#include "util/util_windows.h"
|
||||
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
|
||||
struct sseb;
|
||||
struct ssei;
|
||||
struct ssef;
|
||||
|
||||
extern const __m128 _mm_lookupmask_ps[16];
|
||||
|
||||
/* Special Types */
|
||||
@ -496,13 +515,19 @@ ccl_device_inline int bitscan(int value)
|
||||
|
||||
#endif /* __KERNEL_SSE2__ */
|
||||
|
||||
/* quiet unused define warnings */
|
||||
#if defined(__KERNEL_SSE2__) || \
|
||||
defined(__KERNEL_SSE3__) || \
|
||||
defined(__KERNEL_SSSE3__) || \
|
||||
defined(__KERNEL_SSE41__) || \
|
||||
defined(__KERNEL_AVX__) || \
|
||||
defined(__KERNEL_AVX2__)
|
||||
/* do nothing */
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#include "util/util_math.h"
|
||||
#include "util/util_sseb.h"
|
||||
#include "util/util_ssei.h"
|
||||
#include "util/util_ssef.h"
|
||||
#include "util/util_avxf.h"
|
||||
#endif /* __KERNEL_GPU__ */
|
||||
|
||||
#endif /* __UTIL_SIMD_TYPES_H__ */
|
||||
|
||||
|
@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
|
||||
struct ssei;
|
||||
struct ssef;
|
||||
|
||||
/*! 4-wide SSE bool type. */
|
||||
struct sseb
|
||||
{
|
||||
|
@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
|
||||
struct sseb;
|
||||
struct ssef;
|
||||
|
||||
/*! 4-wide SSE float type. */
|
||||
struct ssef
|
||||
{
|
||||
|
@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
|
||||
struct sseb;
|
||||
struct ssef;
|
||||
|
||||
/*! 4-wide SSE integer type. */
|
||||
struct ssei
|
||||
{
|
||||
@ -234,8 +237,10 @@ __forceinline size_t select_max(const sseb& valid, const ssei& v) { const ssei a
|
||||
|
||||
#else
|
||||
|
||||
__forceinline int reduce_min(const ssei& v) { return min(min(v[0],v[1]),min(v[2],v[3])); }
|
||||
__forceinline int reduce_max(const ssei& v) { return max(max(v[0],v[1]),max(v[2],v[3])); }
|
||||
/* Returns the smaller of the two ints a and b. */
__forceinline int ssei_min(int a, int b)
{
	if(a < b) {
		return a;
	}
	return b;
}
|
||||
/* Returns the larger of the two ints a and b. */
__forceinline int ssei_max(int a, int b)
{
	if(a > b) {
		return a;
	}
	return b;
}
|
||||
/* Returns the smallest of the four elements v[0]..v[3], combining them pairwise
 * with ssei_min(). */
__forceinline int reduce_min(const ssei& v)
{
	const int low_01 = ssei_min(v[0], v[1]);
	const int low_23 = ssei_min(v[2], v[3]);
	return ssei_min(low_01, low_23);
}
|
||||
/* Returns the largest of the four elements v[0]..v[3], combining them pairwise
 * with ssei_max(). */
__forceinline int reduce_max(const ssei& v)
{
	const int high_01 = ssei_max(v[0], v[1]);
	const int high_23 = ssei_max(v[2], v[3]);
	return ssei_max(high_01, high_23);
}
|
||||
/* Returns the sum of the four elements v[0]..v[3], accumulated left to right. */
__forceinline int reduce_add(const ssei& v)
{
	int total = v[0];
	total += v[1];
	total += v[2];
	total += v[3];
	return total;
}
|
||||
|
||||
#endif
|
||||
|
@ -21,72 +21,17 @@
|
||||
# include <stdlib.h>
|
||||
#endif
|
||||
|
||||
/* Bitness */
|
||||
|
||||
#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
|
||||
# define __KERNEL_64_BIT__
|
||||
#endif
|
||||
|
||||
/* Qualifiers for kernel code shared by CPU and GPU */
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# define ccl_device static inline
|
||||
# define ccl_device_noinline static
|
||||
# define ccl_global
|
||||
# define ccl_constant
|
||||
# define ccl_local
|
||||
# define ccl_local_param
|
||||
# define ccl_private
|
||||
# define ccl_restrict __restrict
|
||||
# define __KERNEL_WITH_SSE_ALIGN__
|
||||
|
||||
# if defined(_WIN32) && !defined(FREE_WINDOWS)
|
||||
# define ccl_device_inline static __forceinline
|
||||
# define ccl_device_forceinline static __forceinline
|
||||
# define ccl_align(...) __declspec(align(__VA_ARGS__))
|
||||
# ifdef __KERNEL_64_BIT__
|
||||
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
|
||||
# else /* __KERNEL_64_BIT__ */
|
||||
# undef __KERNEL_WITH_SSE_ALIGN__
|
||||
/* No support for function arguments (error C2719). */
|
||||
# define ccl_try_align(...)
|
||||
# endif /* __KERNEL_64_BIT__ */
|
||||
# define ccl_may_alias
|
||||
# define ccl_always_inline __forceinline
|
||||
# define ccl_never_inline __declspec(noinline)
|
||||
# define ccl_maybe_unused
|
||||
# else /* _WIN32 && !FREE_WINDOWS */
|
||||
# define ccl_device_inline static inline __attribute__((always_inline))
|
||||
# define ccl_device_forceinline static inline __attribute__((always_inline))
|
||||
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
# ifndef FREE_WINDOWS64
|
||||
# define __forceinline inline __attribute__((always_inline))
|
||||
# endif
|
||||
# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
# define ccl_may_alias __attribute__((__may_alias__))
|
||||
# define ccl_always_inline __attribute__((always_inline))
|
||||
# define ccl_never_inline __attribute__((noinline))
|
||||
# define ccl_maybe_unused __attribute__((used))
|
||||
# endif /* _WIN32 && !FREE_WINDOWS */
|
||||
|
||||
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
|
||||
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
|
||||
# else
|
||||
# define ATTR_FALLTHROUGH ((void)0)
|
||||
# endif
|
||||
#endif /* __KERNEL_GPU__ */
|
||||
|
||||
/* Standard Integer Types */
|
||||
|
||||
#if !defined(__KERNEL_GPU__) && !defined(_WIN32)
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#include "util/util_defines.h"
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
/* int8_t, uint16_t, and friends */
|
||||
# ifndef _WIN32
|
||||
# include <stdint.h>
|
||||
# endif
|
||||
/* SIMD Types */
|
||||
# include "util/util_optimization.h"
|
||||
#endif /* __KERNEL_GPU__ */
|
||||
# include "util/util_simd.h"
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@ -201,65 +146,8 @@ enum ExtensionType {
|
||||
EXTENSION_NUM_TYPES,
|
||||
};
|
||||
|
||||
/* macros */
|
||||
|
||||
/* hints for branch prediction, only use in code that runs a _lot_ */
|
||||
#if defined(__GNUC__) && defined(__KERNEL_CPU__)
|
||||
# define LIKELY(x) __builtin_expect(!!(x), 1)
|
||||
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
#else
|
||||
# define LIKELY(x) (x)
|
||||
# define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
|
||||
# define HAS_CPP11_FEATURES
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
# if defined(HAS_CPP11_FEATURES)
|
||||
/* Some magic to be sure we don't have reference in the type. */
|
||||
template<typename T> static inline T decltype_helper(T x) { return x; }
|
||||
# define TYPEOF(x) decltype(decltype_helper(x))
|
||||
# else
|
||||
# define TYPEOF(x) typeof(x)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Causes warning:
|
||||
* incompatible types when assigning to type 'Foo' from type 'Bar'
|
||||
* ... the compiler optimizes away the temp var */
|
||||
#ifdef __GNUC__
|
||||
#define CHECK_TYPE(var, type) { \
|
||||
TYPEOF(var) *__tmp; \
|
||||
__tmp = (type *)NULL; \
|
||||
(void)__tmp; \
|
||||
} (void)0
|
||||
|
||||
#define CHECK_TYPE_PAIR(var_a, var_b) { \
|
||||
TYPEOF(var_a) *__tmp; \
|
||||
__tmp = (typeof(var_b) *)NULL; \
|
||||
(void)__tmp; \
|
||||
} (void)0
|
||||
#else
|
||||
# define CHECK_TYPE(var, type)
|
||||
# define CHECK_TYPE_PAIR(var_a, var_b)
|
||||
#endif
|
||||
|
||||
/* can be used in simple macros */
|
||||
#define CHECK_TYPE_INLINE(val, type) \
|
||||
((void)(((type)0) != (val)))
|
||||
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include <cassert>
|
||||
# define util_assert(statement) assert(statement)
|
||||
#else
|
||||
# define util_assert(statement)
|
||||
#endif
|
||||
|
||||
/* Vectorized types declaration. */
|
||||
#include "util/util_types_uchar2.h"
|
||||
#include "util/util_types_uchar3.h"
|
||||
@ -298,5 +186,13 @@ CCL_NAMESPACE_END
|
||||
|
||||
#include "util/util_types_vector3_impl.h"
|
||||
|
||||
/* SSE types. */
|
||||
#ifndef __KERNEL_GPU__
|
||||
# include "util/util_sseb.h"
|
||||
# include "util/util_ssei.h"
|
||||
# include "util/util_ssef.h"
|
||||
# include "util/util_avxf.h"
|
||||
#endif
|
||||
|
||||
#endif /* __UTIL_TYPES_H__ */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user