BLI: use sse2neon to emulate SSE instructions with Arm Neon
* WITH_CPU_SSE was renamed to WITH_CPU_SIMD, and now covers both SSE and Neon. * For macOS sse2neon.h is included as part of the precompiled libraries. * For Linux it is enabled if the sse2neon.h header file is detected. However this library does not have official releases and is not shipped with any Linux distribution, so manual installation and configuration is required to get this working. Ref D8237, T78710
This commit is contained in:
parent
859118d8f6
commit
db28411fd9
@ -370,8 +370,8 @@ if(WITH_PYTHON_INSTALL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(WITH_CPU_SSE "Enable SIMD instruction if they're detected on the host machine" ON)
|
||||
mark_as_advanced(WITH_CPU_SSE)
|
||||
option(WITH_CPU_SIMD "Enable SIMD instruction if they're detected on the host machine" ON)
|
||||
mark_as_advanced(WITH_CPU_SIMD)
|
||||
|
||||
# Cycles
|
||||
option(WITH_CYCLES "Enable Cycles Render Engine" ON)
|
||||
@ -775,14 +775,6 @@ if(WITH_GHOST_SDL OR WITH_HEADLESS)
|
||||
set(WITH_XR_OPENXR OFF)
|
||||
endif()
|
||||
|
||||
if(WITH_CPU_SSE)
|
||||
TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
|
||||
else()
|
||||
message(STATUS "SSE and SSE2 optimizations are DISABLED!")
|
||||
set(COMPILER_SSE_FLAG)
|
||||
set(COMPILER_SSE2_FLAG)
|
||||
endif()
|
||||
|
||||
if(WITH_BUILDINFO)
|
||||
find_package(Git)
|
||||
if(NOT GIT_FOUND)
|
||||
@ -962,22 +954,55 @@ if(WITH_INTERNATIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# See TEST_SSE_SUPPORT() for how this is defined.
|
||||
# See TEST_SSE_SUPPORT() and TEST_NEON_SUPPORT() for how these are defined.
|
||||
#
|
||||
# This is done globally, so that all modules can use it if available, and
|
||||
# because these are used in headers used by many modules.
|
||||
if(WITH_CPU_SIMD)
|
||||
set(COMPILER_SSE_FLAG)
|
||||
set(COMPILER_SSE2_FLAG)
|
||||
|
||||
# Do it globally, SSE2 is required for quite some time now.
|
||||
# Doing it now allows to use SSE/SSE2 in inline headers.
|
||||
if(SUPPORT_SSE_BUILD)
|
||||
string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
|
||||
add_definitions(-D__SSE__ -D__MMX__)
|
||||
endif()
|
||||
if(SUPPORT_SSE2_BUILD)
|
||||
string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
|
||||
add_definitions(-D__SSE2__)
|
||||
if(NOT SUPPORT_SSE_BUILD) # don't double up
|
||||
add_definitions(-D__MMX__)
|
||||
# Test Neon first since macOS Arm can compile and run x86-64 SSE binaries.
|
||||
TEST_NEON_SUPPORT()
|
||||
if(SUPPORT_NEON_BUILD)
|
||||
# Neon
|
||||
if(SSE2NEON_FOUND)
|
||||
blender_include_dirs_sys("${SSE2NEON_INCLUDE_DIRS}")
|
||||
add_definitions(-DWITH_SSE2NEON)
|
||||
endif()
|
||||
else()
|
||||
# SSE
|
||||
TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
|
||||
if(SUPPORT_SSE_BUILD)
|
||||
string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
|
||||
add_definitions(-D__SSE__ -D__MMX__)
|
||||
endif()
|
||||
if(SUPPORT_SSE2_BUILD)
|
||||
string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
|
||||
add_definitions(-D__SSE2__)
|
||||
if(NOT SUPPORT_SSE_BUILD) # don't double up
|
||||
add_definitions(-D__MMX__)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Print instructions used
|
||||
if(SUPPORT_NEON_BUILD)
|
||||
if(SSE2NEON_FOUND)
|
||||
message(STATUS "Neon SIMD instructions enabled")
|
||||
else()
|
||||
message(STATUS "Neon SIMD instructions detected but unused, requires sse2neon")
|
||||
endif()
|
||||
elseif(SUPPORT_SSE2_BUILD)
|
||||
message(STATUS "SSE2 SIMD instructions enabled")
|
||||
elseif(SUPPORT_SSE_BUILD)
|
||||
message(STATUS "SSE SIMD instructions enabled")
|
||||
else()
|
||||
message(STATUS "No SIMD instructions detected")
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "SIMD instructions disabled")
|
||||
endif()
|
||||
|
||||
# set the endian define
|
||||
if(MSVC)
|
||||
|
@ -668,12 +668,6 @@ macro(TEST_SSE_SUPPORT
|
||||
#include <xmmintrin.h>
|
||||
int main(void) { __m128 v = _mm_setzero_ps(); return 0; }"
|
||||
SUPPORT_SSE_BUILD)
|
||||
|
||||
if(SUPPORT_SSE_BUILD)
|
||||
message(STATUS "SSE Support: detected.")
|
||||
else()
|
||||
message(STATUS "SSE Support: missing.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED SUPPORT_SSE2_BUILD)
|
||||
@ -682,17 +676,19 @@ macro(TEST_SSE_SUPPORT
|
||||
#include <emmintrin.h>
|
||||
int main(void) { __m128d v = _mm_setzero_pd(); return 0; }"
|
||||
SUPPORT_SSE2_BUILD)
|
||||
|
||||
if(SUPPORT_SSE2_BUILD)
|
||||
message(STATUS "SSE2 Support: detected.")
|
||||
else()
|
||||
message(STATUS "SSE2 Support: missing.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
unset(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(TEST_NEON_SUPPORT)
|
||||
include(CheckCXXSourceCompiles)
|
||||
check_cxx_source_compiles(
|
||||
"#include <arm_neon.h>
|
||||
int main() {return vaddvq_s32(vdupq_n_s32(1));}"
|
||||
SUPPORT_NEON_BUILD)
|
||||
endmacro()
|
||||
|
||||
# Only print message if running CMake first time
|
||||
macro(message_first_run)
|
||||
if(FIRST_RUN)
|
||||
|
@ -321,8 +321,11 @@ if(WITH_OPENVDB)
|
||||
endif()
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
set(NANOVDB ${LIBDIR}/nanovdb)
|
||||
set(NANOVDB_INCLUDE_DIR ${NANOVDB}/include)
|
||||
find_package(NanoVDB)
|
||||
endif()
|
||||
|
||||
if(WITH_CPU_SIMD)
|
||||
find_package(sse2neon)
|
||||
endif()
|
||||
|
||||
if(WITH_LLVM)
|
||||
|
@ -284,6 +284,10 @@ if(WITH_NANOVDB)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CPU_SIMD)
|
||||
find_package_wrapper(sse2neon)
|
||||
endif()
|
||||
|
||||
if(WITH_ALEMBIC)
|
||||
find_package_wrapper(Alembic)
|
||||
|
||||
|
@ -64,7 +64,7 @@ if(WITH_CYCLES_NATIVE_ONLY)
|
||||
endif()
|
||||
set(CYCLES_KERNEL_FLAGS "${MSVC_NATIVE_ARCH_FLAGS}")
|
||||
endif()
|
||||
elseif(NOT WITH_CPU_SSE)
|
||||
elseif(NOT WITH_CPU_SIMD OR (SUPPORT_NEON_BUILD AND SSE2NEON_FOUND))
|
||||
set(CXX_HAS_SSE FALSE)
|
||||
set(CXX_HAS_AVX FALSE)
|
||||
set(CXX_HAS_AVX2 FALSE)
|
||||
|
@ -22,7 +22,15 @@
|
||||
* SIMD instruction support.
|
||||
*/
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#if defined(__ARM_NEON) && defined(WITH_SSE2NEON)
|
||||
/* SSE/SSE2 emulation on ARM Neon. Match SSE precision. */
|
||||
# define SSE2NEON_PRECISE_MINMAX 1
|
||||
# define SSE2NEON_PRECISE_DIV 1
|
||||
# define SSE2NEON_PRECISE_SQRT 1
|
||||
# include <sse2neon.h>
|
||||
# define BLI_HAVE_SSE2
|
||||
#elif defined(__SSE2__)
|
||||
/* Native SSE2 on Intel/AMD. */
|
||||
# include <emmintrin.h>
|
||||
# define BLI_HAVE_SSE2
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user