Rename the WITH_CPU_SSE build option to WITH_CPU_SIMD and add Arm Neon
detection next to the existing SSE/SSE2 detection.

Neon is tested first. When the Neon check compiles and sse2neon was found,
the sse2neon include directory is added and WITH_SSE2NEON is defined, which
makes BLI_simd.h include sse2neon.h instead of emmintrin.h. Otherwise the
SSE/SSE2 checks run as before. The per-check status messages move out of
TEST_SSE_SUPPORT into one summary printed by the top-level CMakeLists.txt.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6fb6dbd9dc..c95b8f0f7af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -370,8 +370,8 @@ if(WITH_PYTHON_INSTALL)
   endif()
 endif()
 
-option(WITH_CPU_SSE "Enable SIMD instruction if they're detected on the host machine" ON)
-mark_as_advanced(WITH_CPU_SSE)
+option(WITH_CPU_SIMD "Enable SIMD instruction if they're detected on the host machine" ON)
+mark_as_advanced(WITH_CPU_SIMD)
 
 # Cycles
 option(WITH_CYCLES "Enable Cycles Render Engine" ON)
@@ -775,14 +775,6 @@ if(WITH_GHOST_SDL OR WITH_HEADLESS)
   set(WITH_XR_OPENXR OFF)
 endif()
 
-if(WITH_CPU_SSE)
-  TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
-else()
-  message(STATUS "SSE and SSE2 optimizations are DISABLED!")
-  set(COMPILER_SSE_FLAG)
-  set(COMPILER_SSE2_FLAG)
-endif()
-
 if(WITH_BUILDINFO)
   find_package(Git)
   if(NOT GIT_FOUND)
@@ -962,22 +954,55 @@ if(WITH_INTERNATIONAL)
   endif()
 endif()
 
-# See TEST_SSE_SUPPORT() for how this is defined.
+# See TEST_SSE_SUPPORT() and TEST_NEON_SUPPORT() for how these are defined.
+#
+# This is done globally, so that all modules can use it if available, and
+# because these are used in headers used by many modules.
+if(WITH_CPU_SIMD)
+  set(COMPILER_SSE_FLAG)
+  set(COMPILER_SSE2_FLAG)
 
-# Do it globally, SSE2 is required for quite some time now.
-# Doing it now allows to use SSE/SSE2 in inline headers.
-if(SUPPORT_SSE_BUILD)
-  string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
-  add_definitions(-D__SSE__ -D__MMX__)
-endif()
-if(SUPPORT_SSE2_BUILD)
-  string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
-  add_definitions(-D__SSE2__)
-  if(NOT SUPPORT_SSE_BUILD) # don't double up
-    add_definitions(-D__MMX__)
+  # Test Neon first since macOS Arm can compile and run x86-64 SSE binaries.
+  TEST_NEON_SUPPORT()
+  if(SUPPORT_NEON_BUILD)
+    # Neon
+    if(SSE2NEON_FOUND)
+      blender_include_dirs_sys("${SSE2NEON_INCLUDE_DIRS}")
+      add_definitions(-DWITH_SSE2NEON)
+    endif()
+  else()
+    # SSE
+    TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
+    if(SUPPORT_SSE_BUILD)
+      string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
+      add_definitions(-D__SSE__ -D__MMX__)
+    endif()
+    if(SUPPORT_SSE2_BUILD)
+      string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
+      add_definitions(-D__SSE2__)
+      if(NOT SUPPORT_SSE_BUILD) # don't double up
+        add_definitions(-D__MMX__)
+      endif()
+    endif()
   endif()
-endif()
 
+  # Print instructions used
+  if(SUPPORT_NEON_BUILD)
+    if(SSE2NEON_FOUND)
+      message(STATUS "Neon SIMD instructions enabled")
+    else()
+      message(STATUS "Neon SIMD instructions detected but unused, requires sse2neon")
+    endif()
+  elseif(SUPPORT_SSE2_BUILD)
+    message(STATUS "SSE2 SIMD instructions enabled")
+  elseif(SUPPORT_SSE_BUILD)
+    message(STATUS "SSE SIMD instructions enabled")
+  else()
+    message(STATUS "No SIMD instructions detected")
+  endif()
+else()
+  message(STATUS "SIMD instructions disabled")
+endif()
 
 # set the endian define
 if(MSVC)
diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake
index aebcd25e3b6..b8f92a10761 100644
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -668,12 +668,6 @@ macro(TEST_SSE_SUPPORT
       #include <xmmintrin.h>
       int main(void) { __m128 v = _mm_setzero_ps(); return 0; }"
     SUPPORT_SSE_BUILD)
-
-    if(SUPPORT_SSE_BUILD)
-      message(STATUS "SSE Support: detected.")
-    else()
-      message(STATUS "SSE Support: missing.")
-    endif()
   endif()
 
   if(NOT DEFINED SUPPORT_SSE2_BUILD)
@@ -682,17 +676,19 @@ macro(TEST_SSE_SUPPORT
       #include <emmintrin.h>
       int main(void) { __m128d v = _mm_setzero_pd(); return 0; }"
     SUPPORT_SSE2_BUILD)
-
-    if(SUPPORT_SSE2_BUILD)
-      message(STATUS "SSE2 Support: detected.")
-    else()
-      message(STATUS "SSE2 Support: missing.")
-    endif()
   endif()
 
   unset(CMAKE_REQUIRED_FLAGS)
 endmacro()
 
+macro(TEST_NEON_SUPPORT)
+  include(CheckCXXSourceCompiles)
+  check_cxx_source_compiles(
+    "#include <arm_neon.h>
+     int main() {return vaddvq_s32(vdupq_n_s32(1));}"
+    SUPPORT_NEON_BUILD)
+endmacro()
+
 # Only print message if running CMake first time
 macro(message_first_run)
   if(FIRST_RUN)
diff --git a/build_files/cmake/platform/platform_apple.cmake b/build_files/cmake/platform/platform_apple.cmake
index 5203ba10863..e7b0097a137 100644
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -321,8 +321,11 @@ if(WITH_OPENVDB)
 endif()
 
 if(WITH_NANOVDB)
-  set(NANOVDB ${LIBDIR}/nanovdb)
-  set(NANOVDB_INCLUDE_DIR ${NANOVDB}/include)
+  find_package(NanoVDB)
+endif()
+
+if(WITH_CPU_SIMD)
+  find_package(sse2neon)
 endif()
 
 if(WITH_LLVM)
diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake
index f212741f0b6..5d3f074bdda 100644
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -284,6 +284,10 @@ if(WITH_NANOVDB)
   endif()
 endif()
 
+if(WITH_CPU_SIMD)
+  find_package_wrapper(sse2neon)
+endif()
+
 if(WITH_ALEMBIC)
   find_package_wrapper(Alembic)
 
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 2a28d905144..b01bf1bd1e2 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -64,7 +64,7 @@ if(WITH_CYCLES_NATIVE_ONLY)
     endif()
     set(CYCLES_KERNEL_FLAGS "${MSVC_NATIVE_ARCH_FLAGS}")
   endif()
-elseif(NOT WITH_CPU_SSE)
+elseif(NOT WITH_CPU_SIMD OR (SUPPORT_NEON_BUILD AND SSE2NEON_FOUND))
   set(CXX_HAS_SSE FALSE)
   set(CXX_HAS_AVX FALSE)
   set(CXX_HAS_AVX2 FALSE)
diff --git a/source/blender/blenlib/BLI_simd.h b/source/blender/blenlib/BLI_simd.h
index 1518b6c1de2..2ebbd7a2250 100644
--- a/source/blender/blenlib/BLI_simd.h
+++ b/source/blender/blenlib/BLI_simd.h
@@ -22,7 +22,15 @@
  * SIMD instruction support.
  */
 
-#if defined(__SSE2__)
+#if defined(__ARM_NEON) && defined(WITH_SSE2NEON)
+/* SSE/SSE2 emulation on ARM Neon. Match SSE precision. */
+#  define SSE2NEON_PRECISE_MINMAX 1
+#  define SSE2NEON_PRECISE_DIV 1
+#  define SSE2NEON_PRECISE_SQRT 1
+#  include <sse2neon.h>
+#  define BLI_HAVE_SSE2
+#elif defined(__SSE2__)
+/* Native SSE2 on Intel/AMD. */
 #  include <emmintrin.h>
 #  define BLI_HAVE_SSE2
 #endif