Simplify the design of vectorization support.
Remove the configured file variables, as that causes problems when using an installed version of VTK-m.
This commit is contained in:
parent
4ea567aee9
commit
bfb6c26a98
@ -18,9 +18,6 @@
|
|||||||
## this software.
|
## this software.
|
||||||
##============================================================================
|
##============================================================================
|
||||||
|
|
||||||
set(VTKm_Vectorization "none" CACHE STRING "Level of compiler vectorization support")
|
|
||||||
set_property(CACHE VTKm_Vectorization PROPERTY STRINGS none)
|
|
||||||
|
|
||||||
#Currently all we are going to build is a set of options that are possible
|
#Currently all we are going to build is a set of options that are possible
|
||||||
#based on the compiler. For now we are going on the presumption
|
#based on the compiler. For now we are going on the presumption
|
||||||
#that x86 architecture is the only target for vectorization and therefore
|
#that x86 architecture is the only target for vectorization and therefore
|
||||||
@ -32,16 +29,21 @@ set_property(CACHE VTKm_Vectorization PROPERTY STRINGS none)
|
|||||||
# Do not explicitly enable vectorization, but at the same time don't explicitly disable
|
# Do not explicitly enable vectorization, but at the same time don't explicitly disable
|
||||||
# vectorization.
|
# vectorization.
|
||||||
#
|
#
|
||||||
# 2. avx
|
# 2. native:
|
||||||
|
# Allow the compiler to use auto-detection based on the system's CPU to determine
|
||||||
|
# the highest level of vectorization support that is allowed. This means that
|
||||||
|
# libraries and executables built with this setting are non-portable.
|
||||||
|
#
|
||||||
|
# 3. avx
|
||||||
# Compile with just AVX enabled, no AVX2 or AVX512 vectorization will be used.
|
# Compile with just AVX enabled, no AVX2 or AVX512 vectorization will be used.
|
||||||
# This means that Sandy Bridge, Ivy Bridge, Haswell, and Skylake are supported,
|
# This means that Sandy Bridge, Ivy Bridge, Haswell, and Skylake are supported,
|
||||||
# but Haswell and newer will not use any AVX2 instructions
|
# but Haswell and newer will not use any AVX2 instructions
|
||||||
#
|
#
|
||||||
# 3. avx2
|
# 4. avx2
|
||||||
# Compile with AVX2/AVX enabled, no AVX512 vectorization will be used.
|
# Compile with AVX2/AVX enabled, no AVX512 vectorization will be used.
|
||||||
# This means that Sandy Bridge, and Ivy Bridge can not run the code.
|
# This means that Sandy Bridge, and Ivy Bridge can not run the code.
|
||||||
#
|
#
|
||||||
# 4. avx512
|
# 5. avx512
|
||||||
# Compile with AVX512/AVX2/AVX options enabled.
|
# Compile with AVX512/AVX2/AVX options enabled.
|
||||||
# This means that Sandy Bridge, Ivy Bridge, and Haswell can not run the code.
|
# This means that Sandy Bridge, Ivy Bridge, and Haswell can not run the code.
|
||||||
# Only XeonPhi Knights Landing and Skylake processors can run the code.
|
# Only XeonPhi Knights Landing and Skylake processors can run the code.
|
||||||
@ -58,66 +60,83 @@ set_property(CACHE VTKm_Vectorization PROPERTY STRINGS none)
|
|||||||
# include(clang.cmake)
|
# include(clang.cmake)
|
||||||
#
|
#
|
||||||
# This way we could also do compile warning flag detection at the same time
|
# This way we could also do compile warning flag detection at the same time
|
||||||
# We need to enable -Wno-pass-failed when using clang atleast to kill the
|
#
|
||||||
# amount of warnings we get
|
#
|
||||||
|
# Note: 'native' is the default option
|
||||||
|
#
|
||||||
|
#
|
||||||
|
set(vec_levels none native)
|
||||||
|
|
||||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||||
#for now we presume gcc > 4.6
|
#for now we presume gcc > 4.6
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx)
|
list(APPEND vec_levels avx)
|
||||||
|
|
||||||
#common flags for the avx instructions for the gcc compiler
|
#common flags for the avx instructions for the gcc compiler
|
||||||
|
set_property(GLOBAL PROPERTY VTKm_NATIVE_FLAGS -march=native)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7 OR
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7 OR
|
||||||
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.7)
|
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.7)
|
||||||
#if GNU is less than 4.9 you get avx, avx2
|
#if GNU is less than 4.9 you get avx, avx2
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2)
|
list(APPEND vec_levels avx2)
|
||||||
elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
|
elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
|
||||||
#if GNU is less than 5.1 you get avx, avx2, and some avx512
|
#if GNU is less than 5.1 you get avx, avx2, and some avx512
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512)
|
list(APPEND vec_levels avx2 avx512)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd)
|
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd)
|
||||||
else()
|
else()
|
||||||
#if GNU is 5.1+ you get avx, avx2, and more avx512
|
#if GNU is 5.1+ you get avx, avx2, and more avx512
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512)
|
list(APPEND vec_levels avx2 avx512)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi)
|
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi)
|
||||||
endif()
|
endif()
|
||||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512)
|
list(APPEND vec_levels avx avx2 avx512)
|
||||||
|
set_property(GLOBAL PROPERTY VTKm_NATIVE_FLAGS -march=native)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512)
|
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512)
|
||||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||||
#While Clang supports AVX512, no version of AppleClang has that support yet
|
#While Clang supports AVX512, no version of AppleClang has that support yet
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2)
|
list(APPEND vec_levels avx avx2)
|
||||||
|
set_property(GLOBAL PROPERTY VTKm_NATIVE_FLAGS -march=native)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
||||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
|
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
|
||||||
#I can't find documentation to explicitly state the level of vectorization
|
#I can't find documentation to explicitly state the level of vectorization
|
||||||
#support I want from the PGI compiler
|
#support I want from the PGI compiler
|
||||||
|
#so for now we are going to do nothing
|
||||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||||
#Intel 15.X is the first version with avx512
|
#Intel 15.X is the first version with avx512
|
||||||
#Intel 16.X has way better vector generation compared to 15.X though
|
#Intel 16.X has way better vector generation compared to 15.X though
|
||||||
|
|
||||||
|
set_property(GLOBAL PROPERTY VTKm_NATIVE_FLAGS -xHost)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -xAVX)
|
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -xAVX)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -xCORE-AVX2)
|
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -xCORE-AVX2)
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0)
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0)
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2)
|
list(APPEND vec_levels avx avx2)
|
||||||
else()
|
else()
|
||||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512)
|
list(APPEND vec_levels avx avx2 avx512)
|
||||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -xCORE-AVX512)
|
# Store the AVX512 flag under its own property so that VTKm_AVX2_FLAGS keeps -xCORE-AVX2.
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -xCORE-AVX512)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#
|
||||||
|
# Now that we know which levels the compiler supports, let's set up the CMake option.
|
||||||
|
# We use a combo box style property, so that ccmake and cmake-gui have a
|
||||||
|
# nice interface
|
||||||
|
#
|
||||||
|
set(VTKm_Vectorization "native" CACHE STRING "Level of compiler vectorization support")
|
||||||
|
set_property(CACHE VTKm_Vectorization PROPERTY STRINGS ${vec_levels})
|
||||||
|
|
||||||
#
|
#
|
||||||
# Now that we have set up the options, let's set up the compile flags that
|
# Now that we have set up the options, let's set up the compile flags that
|
||||||
# we are going to require.
|
# we are going to require.
|
||||||
#
|
#
|
||||||
#
|
|
||||||
set(flags)
|
set(flags)
|
||||||
if(VTKm_Vectorization STREQUAL "avx")
|
if(VTKm_Vectorization STREQUAL "native")
|
||||||
|
get_property(flags GLOBAL PROPERTY VTKm_NATIVE_FLAGS)
|
||||||
|
elseif(VTKm_Vectorization STREQUAL "avx")
|
||||||
get_property(flags GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
get_property(flags GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
||||||
elseif(VTKm_Vectorization STREQUAL "avx2")
|
elseif(VTKm_Vectorization STREQUAL "avx2")
|
||||||
get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
||||||
@ -134,14 +153,3 @@ endif()
|
|||||||
foreach(flag ${flags})
|
foreach(flag ${flags})
|
||||||
add_compile_options( ${flag} )
|
add_compile_options( ${flag} )
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
#
|
|
||||||
# Lastly we need to setup flags that can be configured into a vtk-m header
|
|
||||||
# file. so that the code understands that we have enabled vectorization
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -232,11 +232,6 @@ set(VTKM_USE_64BIT_IDS ${VTKm_USE_64BIT_IDS})
|
|||||||
set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA})
|
set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA})
|
||||||
set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB})
|
set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB})
|
||||||
|
|
||||||
set(VTKM_ENABLE_VECTORIZATION ON)
|
|
||||||
if(VTKm_Vectorization STREQUAL "none")
|
|
||||||
set(VTKM_ENABLE_VECTORIZATION OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(VTKM_ENABLE_OPENGL_INTEROP ${VTKm_ENABLE_OPENGL_INTEROP})
|
set(VTKM_ENABLE_OPENGL_INTEROP ${VTKm_ENABLE_OPENGL_INTEROP})
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/vtkm/internal/Configure.h.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/vtkm/internal/Configure.h.in
|
||||||
@ -247,8 +242,6 @@ vtkm_install_headers(
|
|||||||
|
|
||||||
unset(VTKM_ENABLE_OPENGL_INTEROP)
|
unset(VTKM_ENABLE_OPENGL_INTEROP)
|
||||||
|
|
||||||
unset(VTKM_ENABLE_VECTORIZATION)
|
|
||||||
|
|
||||||
unset(VTKM_ENABLE_TBB)
|
unset(VTKM_ENABLE_TBB)
|
||||||
unset(VTKM_ENABLE_CUDA)
|
unset(VTKM_ENABLE_CUDA)
|
||||||
|
|
||||||
|
@ -226,16 +226,10 @@ public:
|
|||||||
//into a standard copy, causing the above issue.
|
//into a standard copy, causing the above issue.
|
||||||
T lastValue = inputPortal.Get(numberOfValues - 1);
|
T lastValue = inputPortal.Get(numberOfValues - 1);
|
||||||
|
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_PRE_LOOP
|
||||||
#if defined(VTKM_CLANG)
|
|
||||||
#pragma ivdep
|
|
||||||
#pragma clang loop vectorize(enable) interleave(enable)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma simd
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
for(vtkm::Id i=(numberOfValues-1); i >= 1; --i)
|
for(vtkm::Id i=(numberOfValues-1); i >= 1; --i)
|
||||||
{
|
{
|
||||||
|
VTKM_VECTORIZATION_IN_LOOP
|
||||||
//nothing for gcc as input & output could be the same
|
//nothing for gcc as input & output could be the same
|
||||||
outputPortal.Set(i, inputPortal.Get(i-1));
|
outputPortal.Set(i, inputPortal.Get(i-1));
|
||||||
}
|
}
|
||||||
@ -295,23 +289,10 @@ public:
|
|||||||
|
|
||||||
const vtkm::Id size = numInstances;
|
const vtkm::Id size = numInstances;
|
||||||
|
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_PRE_LOOP
|
||||||
#if defined(VTKM_CLANG)
|
|
||||||
#pragma ivdep
|
|
||||||
#pragma clang loop vectorize(enable) interleave(enable)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma simd
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
for(vtkm::Id i=0; i < size; ++i)
|
for(vtkm::Id i=0; i < size; ++i)
|
||||||
{
|
{
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_IN_LOOP
|
||||||
#if defined(VTKM_GCC)
|
|
||||||
#pragma Loop_Optimize (Ivdep, Vector)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma forceinline recursive
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
kernel(i);
|
kernel(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -343,23 +324,10 @@ public:
|
|||||||
for(vtkm::Id j=0; j < rangeMax[1]; ++j)
|
for(vtkm::Id j=0; j < rangeMax[1]; ++j)
|
||||||
{
|
{
|
||||||
index[1] = j;
|
index[1] = j;
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_PRE_LOOP
|
||||||
#if defined(VTKM_CLANG)
|
|
||||||
#pragma ivdep
|
|
||||||
#pragma clang loop vectorize(enable) interleave(enable)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma simd
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
for(vtkm::Id i=0; i < rangeMax[0]; ++i)
|
for(vtkm::Id i=0; i < rangeMax[0]; ++i)
|
||||||
{
|
{
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_IN_LOOP
|
||||||
#if defined(VTKM_GCC)
|
|
||||||
#pragma Loop_Optimize (Ivdep, Vector)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma forceinline recursive
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
index[0] = i;
|
index[0] = i;
|
||||||
kernel( index );
|
kernel( index );
|
||||||
}
|
}
|
||||||
@ -394,16 +362,10 @@ private:
|
|||||||
PortalI indexPortal = index.PrepareForInput(Device());
|
PortalI indexPortal = index.PrepareForInput(Device());
|
||||||
PortalVout valuesOutPortal = values_out.PrepareForOutput(n, Device());
|
PortalVout valuesOutPortal = values_out.PrepareForOutput(n, Device());
|
||||||
|
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_PRE_LOOP
|
||||||
#if defined(VTKM_CLANG)
|
|
||||||
#pragma ivdep
|
|
||||||
#pragma clang loop vectorize(enable) interleave(enable)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma simd
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
for (vtkm::Id i=0; i<n; i++)
|
for (vtkm::Id i=0; i<n; i++)
|
||||||
{
|
{
|
||||||
|
VTKM_VECTORIZATION_IN_LOOP
|
||||||
valuesOutPortal.Set( i, valuesPortal.Get(indexPortal.Get(i)) );
|
valuesOutPortal.Set( i, valuesPortal.Get(indexPortal.Get(i)) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -339,24 +339,10 @@ public:
|
|||||||
{
|
{
|
||||||
const vtkm::Id start = range.begin();
|
const vtkm::Id start = range.begin();
|
||||||
const vtkm::Id end = range.end();
|
const vtkm::Id end = range.end();
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_PRE_LOOP
|
||||||
#if defined(VTKM_CLANG)
|
|
||||||
#pragma ivdep
|
|
||||||
#pragma clang loop vectorize(enable) interleave(enable)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma simd
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
for (vtkm::Id index = start; index != end; index++)
|
for (vtkm::Id index = start; index != end; index++)
|
||||||
{
|
{
|
||||||
|
VTKM_VECTORIZATION_IN_LOOP
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
|
||||||
#if defined(VTKM_GCC)
|
|
||||||
#pragma Loop_Optimize (Ivdep, Vector)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma forceinline recursive
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
this->Functor(index);
|
this->Functor(index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -406,23 +392,10 @@ public:
|
|||||||
index[1] = j;
|
index[1] = j;
|
||||||
const vtkm::Id start =range.cols().begin();
|
const vtkm::Id start =range.cols().begin();
|
||||||
const vtkm::Id end = range.cols().end();
|
const vtkm::Id end = range.cols().end();
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_PRE_LOOP
|
||||||
#if defined(VTKM_CLANG)
|
|
||||||
#pragma ivdep
|
|
||||||
#pragma clang loop vectorize(enable) interleave(enable)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma simd
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
for( vtkm::Id i=start; i != end; ++i)
|
for( vtkm::Id i=start; i != end; ++i)
|
||||||
{
|
{
|
||||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
VTKM_VECTORIZATION_IN_LOOP
|
||||||
#if defined(VTKM_GCC)
|
|
||||||
#pragma Loop_Optimize (Ivdep, Vector)
|
|
||||||
#elif defined(VTKM_ICC)
|
|
||||||
#pragma forceinline recursive
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
index[0] = i;
|
index[0] = i;
|
||||||
this->Functor( index );
|
this->Functor( index );
|
||||||
}
|
}
|
||||||
|
@ -148,9 +148,30 @@
|
|||||||
#define VTKM_THIRDPARTY_POST_INCLUDE
|
#define VTKM_THIRDPARTY_POST_INCLUDE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//Mark if we are building with vectorization enabled
|
// Define a pair of macros, VTKM_VECTORIZATION_PRE_LOOP and VTKM_VECTORIZATION_IN_LOOP,
|
||||||
#ifndef VTKM_ENABLE_VECTORIZATION
|
// that should be wrapped around any "for"/"while" that you want vectorized.
|
||||||
#cmakedefine VTKM_ENABLE_VECTORIZATION
|
// This is used to set per compiler pragmas for vectorization, and to disable
|
||||||
|
// any warnings about vectorization failures.
|
||||||
|
#if defined(VTKM_CLANG)
|
||||||
|
//clang only needs pre loop
|
||||||
|
#define VTKM_VECTORIZATION_PRE_LOOP \
|
||||||
|
_Pragma("clang loop vectorize(enable) interleave(enable)")
|
||||||
|
#define VTKM_VECTORIZATION_IN_LOOP
|
||||||
|
|
||||||
|
#elif defined(VTKM_ICC)
|
||||||
|
//icc needs pre and in loop
|
||||||
|
#define VTKM_VECTORIZATION_PRE_LOOP \
|
||||||
|
_Pragma("simd")
|
||||||
|
#define VTKM_VECTORIZATION_IN_LOOP \
|
||||||
|
_Pragma("forceinline recursive")
|
||||||
|
#elif defined(VTKM_GCC)
|
||||||
|
//gcc only needs in loop
|
||||||
|
#define VTKM_VECTORIZATION_PRE_LOOP
|
||||||
|
_Pragma("ivdep")
|
||||||
|
#define VTKM_VECTORIZATION_IN_LOOP
|
||||||
|
#else
|
||||||
|
#define VTKM_VECTORIZATION_PRE_LOOP
|
||||||
|
#define VTKM_VECTORIZATION_IN_LOOP
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//Mark if we are building with CUDA enabled
|
//Mark if we are building with CUDA enabled
|
||||||
|
Loading…
Reference in New Issue
Block a user