Enable vectorization inside the Serial and TBB backends.
This commit is contained in:
parent
514ea09afc
commit
4ceb111a68
@ -51,8 +51,12 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||
set(CMAKE_CXX_FLAGS_WARN_EXTRA "-ansi ${CMAKE_CXX_FLAGS_WARN_EXTRA}")
|
||||
endif()
|
||||
|
||||
# Additional warnings just for Clang
|
||||
if(CMAKE_COMPILER_IS_CLANGXX)
|
||||
# Additional warnings just for Clang 3.5+, and AppleClang 7+
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
|
||||
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.4)
|
||||
set(CMAKE_CXX_FLAGS_WARN_EXTRA "-Wno-pass-failed ${CMAKE_CXX_FLAGS_WARN_EXTRA}")
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND
|
||||
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.99)
|
||||
set(CMAKE_CXX_FLAGS_WARN_EXTRA "-Wno-pass-failed ${CMAKE_CXX_FLAGS_WARN_EXTRA}")
|
||||
endif()
|
||||
|
||||
|
@ -66,32 +66,32 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx)
|
||||
|
||||
#common flags for the avx instructions for the gcc compiler
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7 OR
|
||||
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.7)
|
||||
#if GNU is less than 4.9 you get avx, avx2
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2)
|
||||
elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
|
||||
#if GNU is less than 5.1 you get avx, avx2, and some avx512
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd)
|
||||
else()
|
||||
#if GNU is 5.1+ you get avx, avx2, and more avx512
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi)
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512)
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
#While Clang supports AVX512, no version of AppleClang has that support yet
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
|
||||
#I can't find documentation to explicitly state the level of vectorization
|
||||
#support I want from the PGI compiler
|
||||
@ -99,14 +99,14 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
#Intel 15.X is the first version with avx512
|
||||
#Intel 16.X has way better vector generation compared to 15.X though
|
||||
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-xAVX")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-xCORE-AVX2")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -xAVX)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -xCORE-AVX2)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0)
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2)
|
||||
else()
|
||||
set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512)
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-xCORE-AVX512")
|
||||
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -xCORE-AVX512)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -117,19 +117,25 @@ endif()
|
||||
#
|
||||
#
|
||||
if(VTKm_ENABLE_VECTORIZATION)
|
||||
set(flags)
|
||||
if(VTKm_Vectorization STREQUAL "avx")
|
||||
get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
||||
add_compile_options( "${avx}" )
|
||||
get_property(flags GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
||||
elseif(VTKm_Vectorization STREQUAL "avx2")
|
||||
get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
||||
get_property(avx2 GLOBAL PROPERTY VTKm_AVX2_FLAGS)
|
||||
add_compile_options( "${avx}" "${avx2}" )
|
||||
set(flags ${avx} ${avx2})
|
||||
elseif(VTKm_Vectorization STREQUAL "avx512")
|
||||
get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS)
|
||||
get_property(avx2 GLOBAL PROPERTY VTKm_AVX2_FLAGS)
|
||||
get_property(avx512 GLOBAL PROPERTY VTKm_AVX512_FLAGS)
|
||||
add_compile_options( "${avx}" "${avx2}" "${avx512}" )
|
||||
set(flags ${avx} ${avx2} ${avx512})
|
||||
endif()
|
||||
|
||||
#have to specify each compile option separately, can't do them in bulk
|
||||
foreach(flag ${flags})
|
||||
|
||||
add_compile_options( ${flag} )
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
#
|
||||
|
@ -225,8 +225,18 @@ public:
|
||||
//The ICC compiler has been found to improperly optimize the copy_backwards
|
||||
//into a standard copy, causing the above issue.
|
||||
T lastValue = inputPortal.Get(numberOfValues - 1);
|
||||
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma ivdep
|
||||
#pragma clang loop vectorize(enable) interleave(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for(vtkm::Id i=(numberOfValues-1); i >= 1; --i)
|
||||
{
|
||||
//nothing for gcc as input & output could be the same
|
||||
outputPortal.Set(i, inputPortal.Get(i-1));
|
||||
}
|
||||
outputPortal.Set(0, initialValue);
|
||||
@ -284,8 +294,24 @@ public:
|
||||
DeviceAdapterAlgorithm<Device>::ScheduleKernel<Functor> kernel(functor);
|
||||
|
||||
const vtkm::Id size = numInstances;
|
||||
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma ivdep
|
||||
#pragma clang loop vectorize(enable) interleave(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for(vtkm::Id i=0; i < size; ++i)
|
||||
{
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_GCC)
|
||||
#pragma Loop_Optimize (Ivdep, Vector)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma forceinline recursive
|
||||
#endif
|
||||
#endif
|
||||
kernel(i);
|
||||
}
|
||||
|
||||
@ -317,8 +343,23 @@ public:
|
||||
for(vtkm::Id j=0; j < rangeMax[1]; ++j)
|
||||
{
|
||||
index[1] = j;
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma ivdep
|
||||
#pragma clang loop vectorize(enable) interleave(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for(vtkm::Id i=0; i < rangeMax[0]; ++i)
|
||||
{
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_GCC)
|
||||
#pragma Loop_Optimize (Ivdep, Vector)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma forceinline recursive
|
||||
#endif
|
||||
#endif
|
||||
index[0] = i;
|
||||
kernel( index );
|
||||
}
|
||||
@ -353,6 +394,14 @@ private:
|
||||
PortalI indexPortal = index.PrepareForInput(Device());
|
||||
PortalVout valuesOutPortal = values_out.PrepareForOutput(n, Device());
|
||||
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma ivdep
|
||||
#pragma clang loop vectorize(enable) interleave(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (vtkm::Id i=0; i<n; i++)
|
||||
{
|
||||
valuesOutPortal.Set( i, valuesPortal.Get(indexPortal.Get(i)) );
|
||||
|
@ -337,8 +337,26 @@ public:
|
||||
// error and setting the message buffer as expected.
|
||||
try
|
||||
{
|
||||
for (vtkm::Id index = range.begin(); index < range.end(); index++)
|
||||
const vtkm::Id start = range.begin();
|
||||
const vtkm::Id end = range.end();
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma ivdep
|
||||
#pragma clang loop vectorize(enable) interleave(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (vtkm::Id index = start; index != end; index++)
|
||||
{
|
||||
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_GCC)
|
||||
#pragma Loop_Optimize (Ivdep, Vector)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma forceinline recursive
|
||||
#endif
|
||||
#endif
|
||||
this->Functor(index);
|
||||
}
|
||||
}
|
||||
@ -386,8 +404,25 @@ public:
|
||||
for( vtkm::Id j=range.rows().begin(); j!=range.rows().end(); ++j)
|
||||
{
|
||||
index[1] = j;
|
||||
for( vtkm::Id i=range.cols().begin(); i!=range.cols().end(); ++i)
|
||||
const vtkm::Id start =range.cols().begin();
|
||||
const vtkm::Id end = range.cols().end();
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma ivdep
|
||||
#pragma clang loop vectorize(enable) interleave(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for( vtkm::Id i=start; i != end; ++i)
|
||||
{
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_GCC)
|
||||
#pragma Loop_Optimize (Ivdep, Vector)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma forceinline recursive
|
||||
#endif
|
||||
#endif
|
||||
index[0] = i;
|
||||
this->Functor( index );
|
||||
}
|
||||
@ -435,6 +470,13 @@ public:
|
||||
// error and setting the message buffer as expected.
|
||||
try
|
||||
{
|
||||
#ifdef VTKM_ENABLE_VECTORIZATION
|
||||
#if defined(VTKM_CLANG)
|
||||
#pragma clang loop vectorize(enable)
|
||||
#elif defined(VTKM_ICC)
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (vtkm::Id i = range.begin(); i < range.end(); i++)
|
||||
{
|
||||
OutputPortal.Set( i, ValuesPortal.Get(IndexPortal.Get(i)) );
|
||||
@ -481,4 +523,3 @@ VTKM_CONT_EXPORT static void ScatterPortal(InputPortalType inputPortal,
|
||||
}
|
||||
}
|
||||
#endif //vtk_m_cont_tbb_internal_FunctorsTBB_h
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user