From 514ea09afcf6be56f887c96858d55a9a66fb37cb Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 23 Nov 2015 12:44:05 -0500 Subject: [PATCH 1/2] Teach VTK-m how to enable vectorization for gcc, clang, and icc. --- CMake/UseVTKmSerial.cmake | 7 ++ CMake/UseVTKmTBB.cmake | 7 ++ CMake/VTKmCompilerExtras.cmake | 7 +- CMake/VTKmCompilerOptimizations.cmake | 144 ++++++++++++++++++++++++++ CMake/VTKmConfig.cmake.in | 1 + CMakeLists.txt | 16 +++ vtkm/internal/Configure.h.in | 5 + 7 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 CMake/VTKmCompilerOptimizations.cmake diff --git a/CMake/UseVTKmSerial.cmake b/CMake/UseVTKmSerial.cmake index e96bbbfd0..d92a902e1 100644 --- a/CMake/UseVTKmSerial.cmake +++ b/CMake/UseVTKmSerial.cmake @@ -31,6 +31,13 @@ else () # !VTKm_Base_FOUND set(VTKm_Serial_FOUND) endif () +#----------------------------------------------------------------------------- +# Set up the compiler flag optimizations +#----------------------------------------------------------------------------- +if(VTKm_ENABLE_VECTORIZATION) + include(VTKmCompilerOptimizations) +endif() + if (VTKm_Serial_FOUND) set(VTKm_Serial_initialize_complete TRUE) endif () diff --git a/CMake/UseVTKmTBB.cmake b/CMake/UseVTKmTBB.cmake index df48a35e2..b85371f31 100644 --- a/CMake/UseVTKmTBB.cmake +++ b/CMake/UseVTKmTBB.cmake @@ -44,6 +44,13 @@ if (VTKm_Base_FOUND) endif () +#----------------------------------------------------------------------------- +# Set up the compiler flag optimizations +#----------------------------------------------------------------------------- +if(VTKm_ENABLE_VECTORIZATION) + include(VTKmCompilerOptimizations) +endif() + #----------------------------------------------------------------------------- # Set up all these dependent packages (if they were all found). 
#----------------------------------------------------------------------------- diff --git a/CMake/VTKmCompilerExtras.cmake b/CMake/VTKmCompilerExtras.cmake index 662d14974..86b3ce065 100644 --- a/CMake/VTKmCompilerExtras.cmake +++ b/CMake/VTKmCompilerExtras.cmake @@ -45,12 +45,17 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_WARN}") - # Addtional warnings for GCC + # Additional warnings for GCC/Clang set(CMAKE_CXX_FLAGS_WARN_EXTRA "-Wno-long-long -Wcast-align -Wconversion -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused-parameter -fno-common") if (VTKm_FORCE_ANSI) set(CMAKE_CXX_FLAGS_WARN_EXTRA "-ansi ${CMAKE_CXX_FLAGS_WARN_EXTRA}") endif() + # Additional warnings just for Clang + if(CMAKE_COMPILER_IS_CLANGXX) + set(CMAKE_CXX_FLAGS_WARN_EXTRA "-Wno-pass-failed ${CMAKE_CXX_FLAGS_WARN_EXTRA}") + endif() + # Set up the debug CXX_FLAGS for extra warnings option(VTKm_EXTRA_COMPILER_WARNINGS "Add compiler flags to do stricter checking when building debug." ON) # We used to add the compiler flags globally, but this caused problems with diff --git a/CMake/VTKmCompilerOptimizations.cmake b/CMake/VTKmCompilerOptimizations.cmake new file mode 100644 index 000000000..85dbd4991 --- /dev/null +++ b/CMake/VTKmCompilerOptimizations.cmake @@ -0,0 +1,144 @@ +##============================================================================ +## Copyright (c) Kitware, Inc. +## All rights reserved. +## See LICENSE.txt for details. +## This software is distributed WITHOUT ANY WARRANTY; without even +## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +## PURPOSE. See the above copyright notice for more information. +## +## Copyright 2014 Sandia Corporation. +## Copyright 2014 UT-Battelle, LLC. +## Copyright 2014 Los Alamos National Security. +## +## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +## the U.S. 
Government retains certain rights in this software. +## +## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +## Laboratory (LANL), the U.S. Government retains certain rights in +## this software. +##============================================================================ + +set(VTKm_Vectorization "none" CACHE STRING "Level of compiler vectorization support") +set_property(CACHE VTKm_Vectorization PROPERTY STRINGS none) + +#Currently all we are going to build is a set of options that are possible +#based on the compiler. For now we are going on the presumption +#that x86 architecture is the only target for vectorization and therefore +#we don't need any system detection. +# +#Here is the breakdown of what each flag type means: +# +# 1. none: +# Do not explicitly enable vectorization, but at the same time don't explicitly disable +# vectorization. +# +# 2. avx +# Compile with just AVX enabled, no AVX2 or AVX512 vectorization will be used. +# This means that Sandy Bridge, Ivy Bridge, Haswell, and Skylake are supported, +# but Haswell and newer will not use any AVX2 instructions. +# +# 3. avx2 +# Compile with AVX2/AVX enabled, no AVX512 vectorization will be used. +# This means that Sandy Bridge and Ivy Bridge cannot run the code. +# +# 4. avx512 +# Compile with AVX512/AVX2/AVX options enabled. +# This means that Sandy Bridge, Ivy Bridge, and Haswell cannot run the code. +# Only XeonPhi Knights Landing and Skylake processors can run the code. 
+# +# AVX512 is designed to mix with avx/avx2 without any performance penalties, +# so we enable AVX2 so that we get AVX2 support for < 32bit value types which +# AVX512 has less support for +# +# +# I wonder if we should go towards a per platform cmake include that stores +# all this knowledge +# include(gcc.cmake) +# include(icc.cmake) +# include(clang.cmake) +# +# This way we could also do compile warning flag detection at the same time +# We need to enable -Wno-pass-failed when using clang atleast to kill the +# amount of warnings we get + +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + #for now we presume gcc > 4.6 + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx) + + #common flags for the avx instructions for the gcc compiler + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx") + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2") + + if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7 + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.7) + #if GNU is less than 4.9 you get avx, avx2 + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2) + elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) + #if GNU is less than 5.1 you get avx, avx2, and some avx512 + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512) + set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd") + else() + #if GNU is 5.1+ you get avx, avx2, and more avx512 + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512) + set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi") + endif() +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512) + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx") + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2") + 
set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") + #While Clang support AVX512, no version of AppleClang has that support yet + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2) + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx") + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI") + #I can't find documentation to explicitly state the level of vectorization + #support I want from the PGI compiler +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + #Intel 15.X is the first version with avx512 + #Intel 16.X has way better vector generation compared to 15.X though + + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-xAVX") + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-xCORE-AVX2") + + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0) + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2) + else() + set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512) + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-xCORE-AVX512") + endif() +endif() + + +# +# Now that we have set up the options, lets setup the compile flags that +# we are going to require. +# +# +if(VTKm_ENABLE_VECTORIZATION) + if(VTKm_Vectorization STREQUAL "avx") + get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS) + add_compile_options( "${avx}" ) + elseif(VTKm_Vectorization STREQUAL "avx2") + get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS) + get_property(avx2 GLOBAL PROPERTY VTKm_AVX2_FLAGS) + add_compile_options( "${avx}" "${avx2}" ) + elseif(VTKm_Vectorization STREQUAL "avx512") + get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS) + get_property(avx2 GLOBAL PROPERTY VTKm_AVX2_FLAGS) + get_property(avx512 GLOBAL PROPERTY VTKm_AVX512_FLAGS) + add_compile_options( "${avx}" "${avx2}" "${avx512}" ) + endif() +endif() + +# +# Lastly we need to setup flags that can be configured into a vtk-m header +# file. 
so that the code understands that we have enabled vectorization +# +# + + + + + diff --git a/CMake/VTKmConfig.cmake.in b/CMake/VTKmConfig.cmake.in index 1e871e5ea..fcec91c38 100644 --- a/CMake/VTKmConfig.cmake.in +++ b/CMake/VTKmConfig.cmake.in @@ -43,6 +43,7 @@ set(VTKm_CMAKE_MODULE_PATH "@VTKm_CMAKE_MODULE_PATH_CONFIG@") set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@") set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@") +set(VTKm_ENABLE_VECTORIZATION "@VTKm_ENABLE_VECTORIZATION@") # VTKm requires some CMake Find modules not included with CMake, so # include the CMake modules distributed with VTKm. diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cead0a42..a28dbd2a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,7 @@ include(CMake/VTKmCompilerExtras.cmake) # Configurable Options option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF) option(VTKm_ENABLE_TBB "Enable TBB support" OFF) +option(VTKm_ENABLE_VECTORIZATION "Enable compiler vectorization support" ON) option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON) option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF) @@ -232,6 +233,11 @@ set(VTKM_USE_64BIT_IDS ${VTKm_USE_64BIT_IDS}) set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA}) set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB}) +set(VTKM_ENABLE_VECTORIZATION ${VTKm_ENABLE_VECTORIZATION}) +if(VTKm_Vectorization STREQUAL "none") + set(VTKM_ENABLE_VECTORIZATION OFF) +endif() + set(VTKM_ENABLE_OPENGL_INTEROP ${VTKm_ENABLE_OPENGL_INTEROP}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/vtkm/internal/Configure.h.in @@ -242,6 +248,8 @@ vtkm_install_headers( unset(VTKM_ENABLE_OPENGL_INTEROP) +unset(VTKM_ENABLE_VECTORIZATION) + unset(VTKM_ENABLE_TBB) unset(VTKM_ENABLE_CUDA) @@ -310,6 +318,14 @@ install( DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR} ) +# Install support files. 
+install( + FILES + ${VTKm_SOURCE_DIR}/CMake/VTKmCompilerOptimizations.cmake + DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR} + ) + + # Enable CPack packaging set(CPACK_PACKAGE_DESCRIPTION_FILE ${VTKm_SOURCE_DIR}/README.md) diff --git a/vtkm/internal/Configure.h.in b/vtkm/internal/Configure.h.in index 8fcd41f2c..5375da4a9 100644 --- a/vtkm/internal/Configure.h.in +++ b/vtkm/internal/Configure.h.in @@ -148,6 +148,11 @@ #define VTKM_THIRDPARTY_POST_INCLUDE #endif +//Mark if we are building with vectorization enabled +#ifndef VTKM_ENABLE_VECTORIZATION +#cmakedefine VTKM_ENABLE_VECTORIZATION +#endif + //Mark if we are building with CUDA enabled #ifndef VTKM_ENABLE_CUDA #cmakedefine VTKM_ENABLE_CUDA From 4ceb111a68a38a4dbef0a2d2e54630f9680030c2 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 23 Nov 2015 12:44:26 -0500 Subject: [PATCH 2/2] Enable vectorization inside the Serial and TBB backends. --- CMake/VTKmCompilerExtras.cmake | 8 ++- CMake/VTKmCompilerOptimizations.cmake | 40 ++++++++------- .../internal/DeviceAdapterAlgorithmSerial.h | 49 +++++++++++++++++++ vtkm/cont/tbb/internal/FunctorsTBB.h | 47 ++++++++++++++++-- 4 files changed, 122 insertions(+), 22 deletions(-) diff --git a/CMake/VTKmCompilerExtras.cmake b/CMake/VTKmCompilerExtras.cmake index 86b3ce065..37ca83d2e 100644 --- a/CMake/VTKmCompilerExtras.cmake +++ b/CMake/VTKmCompilerExtras.cmake @@ -51,8 +51,12 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) set(CMAKE_CXX_FLAGS_WARN_EXTRA "-ansi ${CMAKE_CXX_FLAGS_WARN_EXTRA}") endif() - # Additional warnings just for Clang - if(CMAKE_COMPILER_IS_CLANGXX) + # Additional warnings just for Clang 3.5+, and AppleClang 7+ + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.4) + set(CMAKE_CXX_FLAGS_WARN_EXTRA "-Wno-pass-failed ${CMAKE_CXX_FLAGS_WARN_EXTRA}") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.99) set(CMAKE_CXX_FLAGS_WARN_EXTRA 
"-Wno-pass-failed ${CMAKE_CXX_FLAGS_WARN_EXTRA}") endif() diff --git a/CMake/VTKmCompilerOptimizations.cmake b/CMake/VTKmCompilerOptimizations.cmake index 85dbd4991..cbb955743 100644 --- a/CMake/VTKmCompilerOptimizations.cmake +++ b/CMake/VTKmCompilerOptimizations.cmake @@ -66,32 +66,32 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx) #common flags for the avx instructions for the gcc compiler - set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx") - set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2") + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx) + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2) - if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7 + if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.7 OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.7) #if GNU is less than 4.9 you get avx, avx2 set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2) elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) #if GNU is less than 5.1 you get avx, avx2, and some avx512 set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512) - set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd") + set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd) else() #if GNU is 5.1+ you get avx, avx2, and more avx512 set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx2 avx512) - set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi") + set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi) endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512) - set_property(GLOBAL PROPERTY 
VTKm_AVX_FLAGS "-mavx") - set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2") - set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS "-mavx512") + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx) + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2) + set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -mavx512) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") #While Clang support AVX512, no version of AppleClang has that support yet set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2) - set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-mavx") - set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2") + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -mavx) + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI") #I can't find documentation to explicitly state the level of vectorization #support I want from the PGI compiler @@ -99,14 +99,14 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #Intel 15.X is the first version with avx512 #Intel 16.X has way better vector generation compared to 15.X though - set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS "-xAVX") - set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-xCORE-AVX2") + set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS -xAVX) + set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS -xCORE-AVX2) if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0) set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2) else() set_property(CACHE VTKm_Vectorization APPEND PROPERTY STRINGS avx avx2 avx512) - set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS "-xCORE-AVX512") + set_property(GLOBAL PROPERTY VTKm_AVX512_FLAGS -xCORE-AVX512) endif() endif() @@ -117,19 +117,25 @@ endif() # # if(VTKm_ENABLE_VECTORIZATION) + set(flags) if(VTKm_Vectorization STREQUAL "avx") - get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS) - add_compile_options( "${avx}" ) + 
get_property(flags GLOBAL PROPERTY VTKm_AVX_FLAGS) elseif(VTKm_Vectorization STREQUAL "avx2") get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS) get_property(avx2 GLOBAL PROPERTY VTKm_AVX2_FLAGS) - add_compile_options( "${avx}" "${avx2}" ) + set(flags ${avx} ${avx2}) elseif(VTKm_Vectorization STREQUAL "avx512") get_property(avx GLOBAL PROPERTY VTKm_AVX_FLAGS) get_property(avx2 GLOBAL PROPERTY VTKm_AVX2_FLAGS) get_property(avx512 GLOBAL PROPERTY VTKm_AVX512_FLAGS) - add_compile_options( "${avx}" "${avx2}" "${avx512}" ) + set(flags ${avx} ${avx2} ${avx512}) endif() + + #have to specify each compile option separately, can't do them in bulk + foreach(flag ${flags}) + + add_compile_options( ${flag} ) + endforeach() endif() # diff --git a/vtkm/cont/internal/DeviceAdapterAlgorithmSerial.h b/vtkm/cont/internal/DeviceAdapterAlgorithmSerial.h index 1fac8fc7e..8ca8f8d95 100644 --- a/vtkm/cont/internal/DeviceAdapterAlgorithmSerial.h +++ b/vtkm/cont/internal/DeviceAdapterAlgorithmSerial.h @@ -225,8 +225,18 @@ public: //The ICC compiler has been found to improperly optimize the copy_backwards //into a standard copy, causing the above issue. 
T lastValue = inputPortal.Get(numberOfValues - 1); + +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_CLANG) + #pragma ivdep + #pragma clang loop vectorize(enable) interleave(enable) +#elif defined(VTKM_ICC) + #pragma simd +#endif +#endif for(vtkm::Id i=(numberOfValues-1); i >= 1; --i) { + //nothing for gcc as input & output could be the same outputPortal.Set(i, inputPortal.Get(i-1)); } outputPortal.Set(0, initialValue); @@ -284,8 +294,24 @@ public: DeviceAdapterAlgorithm::ScheduleKernel kernel(functor); const vtkm::Id size = numInstances; + +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_CLANG) + #pragma ivdep + #pragma clang loop vectorize(enable) interleave(enable) +#elif defined(VTKM_ICC) + #pragma simd +#endif +#endif for(vtkm::Id i=0; i < size; ++i) { +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_GCC) + #pragma Loop_Optimize (Ivdep, Vector) +#elif defined(VTKM_ICC) + #pragma forceinline recursive +#endif +#endif kernel(i); } @@ -317,8 +343,23 @@ public: for(vtkm::Id j=0; j < rangeMax[1]; ++j) { index[1] = j; +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_CLANG) + #pragma ivdep + #pragma clang loop vectorize(enable) interleave(enable) +#elif defined(VTKM_ICC) + #pragma simd +#endif +#endif for(vtkm::Id i=0; i < rangeMax[0]; ++i) { +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_GCC) + #pragma Loop_Optimize (Ivdep, Vector) +#elif defined(VTKM_ICC) + #pragma forceinline recursive +#endif +#endif index[0] = i; kernel( index ); } @@ -353,6 +394,14 @@ private: PortalI indexPortal = index.PrepareForInput(Device()); PortalVout valuesOutPortal = values_out.PrepareForOutput(n, Device()); +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_CLANG) + #pragma ivdep + #pragma clang loop vectorize(enable) interleave(enable) +#elif defined(VTKM_ICC) + #pragma simd +#endif +#endif for (vtkm::Id i=0; iFunctor(index); } } @@ -386,8 +404,25 @@ public: for( vtkm::Id j=range.rows().begin(); j!=range.rows().end(); ++j) { index[1] = j; - for( vtkm::Id 
i=range.cols().begin(); i!=range.cols().end(); ++i) + const vtkm::Id start =range.cols().begin(); + const vtkm::Id end = range.cols().end(); +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_CLANG) + #pragma ivdep + #pragma clang loop vectorize(enable) interleave(enable) +#elif defined(VTKM_ICC) + #pragma simd +#endif +#endif + for( vtkm::Id i=start; i != end; ++i) { +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_GCC) + #pragma Loop_Optimize (Ivdep, Vector) +#elif defined(VTKM_ICC) + #pragma forceinline recursive +#endif +#endif index[0] = i; this->Functor( index ); } @@ -435,6 +470,13 @@ public: // error and setting the message buffer as expected. try { +#ifdef VTKM_ENABLE_VECTORIZATION +#if defined(VTKM_CLANG) + #pragma clang loop vectorize(enable) +#elif defined(VTKM_ICC) + #pragma simd +#endif +#endif for (vtkm::Id i = range.begin(); i < range.end(); i++) { OutputPortal.Set( i, ValuesPortal.Get(IndexPortal.Get(i)) ); @@ -481,4 +523,3 @@ VTKM_CONT_EXPORT static void ScatterPortal(InputPortalType inputPortal, } } #endif //vtk_m_cont_tbb_internal_FunctorsTBB_h -