Use std::min/max over fmin/fmax

We had a report that vtkm::Min/Max was significantly slower than the equivalent functions in other products. This was traced back to the fact that these functions were not being fully inlined because they called fmin or fmax, which resulted in an actual C library call. It turns out that using the templated functions in the std namespace (std::min and std::max) is faster. This change makes the VTK-m Min/Max functions use the std versions in almost all circumstances. The one exception (so far) is that fmin and fmax are still used for CUDA devices, since the std functions are not declared to run on the device and the nvcc compiler gives fmin and fmax special treatment.
2024-09-16 17:22:55 +00:00 · 2015-12-02 14:06:46 -07:00 · 2015-12-02 14:06:46 -07:00 · 5d829f2142
commit 5d829f2142
parent 03661259b8
2 changed files with 20 additions and 10 deletions
--- a/vtkm/Math.h
+++ b/vtkm/Math.h
@ -47,6 +47,11 @@ VTKM_THIRDPARTY_POST_INCLUDE
 #define VTKM_USE_BOOST_SIGN
 #endif // !VTKM_CUDA

+#if !defined(__CUDA_ARCH__)
+#define VTKM_USE_STL_MIN_MAX
+#include <algorithm>
+#endif
+
 #if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
 VTKM_THIRDPARTY_PRE_INCLUDE
 #include <boost/math/special_functions/acosh.hpp>
@ -58,7 +63,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
 #include <boost/math/special_functions/round.hpp>
 VTKM_THIRDPARTY_POST_INCLUDE
 #define VTKM_USE_BOOST_MATH
-#if _MSC_VER <= 1600
+#if (_MSC_VER <= 1600) && !defined(VTKM_USE_STL_MIN_MAX)
 #define VTKM_USE_STL_MIN_MAX
 #include <algorithm>
 #endif
@ -1298,7 +1303,7 @@ VTKM_EXEC_CONT_EXPORT
 vtkm::Float64 Max(vtkm::Float64 x, vtkm::Float64 y) {
  return (std::max)(x, y);
 }
-#else // !VTKM_USE_BOOST_MATH
+#else // !VTKM_USE_STL_MIN_MAX
 VTKM_EXEC_CONT_EXPORT
 vtkm::Float32 Max(vtkm::Float32 x, vtkm::Float32 y) {
  return VTKM_SYS_MATH_FUNCTION_32(fmax)(x,y);
@ -1307,7 +1312,7 @@ VTKM_EXEC_CONT_EXPORT
 vtkm::Float64 Max(vtkm::Float64 x, vtkm::Float64 y) {
  return VTKM_SYS_MATH_FUNCTION_64(fmax)(x,y);
 }
-#endif // !VTKM_USE_BOOST_MATH
+#endif // !VTKM_USE_STL_MIN_MAX

 /// Returns \p x or \p y, whichever is smaller.
 ///
@ -1323,7 +1328,7 @@ VTKM_EXEC_CONT_EXPORT
 vtkm::Float64 Min(vtkm::Float64 x, vtkm::Float64 y) {
  return (std::min)(x, y);
 }
-#else // !VTKM_USE_BOOST_MATH
+#else // !VTKM_USE_STL_MIN_MAX
 VTKM_EXEC_CONT_EXPORT
 vtkm::Float32 Min(vtkm::Float32 x, vtkm::Float32 y) {
  return VTKM_SYS_MATH_FUNCTION_32(fmin)(x,y);
@ -1332,7 +1337,7 @@ VTKM_EXEC_CONT_EXPORT
 vtkm::Float64 Min(vtkm::Float64 x, vtkm::Float64 y) {
  return VTKM_SYS_MATH_FUNCTION_64(fmin)(x,y);
 }
-#endif // !VTKM_USE_BOOST_MATH
+#endif // !VTKM_USE_STL_MIN_MAX

 namespace detail {

--- a/vtkm/Math.h.in
+++ b/vtkm/Math.h.in
@ -59,6 +59,11 @@ VTKM_THIRDPARTY_POST_INCLUDE
 #define VTKM_USE_BOOST_SIGN
 #endif // !VTKM_CUDA

+#if !defined(__CUDA_ARCH__)
+#define VTKM_USE_STL_MIN_MAX
+#include <algorithm>
+#endif
+
 #if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
 VTKM_THIRDPARTY_PRE_INCLUDE
 #include <boost/math/special_functions/acosh.hpp>
@ -70,7 +75,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
 #include <boost/math/special_functions/round.hpp>
 VTKM_THIRDPARTY_POST_INCLUDE
 #define VTKM_USE_BOOST_MATH
-#if _MSC_VER <= 1600
+#if (_MSC_VER <= 1600) && !defined(VTKM_USE_STL_MIN_MAX)
 #define VTKM_USE_STL_MIN_MAX
 #include <algorithm>
 #endif
@ -442,10 +447,10 @@ T Max(const T &x, const T &y);
 #ifdef VTKM_USE_STL_MIN_MAX
 $binary_template_function('Max', '(std::max)(x, y)')\
 $#
-#else // !VTKM_USE_BOOST_MATH
+#else // !VTKM_USE_STL_MIN_MAX
 $binary_math_function('Max', 'fmax')\
 $#
-#endif // !VTKM_USE_BOOST_MATH
+#endif // !VTKM_USE_STL_MIN_MAX

 /// Returns \p x or \p y, whichever is smaller.
 ///
@ -455,10 +460,10 @@ T Min(const T &x, const T &y);
 #ifdef VTKM_USE_STL_MIN_MAX
 $binary_template_function('Min', '(std::min)(x, y)')\
 $#
-#else // !VTKM_USE_BOOST_MATH
+#else // !VTKM_USE_STL_MIN_MAX
 $binary_math_function('Min', 'fmin')\
 $#
-#endif // !VTKM_USE_BOOST_MATH
+#endif // !VTKM_USE_STL_MIN_MAX

 namespace detail {