Use std::min/max over fmin/fmax

We had a report that vtkm::Min/Max was significantly slower than other
products. This was traced back to the fact that these functions were not
completely inlining because they were calling fmin or fmax, and that
resulted in an actual C library call. It turns out using the templated
functions in the std namespace is faster.

This change has the VTK-m min/max functions use the std version in
almost all circumstances. The one exception (so far) is that fmin and
fmax are used for CUDA devices since the std functions are not declared
to run on the device and the nvcc compiler treats these functions
specially.
This commit is contained in:
Kenneth Moreland 2015-12-02 14:06:46 -07:00
parent 03661259b8
commit 5d829f2142
2 changed files with 20 additions and 10 deletions

@ -47,6 +47,11 @@ VTKM_THIRDPARTY_POST_INCLUDE
#define VTKM_USE_BOOST_SIGN
#endif // !VTKM_CUDA
#if !defined(__CUDA_ARCH__)
#define VTKM_USE_STL_MIN_MAX
#include <algorithm>
#endif
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
VTKM_THIRDPARTY_PRE_INCLUDE
#include <boost/math/special_functions/acosh.hpp>
@ -58,7 +63,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <boost/math/special_functions/round.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
#define VTKM_USE_BOOST_MATH
#if _MSC_VER <= 1600
#if (_MSC_VER <= 1600) && !defined(VTKM_USE_STL_MIN_MAX)
#define VTKM_USE_STL_MIN_MAX
#include <algorithm>
#endif
@ -1298,7 +1303,7 @@ VTKM_EXEC_CONT_EXPORT
vtkm::Float64 Max(vtkm::Float64 x, vtkm::Float64 y) {
return (std::max)(x, y);
}
#else // !VTKM_USE_BOOST_MATH
#else // !VTKM_USE_STL_MIN_MAX
VTKM_EXEC_CONT_EXPORT
vtkm::Float32 Max(vtkm::Float32 x, vtkm::Float32 y) {
return VTKM_SYS_MATH_FUNCTION_32(fmax)(x,y);
@ -1307,7 +1312,7 @@ VTKM_EXEC_CONT_EXPORT
vtkm::Float64 Max(vtkm::Float64 x, vtkm::Float64 y) {
return VTKM_SYS_MATH_FUNCTION_64(fmax)(x,y);
}
#endif // !VTKM_USE_BOOST_MATH
#endif // !VTKM_USE_STL_MIN_MAX
/// Returns \p x or \p y, whichever is smaller.
///
@ -1323,7 +1328,7 @@ VTKM_EXEC_CONT_EXPORT
vtkm::Float64 Min(vtkm::Float64 x, vtkm::Float64 y) {
return (std::min)(x, y);
}
#else // !VTKM_USE_BOOST_MATH
#else // !VTKM_USE_STL_MIN_MAX
VTKM_EXEC_CONT_EXPORT
vtkm::Float32 Min(vtkm::Float32 x, vtkm::Float32 y) {
return VTKM_SYS_MATH_FUNCTION_32(fmin)(x,y);
@ -1332,7 +1337,7 @@ VTKM_EXEC_CONT_EXPORT
vtkm::Float64 Min(vtkm::Float64 x, vtkm::Float64 y) {
return VTKM_SYS_MATH_FUNCTION_64(fmin)(x,y);
}
#endif // !VTKM_USE_BOOST_MATH
#endif // !VTKM_USE_STL_MIN_MAX
namespace detail {

@ -59,6 +59,11 @@ VTKM_THIRDPARTY_POST_INCLUDE
#define VTKM_USE_BOOST_SIGN
#endif // !VTKM_CUDA
#if !defined(__CUDA_ARCH__)
#define VTKM_USE_STL_MIN_MAX
#include <algorithm>
#endif
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
VTKM_THIRDPARTY_PRE_INCLUDE
#include <boost/math/special_functions/acosh.hpp>
@ -70,7 +75,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <boost/math/special_functions/round.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
#define VTKM_USE_BOOST_MATH
#if _MSC_VER <= 1600
#if (_MSC_VER <= 1600) && !defined(VTKM_USE_STL_MIN_MAX)
#define VTKM_USE_STL_MIN_MAX
#include <algorithm>
#endif
@ -442,10 +447,10 @@ T Max(const T &x, const T &y);
#ifdef VTKM_USE_STL_MIN_MAX
$binary_template_function('Max', '(std::max)(x, y)')\
$#
#else // !VTKM_USE_BOOST_MATH
#else // !VTKM_USE_STL_MIN_MAX
$binary_math_function('Max', 'fmax')\
$#
#endif // !VTKM_USE_BOOST_MATH
#endif // !VTKM_USE_STL_MIN_MAX
/// Returns \p x or \p y, whichever is smaller.
///
@ -455,10 +460,10 @@ T Min(const T &x, const T &y);
#ifdef VTKM_USE_STL_MIN_MAX
$binary_template_function('Min', '(std::min)(x, y)')\
$#
#else // !VTKM_USE_BOOST_MATH
#else // !VTKM_USE_STL_MIN_MAX
$binary_math_function('Min', 'fmin')\
$#
#endif // !VTKM_USE_BOOST_MATH
#endif // !VTKM_USE_STL_MIN_MAX
namespace detail {