vtk-m/vtkm/Math.h.in

//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//============================================================================
$# This file uses the pyexpander macro processing utility to build the
$# FunctionInterface facilities that use a variable number of arguments.
$# Information, documentation, and downloads for pyexpander can be found at:
$#
$#     http://pyexpander.sourceforge.net/
$#
$# To build the source code, execute the following (after installing
$# pyexpander, of course):
$#
$#     expander.py Math.h.in > Math.h
$#
$# Ignore the following comment. It is meant for the generated file.
// **** DO NOT EDIT THIS FILE!!! ****
// This file is automatically generated by Math.h.in

#ifndef vtk_m_Math_h
#define vtk_m_Math_h

#include <vtkm/TypeTraits.h>
#include <vtkm/Types.h>
#include <vtkm/VecTraits.h>

#include <limits> // must be found with or without CUDA.
#ifndef VTKM_CUDA
#include <cmath>
#include <cstring>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#endif // !VTKM_CUDA

#if !defined(VTKM_CUDA_DEVICE_PASS)
#define VTKM_USE_STL
#include <algorithm>
#endif

#ifdef VTKM_MSVC
#include <intrin.h>                // For bitwise intrinsics (__popcnt, etc)
#include <vtkm/internal/Windows.h> // for types used by MSVC intrinsics.
#ifndef VTKM_CUDA
#include <math.h>
#endif // VTKM_CUDA
#endif // VTKM_MSVC

#define VTKM_CUDA_MATH_FUNCTION_32(func) func##f
#define VTKM_CUDA_MATH_FUNCTION_64(func) func

$py(
def unary_function(name, type, returntype, cuda_expression, std_expression, template_header, static_keywords):
  return '''{5}
{6} VTKM_EXEC_CONT {2} {0}({1} x)
{{
#ifdef VTKM_CUDA
  return {3};
#else
  return {4};
#endif
}}
'''.format(name, type, returntype, cuda_expression, std_expression, template_header, static_keywords)

def unary_Vec_function(vtkmname):
  return '''template <typename T, vtkm::IdComponent N>
static inline VTKM_EXEC_CONT vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, N> {0}(
  const vtkm::Vec<T, N>& x)
{{
  vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, N> result;
  for (vtkm::IdComponent index = 0; index < N; index++)
  {{
    result[index] = vtkm::{0}(x[index]);
  }}
  return result;
}}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, 4> {0}(
  const vtkm::Vec<T, 4>& x)
{{
  return vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, 4>(
    vtkm::{0}(x[0]), vtkm::{0}(x[1]), vtkm::{0}(x[2]), vtkm::{0}(x[3]));
}}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, 3> {0}(
  const vtkm::Vec<T, 3>& x)
{{
  return vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, 3>(
    vtkm::{0}(x[0]), vtkm::{0}(x[1]), vtkm::{0}(x[2]));
}}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, 2> {0}(
  const vtkm::Vec<T, 2>& x)
{{
  return vtkm::Vec<typename detail::FloatingPointReturnType<T>::Type, 2>(vtkm::{0}(x[0]),
                                                                         vtkm::{0}(x[1]));
}}
'''.format(vtkmname)

def unary_math_function_no_vec(vtkmname, sysname):
  general_type = '''template <typename T>
static inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type {0}(const T& x)
{{
  using RT = typename detail::FloatingPointReturnType<T>::Type;
  return vtkm::{0}(static_cast<RT>(x));
}}
'''.format(vtkmname)
  specialization_types = unary_function(vtkmname,
                                        'vtkm::Float32',
                                        'vtkm::Float32',
                                        'VTKM_CUDA_MATH_FUNCTION_32(' + sysname + ')(x)',
                                        'std::' + sysname + '(x)',
                                        '',
                                        'inline')
  specialization_types += unary_function(vtkmname,
                                         'vtkm::Float64',
                                         'vtkm::Float64',
                                         'VTKM_CUDA_MATH_FUNCTION_64(' + sysname + ')(x)',
                                         'std::' + sysname + '(x)',
                                         '',
                                         'inline')
  return specialization_types + general_type


def unary_math_function(vtkmname, sysname):
  return unary_math_function_no_vec(vtkmname, sysname) + \
         unary_Vec_function(vtkmname)

def unary_template_function_no_vec(vtkmname,
                                   expression,
                                   returntype = None,
                                   preexpression = ''):
  return '''static inline VTKM_EXEC_CONT {2} {0}(vtkm::Float32 x)
{{
{3}  return {1};
}}
'''.format(vtkmname,
           expression,
           'vtkm::Float32' if returntype == None else returntype,
           preexpression) + \
'''static inline VTKM_EXEC_CONT {2} {0}(vtkm::Float64 x)
{{
{3}  return {1};
}}
'''.format(vtkmname,
           expression,
           'vtkm::Float64' if returntype == None else returntype,
           preexpression)

def binary_function(name, type, cuda_expression, std_expression):
  return '''static inline VTKM_EXEC_CONT {1} {0}({1} x, {1} y)
{{
#ifdef VTKM_CUDA
  return {2};
#else
  return {3};
#endif
}}
'''.format(name, type, cuda_expression, std_expression)

def binary_math_function(vtkmname, sysname):
  return binary_function(vtkmname,
                         'vtkm::Float32',
                         'VTKM_CUDA_MATH_FUNCTION_32(' + sysname + ')(x, y)',
                         'std::' + sysname + '(x, y)') + \
         binary_function(vtkmname,
                         'vtkm::Float64',
                         'VTKM_CUDA_MATH_FUNCTION_64(' + sysname + ')(x, y)',
                         'std::' + sysname + '(x, y)')

def binary_template_function(vtkmname, expression):
  return '''static inline VTKM_EXEC_CONT vtkm::Float32 {0}(vtkm::Float32 x, vtkm::Float32 y)
{{
  return {1};
}}
static inline VTKM_EXEC_CONT vtkm::Float64 {0}(vtkm::Float64 x, vtkm::Float64 y)
{{
  return {1};
}}
'''.format(vtkmname, expression)

)\
$extend(unary_math_function)\
$extend(unary_math_function_no_vec)\
$extend(unary_Vec_function)\
$extend(unary_template_function_no_vec)\
$extend(binary_math_function)\
$extend(binary_template_function)\
\
namespace vtkm
{

//-----------------------------------------------------------------------------
namespace detail
{
template <typename T>
struct FloatingPointReturnType
{
  using ctype = typename vtkm::VecTraits<T>::ComponentType;
  using representable_as_float_type =
    std::integral_constant<bool,
                           ((sizeof(ctype) < sizeof(float)) ||
                            std::is_same<ctype, vtkm::Float32>::value)>;
  using Type = typename std::
    conditional<representable_as_float_type::value, vtkm::Float32, vtkm::Float64>::type;
};
} // namespace detail

/// Returns the constant 2 times Pi.
///
template <typename T = vtkm::Float64>
static constexpr inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type TwoPi()
{
  using FT = typename detail::FloatingPointReturnType<T>::Type;
  return static_cast<FT>(6.28318530717958647692528676655900576);
}

/// Returns the constant Pi.
///
template <typename T = vtkm::Float64>
static constexpr inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type Pi()
{
  using FT = typename detail::FloatingPointReturnType<T>::Type;
  return static_cast<FT>(3.14159265358979323846264338327950288);
}

/// Returns the constant Pi halves.
///
template <typename T = vtkm::Float64>
static constexpr inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type Pi_2()
{
  using FT = typename detail::FloatingPointReturnType<T>::Type;
  return static_cast<FT>(1.57079632679489661923132169163975144);
}

/// Returns the constant Pi thirds.
///
template <typename T = vtkm::Float64>
static constexpr inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type Pi_3()
{
  using FT = typename detail::FloatingPointReturnType<T>::Type;
  return static_cast<FT>(1.04719755119659774615421446109316762);
}

/// Returns the constant Pi fourths.
///
template <typename T = vtkm::Float64>
static constexpr inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type Pi_4()
{
  using FT = typename detail::FloatingPointReturnType<T>::Type;
  return static_cast<FT>(0.78539816339744830961566084581987572);
}

/// Returns the constant Pi one hundred and eightieth.
///
template <typename T = vtkm::Float64>
static constexpr inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type Pi_180()
{
  using FT = typename detail::FloatingPointReturnType<T>::Type;
  return static_cast<FT>(0.01745329251994329547437168059786927);
}

/// Returns the constant 2 times Pi in float32.
///
static constexpr inline VTKM_EXEC_CONT vtkm::Float32 TwoPif()
{
  return TwoPi<vtkm::Float32>();
}

/// Returns the constant Pi in float32.
///
static constexpr inline VTKM_EXEC_CONT vtkm::Float32 Pif()
{
  return Pi<vtkm::Float32>();
}

/// Returns the constant Pi halves in float32.
///
static constexpr inline VTKM_EXEC_CONT vtkm::Float32 Pi_2f()
{
  return Pi_2<vtkm::Float32>();
}

/// Returns the constant Pi thirds in float32.
///
static constexpr inline VTKM_EXEC_CONT vtkm::Float32 Pi_3f()
{
  return Pi_3<vtkm::Float32>();
}

/// Returns the constant Pi fourths in float32.
///
static constexpr inline VTKM_EXEC_CONT vtkm::Float32 Pi_4f()
{
  return Pi_4<vtkm::Float32>();
}

/// Returns the constant Pi one hundred and eightieth in float32.
///
static constexpr inline VTKM_EXEC_CONT vtkm::Float32 Pi_180f()
{
  return Pi_180<vtkm::Float32>();
}

// clang-format off

///@{
/// Compute the sine of \p x.
///
$unary_math_function('Sin', 'sin')\
///@}

///@{
/// Compute the cosine of \p x.
///
$unary_math_function('Cos', 'cos')\
///@}

///@{
/// Compute the tangent of \p x.
///
$unary_math_function('Tan', 'tan')\
///@}

///@{
/// Compute the arc sine of \p x.
///
$unary_math_function('ASin', 'asin')\
///@}

///@{
/// Compute the arc cosine of \p x.
///
$unary_math_function('ACos', 'acos')\
///@}

///@{
/// Compute the arc tangent of \p x.
///
$unary_math_function('ATan', 'atan')\
///@}

///@{
/// Compute the arc tangent of \p x / \p y using the signs of both arguments
/// to determine the quadrant of the return value.
///
$binary_math_function('ATan2', 'atan2')\
///@}

///@{
/// Compute the hyperbolic sine of \p x.
///
$unary_math_function('SinH', 'sinh')\
///@}

///@{
/// Compute the hyperbolic cosine of \p x.
///
$unary_math_function('CosH', 'cosh')\
///@}

///@{
/// Compute the hyperbolic tangent of \p x.
///
$unary_math_function('TanH', 'tanh')\
///@}

///@{
/// Compute the hyperbolic arc sine of \p x.
///
$unary_math_function_no_vec('ASinH', 'asinh')\
$#
$unary_Vec_function('ASinH')\
///@}

///@{
/// Compute the hyperbolic arc cosine of \p x.
///
$unary_math_function_no_vec('ACosH', 'acosh')\
$#
$unary_Vec_function('ACosH')\
///@}

///@{
/// Compute the hyperbolic arc tangent of \p x.
///
$unary_math_function_no_vec('ATanH', 'atanh')\
$#
$unary_Vec_function('ATanH')\
///@}

//-----------------------------------------------------------------------------
///@{
/// Computes \p x raised to the power of \p y.
///
$binary_math_function('Pow', 'pow')\
///@}

///@{
/// Compute the square root of \p x.
/// On some hardware it is faster to find the reciprocal square root, so `RSqrt`
/// should be used if you actually plan to divide by the square root.
$unary_math_function('Sqrt', 'sqrt')\
///@}

///@{
/// Compute the reciprocal square root of \p x. The result of this function is
/// equivalent to <tt>1/Sqrt(x)</tt>. However, on some devices it is faster to
/// compute the reciprocal square root than the regular square root. Thus, you
/// should use this function whenever dividing by the square root.
///
#ifdef VTKM_CUDA
static inline VTKM_EXEC_CONT vtkm::Float32 RSqrt(vtkm::Float32 x)
{
  return rsqrtf(x);
}
static inline VTKM_EXEC_CONT vtkm::Float64 RSqrt(vtkm::Float64 x)
{
  return rsqrt(x);
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Float64 RSqrt(T x)
{
  return rsqrt(static_cast<vtkm::Float64>(x));
}
#else  // !VTKM_CUDA
static inline VTKM_EXEC_CONT vtkm::Float32 RSqrt(vtkm::Float32 x)
{
  return 1 / vtkm::Sqrt(x);
}
static inline VTKM_EXEC_CONT vtkm::Float64 RSqrt(vtkm::Float64 x)
{
  return 1 / vtkm::Sqrt(x);
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Float64 RSqrt(T x)
{
  return 1 / static_cast<vtkm::Float64>(x);
}
#endif // !VTKM_CUDA

$unary_Vec_function('RSqrt')\
///@}

///@{
/// Compute the cube root of \p x.
///
$unary_math_function_no_vec('Cbrt', 'cbrt')\
$#
$unary_Vec_function('Cbrt')\
///@}

///@{
/// Compute the reciprocal cube root of \p x. The result of this function is
/// equivalent to <tt>1/Cbrt(x)</tt>. However, on some devices it is faster to
/// compute the reciprocal cube root than the regular cube root. Thus, you
/// should use this function whenever dividing by the cube root.
///
#ifdef VTKM_CUDA
static inline VTKM_EXEC_CONT vtkm::Float32 RCbrt(vtkm::Float32 x)
{
  return rcbrtf(x);
}
static inline VTKM_EXEC_CONT vtkm::Float64 RCbrt(vtkm::Float64 x)
{
  return rcbrt(x);
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Float64 RCbrt(T x)
{
  return rcbrt(static_cast<vtkm::Float64>(x));
}
#else  // !VTKM_CUDA
static inline VTKM_EXEC_CONT vtkm::Float32 RCbrt(vtkm::Float32 x)
{
  return 1 / vtkm::Cbrt(x);
}
static inline VTKM_EXEC_CONT vtkm::Float64 RCbrt(vtkm::Float64 x)
{
  return 1 / vtkm::Cbrt(x);
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Float64 RCbrt(T x)
{
  return 1 / vtkm::Cbrt(static_cast<vtkm::Float64>(x));
}
#endif // !VTKM_CUDA

$unary_Vec_function('RCbrt')\
///@}

///@{
/// Computes e^x, the base-e exponential of `x`.
///
$unary_math_function('Exp', 'exp')\
///@}

///@{
/// Computes 2^x, the base-2 exponential of `x`.
///
$unary_math_function_no_vec('Exp2', 'exp2')\
$#
$unary_Vec_function('Exp2')\
///@}

///@{
/// Computes (e^x) - 1, the of base-e exponental of `x` then minus 1. The
/// accuracy of this function is good even for very small values of `x`.
///
$unary_math_function_no_vec('ExpM1', 'expm1')\
$#
$unary_Vec_function('ExpM1')\
///@}

///@{
/// Computes 10^x, the base-10 exponential of `x`.
///
#ifdef VTKM_CUDA
static inline VTKM_EXEC_CONT vtkm::Float32 Exp10(vtkm::Float32 x)
{
  return exp10f(x);
}
static inline VTKM_EXEC_CONT vtkm::Float64 Exp10(vtkm::Float64 x)
{
  return exp10(x);
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Float64 Exp10(T x)
{
  return exp10(static_cast<vtkm::Float64>(x));
}
#else  // !VTKM_CUDA
static inline VTKM_EXEC_CONT vtkm::Float32 Exp10(vtkm::Float32 x)
{
  return vtkm::Pow(10, x);
}
static inline VTKM_EXEC_CONT vtkm::Float64 Exp10(vtkm::Float64 x)
{
  return vtkm::Pow(10, x);
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Float64 Exp10(T x)
{
  return vtkm::Pow(10, static_cast<vtkm::Float64>(x));
}
#endif // !VTKM_CUDA

$unary_Vec_function('Exp10')\
///@}

///@{
/// Computes the natural logarithm of \p x.
///
$unary_math_function('Log', 'log')\
///@}

///@{
/// Computes the logarithm base 2 of \p x.
///
$unary_math_function_no_vec('Log2', 'log2')\
$#
$unary_Vec_function('Log2')\
///@}

///@{
/// Computes the logarithm base 10 of \p x.
///
$unary_math_function('Log10', 'log10')\
///@}

///@{
/// Computes the value of log(1+x). This method is more accurate for very small values of x
/// than the `vtkm::Log` function.
$unary_math_function_no_vec('Log1P', 'log1p')\
$#
$unary_Vec_function('Log1P')\
///@}

//-----------------------------------------------------------------------------
///@{
/// Returns \p x or \p y, whichever is larger.
///
template <typename T>
static inline VTKM_EXEC_CONT T Max(const T& x, const T& y);
#ifdef VTKM_USE_STL
$binary_template_function('Max', '(std::max)(x, y)')\
$#
#else // !VTKM_USE_STL
$binary_math_function('Max', 'fmax')\
$#
#endif // !VTKM_USE_STL
///@}

///@{
/// Returns \p x or \p y, whichever is smaller.
///
template <typename T>
static inline VTKM_EXEC_CONT T Min(const T& x, const T& y);
#if defined(VTKM_USE_STL) && !defined(VTKM_HIP)
$binary_template_function('Min', '(std::min)(x, y)')\
$#
#else // !VTKM_USE_STL OR HIP
$binary_math_function('Min', 'fmin')\
$#
#endif // !VTKM_USE_STL
///@}

namespace detail
{

template <typename T>
static inline VTKM_EXEC_CONT T Max(T x, T y, vtkm::TypeTraitsScalarTag)
{
  return (x < y) ? y : x;
}

template <typename T>
static inline VTKM_EXEC_CONT T Max(const T& x, const T& y, vtkm::TypeTraitsVectorTag)
{
  using Traits = vtkm::VecTraits<T>;
  T result;
  for (vtkm::IdComponent index = 0; index < Traits::NUM_COMPONENTS; index++)
  {
    Traits::SetComponent(
      result, index, vtkm::Max(Traits::GetComponent(x, index), Traits::GetComponent(y, index)));
  }
  return result;
}

template <typename T>
static inline VTKM_EXEC_CONT T Min(T x, T y, vtkm::TypeTraitsScalarTag)
{
  return (x < y) ? x : y;
}

template <typename T>
static inline VTKM_EXEC_CONT T Min(const T& x, const T& y, vtkm::TypeTraitsVectorTag)
{
  using Traits = vtkm::VecTraits<T>;
  T result;
  for (vtkm::IdComponent index = 0; index < Traits::NUM_COMPONENTS; index++)
  {
    Traits::SetComponent(
      result, index, vtkm::Min(Traits::GetComponent(x, index), Traits::GetComponent(y, index)));
  }
  return result;
}

} // namespace detail

/// Returns \p x or \p y, whichever is larger.
///
template <typename T>
static inline VTKM_EXEC_CONT T Max(const T& x, const T& y)
{
  return detail::Max(x, y, typename vtkm::TypeTraits<T>::DimensionalityTag());
}

/// Returns \p x or \p y, whichever is smaller.
///
template <typename T>
static inline VTKM_EXEC_CONT T Min(const T& x, const T& y)
{
  return detail::Min(x, y, typename vtkm::TypeTraits<T>::DimensionalityTag());
}

///@{
/// Clamp \p x to the given range.
///
inline VTKM_EXEC_CONT vtkm::Float32 Clamp(vtkm::Float32 x, vtkm::Float32 lo, vtkm::Float32 hi)
{
  return x > lo ? (x < hi ? x : hi) : lo;
}

inline VTKM_EXEC_CONT vtkm::Float64 Clamp(vtkm::Float64 x, vtkm::Float64 lo, vtkm::Float64 hi)
{
  return x > lo ? (x < hi ? x : hi) : lo;
}
///@}

//-----------------------------------------------------------------------------

//#ifdef VTKM_CUDA
#define VTKM_USE_IEEE_NONFINITE
//#endif

#ifdef VTKM_USE_IEEE_NONFINITE

namespace detail
{

union IEEE754Bits32 {
  vtkm::UInt32 bits;
  vtkm::Float32 scalar;
};
#define VTKM_NAN_BITS_32 0x7FC00000U
#define VTKM_INF_BITS_32 0x7F800000U
#define VTKM_NEG_INF_BITS_32 0xFF800000U
#define VTKM_EPSILON_32 1e-5f

union IEEE754Bits64 {
  vtkm::UInt64 bits;
  vtkm::Float64 scalar;
};
#define VTKM_NAN_BITS_64 0x7FF8000000000000ULL
#define VTKM_INF_BITS_64 0x7FF0000000000000ULL
#define VTKM_NEG_INF_BITS_64 0xFFF0000000000000ULL
#define VTKM_EPSILON_64 1e-9

template <typename T>
struct FloatLimits;

template <>
struct FloatLimits<vtkm::Float32>
{
  using BitsType = vtkm::detail::IEEE754Bits32;

  VTKM_EXEC_CONT
  static vtkm::Float32 Nan()
  {
    BitsType nan = { VTKM_NAN_BITS_32 };
    return nan.scalar;
  }

  VTKM_EXEC_CONT
  static vtkm::Float32 Infinity()
  {
    BitsType inf = { VTKM_INF_BITS_32 };
    return inf.scalar;
  }

  VTKM_EXEC_CONT
  static vtkm::Float32 NegativeInfinity()
  {
    BitsType neginf = { VTKM_NEG_INF_BITS_32 };
    return neginf.scalar;
  }

  VTKM_EXEC_CONT
  static vtkm::Float32 Epsilon() { return VTKM_EPSILON_32; }
};

template <int N>
struct FloatLimits<vtkm::Vec<vtkm::Float32, N>>
{
  using BitsType = vtkm::detail::IEEE754Bits32;

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float32, N> Nan()
  {
    BitsType nan = { VTKM_NAN_BITS_32 };
    return vtkm::Vec<vtkm::Float32, N>(nan.scalar);
  }

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float32, N> Infinity()
  {
    BitsType inf = { VTKM_INF_BITS_32 };
    return vtkm::Vec<vtkm::Float32, N>(inf.scalar);
  }

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float32, N> NegativeInfinity()
  {
    BitsType neginf = { VTKM_NEG_INF_BITS_32 };
    return vtkm::Vec<vtkm::Float32, N>(neginf.scalar);
  }

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float32, N> Epsilon()
  {
    return vtkm::Vec<vtkm::Float32, N>(VTKM_EPSILON_32);
  }
};

template <>
struct FloatLimits<vtkm::Float64>
{
  using BitsType = vtkm::detail::IEEE754Bits64;

  VTKM_EXEC_CONT
  static vtkm::Float64 Nan()
  {
    BitsType nan = { VTKM_NAN_BITS_64 };
    return nan.scalar;
  }

  VTKM_EXEC_CONT
  static vtkm::Float64 Infinity()
  {
    BitsType inf = { VTKM_INF_BITS_64 };
    return inf.scalar;
  }

  VTKM_EXEC_CONT
  static vtkm::Float64 NegativeInfinity()
  {
    BitsType neginf = { VTKM_NEG_INF_BITS_64 };
    return neginf.scalar;
  }

  VTKM_EXEC_CONT
  static vtkm::Float64 Epsilon() { return VTKM_EPSILON_64; }
};

template <int N>
struct FloatLimits<vtkm::Vec<vtkm::Float64, N>>
{
  using BitsType = vtkm::detail::IEEE754Bits64;

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float64, N> Nan()
  {
    BitsType nan = { VTKM_NAN_BITS_64 };
    return vtkm::Vec<vtkm::Float64, N>(nan.scalar);
  }

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float64, N> Infinity()
  {
    BitsType inf = { VTKM_INF_BITS_64 };
    return vtkm::Vec<vtkm::Float64, N>(inf.scalar);
  }

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float64, N> NegativeInfinity()
  {
    BitsType neginf = { VTKM_NEG_INF_BITS_64 };
    return vtkm::Vec<vtkm::Float64, N>(neginf.scalar);
  }

  VTKM_EXEC_CONT
  static vtkm::Vec<vtkm::Float64, N> Epsilon()
  {
    return vtkm::Vec<vtkm::Float64, N>(VTKM_EPSILON_64);
  }
};

#undef VTKM_NAN_BITS_32
#undef VTKM_INF_BITS_32
#undef VTKM_NEG_INF_BITS_32
#undef VTKM_EPSILON_32
#undef VTKM_NAN_BITS_64
#undef VTKM_INF_BITS_64
#undef VTKM_NEG_INF_BITS_64
#undef VTKM_EPSILON_64

} // namespace detail
#endif //VTKM_USE_IEEE_NONFINITE

///@{
/// Returns the representation for infinity. The result is greater than any other
/// number except another infinity or NaN. When comparing two infinities or infinity
/// to NaN, neither is greater than, less than, nor equal to the other. The
/// `Nan()` function is templated to specify either a 32 or 64 bit floating point
/// number. The convenience functions `Nan32()` and `Nan64()` are non-templated
/// versions that return the precision for a particular precision.
#ifdef VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T Nan()
{
  return detail::FloatLimits<T>::Nan();
}
#else  // !VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T Nan()
{
  return std::numeric_limits<T>::quiet_NaN();
}
#endif // !VTKM_USE_IEEE_NONFINITE
static inline VTKM_EXEC_CONT vtkm::Float32 Nan32()
{
  return vtkm::Nan<vtkm::Float32>();
}
static inline VTKM_EXEC_CONT vtkm::Float64 Nan64()
{
  return vtkm::Nan<vtkm::Float64>();
}
///@}

///@{
/// Returns the representation for infinity. The result is greater than any other
/// number except another infinity or NaN. When comparing two infinities or infinity
/// to NaN, neither is greater than, less than, nor equal to the other. The
/// `Infinity()` function is templated to specify either a 32 or 64 bit floating point
/// number. The convenience functions `Infinity32()` and`Infinity64()` are non-templated
/// versions that return the precision for a particular precision.
#ifdef VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T Infinity()
{
  return detail::FloatLimits<T>::Infinity();
}
#else  // !VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T Infinity()
{
  return std::numeric_limits<T>::infinity();
}
#endif // !VTKM_USE_IEEE_NONFINITE
static inline VTKM_EXEC_CONT vtkm::Float32 Infinity32()
{
  return vtkm::Infinity<vtkm::Float32>();
}
static inline VTKM_EXEC_CONT vtkm::Float64 Infinity64()
{
  return vtkm::Infinity<vtkm::Float64>();
}
///@}

///@{
/// Returns the representation for negative infinity. The result is less than any
/// other number except another negative infinity or NaN. When comparing two
/// negative infinities or negative infinity to NaN, neither is greater than, less
/// than, nor equal to the other. The `NegativeInfinity()` function is templated to
/// specify either a 32 or 64 bit floating point number. The convenience functions
/// `NegativeInfinity32()` and`NegativeInfinity64()` are non-templated versions that
/// return the precision for a particular precision.
#ifdef VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T NegativeInfinity()
{
  return detail::FloatLimits<T>::NegativeInfinity();
}
#else  // !VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T NegativeInfinity()
{
  return -std::numeric_limits<T>::infinity();
}
#endif // !VTKM_USE_IEEE_NONFINITE
static inline VTKM_EXEC_CONT vtkm::Float32 NegativeInfinity32()
{
  return vtkm::NegativeInfinity<vtkm::Float32>();
}
static inline VTKM_EXEC_CONT vtkm::Float64 NegativeInfinity64()
{
  return vtkm::NegativeInfinity<vtkm::Float64>();
}
///@}

///@{
/// Returns the difference between 1 and the least value greater than 1 that
/// is representable by a floating point number. Epsilon is useful for specifying
/// the tolerance one should have when considering numerical error. The `Epsilon()`
/// function is templated to specify either a 32 or 64 bit floating point number. The
/// convenience functions `Epsilon32()` and`Epsilon64()` are non-templated versions that
/// return the precision for a particular precision.
#ifdef VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T Epsilon()
{
  return detail::FloatLimits<T>::Epsilon();
}
#else  // !VTKM_USE_IEEE_NONFINITE
template <typename T>
static inline VTKM_EXEC_CONT T Epsilon()
{
  return std::numeric_limits<T>::epsilon();
}
#endif // !VTKM_USE_IEEE_NONFINITE
static inline VTKM_EXEC_CONT vtkm::Float32 Epsilon32()
{
  return vtkm::Epsilon<vtkm::Float32>();
}
static inline VTKM_EXEC_CONT vtkm::Float64 Epsilon64()
{
  return vtkm::Epsilon<vtkm::Float64>();
}
///@}


//-----------------------------------------------------------------------------
/// Returns true if \p x is not a number.
///
template <typename T>
static inline VTKM_EXEC_CONT bool IsNan(T x)
{
#ifndef VTKM_CUDA
  using std::isnan;
#endif
  return (isnan(x) != 0);
}

/// Returns true if \p x is positive or negative infinity.
///
template <typename T>
static inline VTKM_EXEC_CONT bool IsInf(T x)
{
#ifndef VTKM_CUDA
  using std::isinf;
#endif
  return (isinf(x) != 0);
}

/// Returns true if \p x is a normal number (not NaN or infinite).
///
template <typename T>
static inline VTKM_EXEC_CONT bool IsFinite(T x)
{
#ifndef VTKM_CUDA
  using std::isfinite;
#endif
  return (isfinite(x) != 0);
}

//-----------------------------------------------------------------------------
///@{
/// Round \p x to the smallest integer value not less than x.
///
$unary_math_function('Ceil', 'ceil')\
///@}

///@{
/// Round \p x to the largest integer value not greater than x.
///
$unary_math_function('Floor', 'floor')\
///@}

///@{
/// Round \p x to the nearest integral value.
///
$unary_math_function_no_vec('Round', 'round')\
$#
$unary_Vec_function('Round')\
///@}

//-----------------------------------------------------------------------------
///@{
/// Computes the remainder on division of 2 floating point numbers. The return
/// value is \p numerator - n \p denominator, where n is the quotient of \p
/// numerator divided by \p denominator rounded towards zero to an integer. For
/// example, <tt>FMod(6.5, 2.3)</tt> returns 1.9, which is 6.5 - 2*2.3.
///
$binary_math_function('FMod', 'fmod')\
///@}

///@{
/// Computes the remainder on division of 2 floating point numbers. The return
/// value is \p numerator - n \p denominator, where n is the quotient of \p
/// numerator divided by \p denominator rounded towards the nearest integer
/// (instead of toward zero like FMod). For example, <tt>FMod(6.5, 2.3)</tt>
/// returns -0.4, which is 6.5 - 3*2.3.
///
#ifdef VTKM_MSVC
template <typename T>
static inline VTKM_EXEC_CONT T Remainder(T numerator, T denominator)
{
  T quotient = vtkm::Round(numerator / denominator);
  return numerator - quotient * denominator;
}
#else // !VTKM_MSVC
$binary_math_function('Remainder', 'remainder')\
$#
#endif // !VTKM_MSVC
///@}

///@{
/// Returns the remainder on division of 2 floating point numbers just like
/// Remainder. In addition, this function also returns the \c quotient used to
/// get that remainder.
///
template <typename QType>
static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numerator,
                                                             vtkm::Float32 denominator,
                                                             QType& quotient)
{
  int iQuotient;
  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
  const vtkm::Float32 result =
    VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient);
#else
  const vtkm::Float32 result = std::remquo(numerator, denominator, &iQuotient);
#endif
  quotient = static_cast<QType>(iQuotient);
  return result;
}
template <typename QType>
static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numerator,
                                                             vtkm::Float64 denominator,
                                                             QType& quotient)
{
  int iQuotient;
#ifdef VTKM_CUDA
  const vtkm::Float64 result =
    VTKM_CUDA_MATH_FUNCTION_64(remquo)(numerator, denominator, &iQuotient);
#else
  const vtkm::Float64 result = std::remquo(numerator, denominator, &iQuotient);
#endif
  quotient = static_cast<QType>(iQuotient);
  return result;
}
///@}

///@{
/// Gets the integral and fractional parts of \c x. The return value is the
/// fractional part and \c integral is set to the integral part.
///
static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral)
{
  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
  return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral);
#else
  return std::modf(x, &integral);
#endif
}
static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral)
{
#if defined(VTKM_CUDA)
  return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral);
#else
  return std::modf(x, &integral);
#endif
}
///@}

//-----------------------------------------------------------------------------
///@{
/// Return the absolute value of \p x. That is, return \p x if it is positive or
/// \p -x if it is negative.
///
static inline VTKM_EXEC_CONT vtkm::Int32 Abs(vtkm::Int32 x)
{
  return abs(x);
}
static inline VTKM_EXEC_CONT vtkm::Int64 Abs(vtkm::Int64 x)
{
#if VTKM_SIZE_LONG == 8
  return labs(x);
#elif VTKM_SIZE_LONG_LONG == 8
  return llabs(x);
#else
#error Unknown size of Int64.
#endif
}
static inline VTKM_EXEC_CONT vtkm::Float32 Abs(vtkm::Float32 x)
{
#ifdef VTKM_CUDA
  return VTKM_CUDA_MATH_FUNCTION_32(fabs)(x);
#else
  return std::fabs(x);
#endif
}
static inline VTKM_EXEC_CONT vtkm::Float64 Abs(vtkm::Float64 x)
{
#ifdef VTKM_CUDA
  return VTKM_CUDA_MATH_FUNCTION_64(fabs)(x);
#else
  return std::fabs(x);
#endif
}
template <typename T>
static inline VTKM_EXEC_CONT typename detail::FloatingPointReturnType<T>::Type Abs(T x)
{
#ifdef VTKM_CUDA
  return VTKM_CUDA_MATH_FUNCTION_64(fabs)(static_cast<vtkm::Float64>(x));
#else
  return std::fabs(static_cast<vtkm::Float64>(x));
#endif
}
template <typename T, vtkm::IdComponent N>
static inline VTKM_EXEC_CONT vtkm::Vec<T, N> Abs(const vtkm::Vec<T, N>& x)
{
  vtkm::Vec<T, N> result;
  for (vtkm::IdComponent index = 0; index < N; index++)
  {
    result[index] = vtkm::Abs(x[index]);
  }
  return result;
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Vec<T, 4> Abs(const vtkm::Vec<T, 4>& x)
{
  return vtkm::Vec<T, 4>(vtkm::Abs(x[0]), vtkm::Abs(x[1]), vtkm::Abs(x[2]), vtkm::Abs(x[3]));
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Vec<T, 3> Abs(const vtkm::Vec<T, 3>& x)
{
  return vtkm::Vec<T, 3>(vtkm::Abs(x[0]), vtkm::Abs(x[1]), vtkm::Abs(x[2]));
}
template <typename T>
static inline VTKM_EXEC_CONT vtkm::Vec<T, 2> Abs(const vtkm::Vec<T, 2>& x)
{
  return vtkm::Vec<T, 2>(vtkm::Abs(x[0]), vtkm::Abs(x[1]));
}
///@}

///@{
/// Returns a nonzero value if \p x is negative.
///
$unary_template_function_no_vec('SignBit',
                                'static_cast<vtkm::Int32>(signbit(x))',
                                'vtkm::Int32',
'''#ifndef VTKM_CUDA
  using std::signbit;
#endif
''')\
///@}

///@{
/// Returns true if \p x is less than zero, false otherwise.
///
$unary_template_function_no_vec('IsNegative', '(vtkm::SignBit(x) != 0)', 'bool')\
///@}

///@{
/// Copies the sign of \p y onto \p x.  If \p y is positive, returns Abs(\p x).
/// If \p y is negative, returns -Abs(\p x).
///
$binary_math_function('CopySign', 'copysign')\
$#

template <typename T, vtkm::IdComponent N>
static inline VTKM_EXEC_CONT vtkm::Vec<T, N> CopySign(const vtkm::Vec<T, N>& x,
                                                      const vtkm::Vec<T, N>& y)
{
  vtkm::Vec<T, N> result;
  for (vtkm::IdComponent index = 0; index < N; index++)
  {
    result[index] = vtkm::CopySign(x[index], y[index]);
  }
  return result;
}
///@}

///@{
/// Decompose floating poing value
///
inline VTKM_EXEC_CONT vtkm::Float32 Frexp(vtkm::Float32 x, vtkm::Int32 *exponent)
{
  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
  return VTKM_CUDA_MATH_FUNCTION_32(frexp)(x, exponent);
#else
  return std::frexp(x, exponent);
#endif
}

inline VTKM_EXEC_CONT vtkm::Float64 Frexp(vtkm::Float64 x, vtkm::Int32 *exponent)
{
#ifdef VTKM_CUDA
  return VTKM_CUDA_MATH_FUNCTION_64(frexp)(x, exponent);
#else
  return std::frexp(x, exponent);
#endif
}
///@}

inline VTKM_EXEC_CONT vtkm::Float32 Ldexp(vtkm::Float32 x, vtkm::Int32 exponent)
{
#ifdef VTKM_CUDA
  return VTKM_CUDA_MATH_FUNCTION_32(ldexp)(x, exponent);
#else
  return std::ldexp(x, exponent);
#endif
}

inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
{
#ifdef VTKM_CUDA
  return VTKM_CUDA_MATH_FUNCTION_64(ldexp)(x, exponent);
#else
  return std::ldexp(x, exponent);
#endif
}

///////////////
// Float distance.
// See: https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/ for why this works.

namespace detail
{

inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistancePositive(vtkm::Float64 x, vtkm::Float64 y)
{
  VTKM_ASSERT(x >= 0);
  VTKM_ASSERT(y >= 0);

  // Note that:
  // int64_t xi = *reinterpret_cast<int64_t*>(&x);
  // int64_t yi = *reinterpret_cast<int64_t*>(&y);
  // also works (usually), but generates warnings because it is technically undefined behavior
  // according to the C++ standard.
  // Good option to have if we get compile errors off memcpy or don't want to #include <cstring> though.
  // At least on gcc, both versions generate the same assembly.
  vtkm::UInt64 xi;
  vtkm::UInt64 yi;
  memcpy(&xi, &x, sizeof(vtkm::UInt64));
  memcpy(&yi, &y, sizeof(vtkm::UInt64));
  if (yi > xi) {
    return yi - xi;
  }
  return xi - yi;
}


inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistancePositive(vtkm::Float32 x, vtkm::Float32 y)
{
  VTKM_ASSERT(x >= 0);
  VTKM_ASSERT(y >= 0);

  vtkm::UInt32 xi_32;
  vtkm::UInt32 yi_32;
  memcpy(&xi_32, &x, sizeof(vtkm::UInt32));
  memcpy(&yi_32, &y, sizeof(vtkm::UInt32));
  vtkm::UInt64 xi = xi_32;
  vtkm::UInt64 yi = yi_32;
  if (yi > xi) {
    return yi - xi;
  }
  return xi - yi;
}

} // namespace detail

/// Computes the number of representables between two floating point numbers. This function
/// is non-negative and symmetric in its arguments. If either argument is non-finite, the
/// value returned is the maximum value allowed by 64-bit unsigned integers: 2^64-1.
inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistance(vtkm::Float64 x, vtkm::Float64 y)
{
  static_assert(sizeof(vtkm::Float64) == sizeof(vtkm::UInt64), "vtkm::Float64 is incorrect size.");
  static_assert(std::numeric_limits<vtkm::Float64>::has_denorm == std::denorm_present, "FloatDistance presumes the floating-point type has subnormal numbers.");

  if (!vtkm::IsFinite(x) || !vtkm::IsFinite(y)) {
    return 0xFFFFFFFFFFFFFFFFL;
  }

  // Signed zero is the sworn enemy of this process.
  if (y == 0) {
    y = vtkm::Abs(y);
  }
  if (x == 0) {
    x = vtkm::Abs(x);
  }

  if ( (x < 0 && y >= 0) || (x >= 0 && y < 0) )
  {
    vtkm::UInt64 dx, dy;
    if (x < 0) {
      dy = detail::FloatDistancePositive(0.0, y);
      dx = detail::FloatDistancePositive(0.0, -x);
    }
    else {
      dy = detail::FloatDistancePositive(0.0, -y);
      dx = detail::FloatDistancePositive(0.0, x);
    }

    return dx + dy;
  }

  if (x < 0 && y < 0) {
    return detail::FloatDistancePositive(-x, -y);
  }

  return detail::FloatDistancePositive(x, y);
}

/// @copydoc FloatDistance
inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistance(vtkm::Float32 x, vtkm::Float32 y)
{
  static_assert(sizeof(vtkm::Float32) == sizeof(vtkm::Int32), "vtkm::Float32 is incorrect size.");
  static_assert(std::numeric_limits<vtkm::Float32>::has_denorm == std::denorm_present, "FloatDistance presumes the floating-point type has subnormal numbers.");

  if (!vtkm::IsFinite(x) || !vtkm::IsFinite(y)) {
    return 0xFFFFFFFFFFFFFFFFL;
  }

  if (y == 0) {
    y = vtkm::Abs(y);
  }
  if (x == 0) {
    x = vtkm::Abs(x);
  }

  if ( (x < 0 && y >= 0) || (x >= 0 && y < 0) )
  {
    vtkm::UInt64 dx, dy;
    if (x < 0) {
      dy = detail::FloatDistancePositive(0.0f, y);
      dx = detail::FloatDistancePositive(0.0f, -x);
    }
    else {
      dy = detail::FloatDistancePositive(0.0f, -y);
      dx = detail::FloatDistancePositive(0.0f, x);
    }
    return dx + dy;
  }

  if (x < 0 && y < 0) {
    return detail::FloatDistancePositive(-x, -y);
  }

  return detail::FloatDistancePositive(x, y);
}

// Computes ab - cd.
// See: https://pharr.org/matt/blog/2019/11/03/difference-of-floats.html
template<typename T>
inline VTKM_EXEC_CONT T DifferenceOfProducts(T a, T b, T c, T d)
{
    T cd = c * d;
    T err = std::fma(-c, d, cd);
    T dop = std::fma(a, b, -cd);
    return dop + err;
}

/// @brief Solves ax² + bx + c = 0.
///
/// Only returns the real roots.
/// If there are real roots, the first element of the pair is less than or equal to the second.
/// If there are no real roots, both elements are NaNs.
/// If VTK-m is compiled with FMA support, each root is accurate to 3 ulps; otherwise
/// the discriminant is prone to catastrophic subtractive cancellation and no accuracy
/// guarantees can be provided.
template<typename T>
inline VTKM_EXEC_CONT vtkm::Vec<T, 2> QuadraticRoots(T a, T b, T c)
{
  if (a == 0)
  {
    if (b == 0)
    {
      if (c == 0)
      {
        // A degenerate case. All real numbers are roots; hopefully this arbitrary decision interacts gracefully with use.
        return vtkm::Vec<T,2>(0,0);
      }
      else
      {
        return vtkm::Vec<T,2>(vtkm::Nan<T>(), vtkm::Nan<T>());
      }
    }
    return vtkm::Vec<T,2>(-c/b, -c/b);
  }
  T delta = DifferenceOfProducts(b, b, 4*a, c);
  if (delta < 0)
  {
    return vtkm::Vec<T,2>(vtkm::Nan<T>(), vtkm::Nan<T>());
  }

  T q = -(b + vtkm::CopySign(vtkm::Sqrt(delta), b)) / 2;
  T r0 = q / a;
  T r1 = c / q;
  if (r0 < r1)
  {
    return vtkm::Vec<T,2>(r0, r1);
  }
  return vtkm::Vec<T,2>(r1, r0);
}

/// Bitwise operations
///

/// Find the first set bit in @a word, and return its position (1-32). If no
/// bits are set, returns 0.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __ffs is device only.
inline __device__
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
{
  // Output is [0,32], with ffs(0) == 0
  return __ffs(static_cast<int>(word));
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
{
#  if defined(VTKM_GCC) || defined(VTKM_CLANG)

  // Output is [0,32], with ffs(0) == 0
  return __builtin_ffs(static_cast<int>(word));

#  elif defined(VTKM_MSVC)

  // Output is [0, 31], check return code to see if bits are set:
  vtkm::UInt32 firstSet;
  return _BitScanForward(reinterpret_cast<DWORD*>(&firstSet), word) != 0
         ? static_cast<vtkm::Int32>(firstSet + 1) : 0;

#  elif defined(VTKM_ICC)

  // Output is [0, 31], undefined if word is 0.
  return word != 0 ? _bit_scan_forward(word) + 1 : 0;

#  else

  // Naive implementation:
  if (word == 0)
  {
    return 0;
  }

  vtkm::Int32 bit = 1;
  while ((word & 0x1) == 0)
  {
    word >>= 1;
    ++bit;
  }
  return bit;

#  endif
}
#endif // CUDA_DEVICE_PASS

/// Find the first set bit in @a word, and return its position (1-64). If no
/// bits are set, returns 0.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __ffsll is device only.
inline __device__
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
{

  // Output is [0,64], with ffs(0) == 0
  return __ffsll(static_cast<long long int>(word));
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
{
#  if defined(VTKM_GCC) || defined(VTKM_CLANG) || defined(VTKM_ICC)

  // Output is [0,64], with ffs(0) == 0
  return __builtin_ffsll(static_cast<long long int>(word));

#  elif defined(VTKM_MSVC)

  // Output is [0, 63], check return code to see if bits are set:
  vtkm::UInt32 firstSet;
  return _BitScanForward64(reinterpret_cast<DWORD*>(&firstSet), word) != 0
         ? static_cast<vtkm::Int32>(firstSet + 1) : 0;

#  else

  // Naive implementation:
  if (word == 0)
  {
    return 0;
  }

  vtkm::Int32 bit = 1;
  while ((word & 0x1) == 0)
  {
    word >>= 1;
    ++bit;
  }
  return bit;

#  endif
}
#endif // CUDA_DEVICE_PASS

/// Count the total number of bits set in @a word.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __popc is device only.
inline __device__
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
{
  return __popc(word);
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
{
#  if defined(VTKM_GCC) || defined(VTKM_CLANG)

  return __builtin_popcount(word);

#  elif defined(VTKM_MSVC) && !defined(_M_ARM64)

  return static_cast<vtkm::Int32>(__popcnt(word));

#  elif defined(VTKM_ICC)

  return _popcnt32(static_cast<int>(word));

#  else

  // Naive implementation:
  vtkm::Int32 bits = 0;
  while (word)
  {
    if (word & 0x1)
    {
      ++bits;
    }
    word >>= 1;
  }
  return bits;

#  endif
}
#endif // CUDA_DEVICE_PASS

/// Count the total number of bits set in @a word.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __popcll is device only.
inline __device__
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
{
  return __popcll(word);
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
{
#  if defined(VTKM_GCC) || defined(VTKM_CLANG)

  return __builtin_popcountll(word);

#  elif defined(VTKM_MSVC) && !defined(_M_ARM64)

  return static_cast<vtkm::Int32>(__popcnt64(word));

#  elif defined(VTKM_ICC)

  return _popcnt64(static_cast<vtkm::Int64>(word));

#  else

  // Naive implementation:
  vtkm::Int32 bits = 0;
  while (word)
  {
    if (word & 0x1)
    {
      ++bits;
    }
    word >>= 1;
  }
  return bits;

#  endif
}
#endif // CUDA_DEVICE_PASS

} // namespace vtkm
// clang-format on

#endif //vtk_m_Math_h