Reduction on CUDA handles different input and output types better

When reducing an input type that differs from the output type, you need to write a custom binary operator that also implements the unary transformation from the input type to the output type.
2024-09-16 17:22:55 +00:00 · 2019-04-10 14:31:33 -04:00 · 2019-04-10 14:31:33 -04:00 · 89ec4aae2f
commit 89ec4aae2f
parent fda02b3a91
4 changed files with 115 additions and 14 deletions
--- a/docs/changelog/reduction-support-differing-input-output-types.md
+++ b/docs/changelog/reduction-support-differing-input-output-types.md
@ -0,0 +1,53 @@
+# DeviceAdapter Reduction supports differing input and output types
+
+It is common to want to perform a reduction where the input and output types
+differ. A basic example would be when the input is `vtkm::UInt8`
+but the output is `vtkm::UInt64`. This has been supported since v1.2, as the input
+type is implicitly convertible to the output type.
+
+What we now support is when the input type is not implicitly convertible to the output type,
+such as when the output type is `vtkm::Pair<vtkm::UInt64, vtkm::UInt64>`. For this to work
+we require that the custom binary operator also implements a unary `operator()` that handles
+the transformation of input to output.
+
+An example of a custom reduction operator for differing input and output types is:
+
+```cxx
+
+  struct CustomMinAndMax // reduction operator: vtkm::Float64 input, (min, max) pair output
+  {
+    using OutputType = vtkm::Pair<vtkm::Float64, vtkm::Float64>;
+    // Unary transformation of a single input value into the output type.
+    VTKM_EXEC_CONT
+    OutputType operator()(vtkm::Float64 a) const
+    {
+      return OutputType(a, a);
+    }
+    // Two input values: the smaller goes in .first, the larger in .second.
+    VTKM_EXEC_CONT
+    OutputType operator()(vtkm::Float64 a, vtkm::Float64 b) const
+    {
+      return OutputType(vtkm::Min(a, b), vtkm::Max(a, b));
+    }
+    // Two partial results: min of the .first fields, max of the .second fields.
+    VTKM_EXEC_CONT
+    OutputType operator()(const OutputType& a, const OutputType& b) const
+    {
+      return OutputType(vtkm::Min(a.first, b.first), vtkm::Max(a.second, b.second));
+    }
+    // Input value on the left, partial result on the right.
+    VTKM_EXEC_CONT
+    OutputType operator()(vtkm::Float64 a, const OutputType& b) const
+    {
+      return OutputType(vtkm::Min(a, b.first), vtkm::Max(a, b.second));
+    }
+    // Partial result on the left, input value on the right.
+    VTKM_EXEC_CONT
+    OutputType operator()(const OutputType& a, vtkm::Float64 b) const
+    {
+      return OutputType(vtkm::Min(a.first, b), vtkm::Max(a.second, b));
+    }
+  };
+
+
+```
--- a/vtkm/BinaryOperators.h
+++ b/vtkm/BinaryOperators.h
@ -97,6 +97,9 @@ struct Minimum
 template <typename T>
 struct MinAndMax
 {
+  VTKM_EXEC_CONT
+  vtkm::Vec<T, 2> operator()(const T& a) const { return vtkm::make_Vec(a, a); }
+
  VTKM_EXEC_CONT
  vtkm::Vec<T, 2> operator()(const T& a, const T& b) const
  {
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
@ -126,16 +126,19 @@ __global__ void SumExclusiveScan(T a, T b, T result, BinaryOperationType binary_
 #pragma GCC diagnostic pop
 #endif

-template <typename PortalType, typename OutValueType>
+template <typename PortalType, typename BinaryAndUnaryFunctor>
 struct CastPortal
 {
-  using ValueType = OutValueType;
+  using InputType = typename PortalType::ValueType;
+  using ValueType = decltype(std::declval<BinaryAndUnaryFunctor>()(std::declval<InputType>()));

  PortalType Portal;
+  BinaryAndUnaryFunctor Functor;

  VTKM_CONT
-  CastPortal(const PortalType& portal)
+  CastPortal(const PortalType& portal, const BinaryAndUnaryFunctor& functor)
    : Portal(portal)
+    , Functor(functor)
  {
  }

@ -143,7 +146,7 @@ struct CastPortal
  vtkm::Id GetNumberOfValues() const { return this->Portal.GetNumberOfValues(); }

  VTKM_EXEC
-  ValueType Get(vtkm::Id index) const { return static_cast<OutValueType>(this->Portal.Get(index)); }
+  ValueType Get(vtkm::Id index) const { return this->Functor(this->Portal.Get(index)); }
 };
 }
 } // end namespace cuda::internal
@ -332,7 +335,8 @@ private:
    //The portal type and the initial value AREN'T the same type so we have
    //to use a slower approach, where we wrap the input portal inside a cast
    //portal
-    vtkm::cont::cuda::internal::CastPortal<InputPortal, T> castPortal(input);
+    vtkm::cont::cuda::internal::CastPortal<InputPortal, BinaryFunctor> castPortal(input,
+                                                                                  binary_functor);

    vtkm::exec::cuda::internal::WrappedBinaryOperator<T, BinaryFunctor> bop(binary_functor);

--- a/vtkm/cont/testing/TestingDeviceAdapter.h
+++ b/vtkm/cont/testing/TestingDeviceAdapter.h
@ -410,6 +410,38 @@ public:
    IdPortalType Result;
  };

+  struct CustomPairOp
+  {
+    // Reduce functor over vtkm::Id input: .first carries the largest id, .second is always 0.0f.
+    using ValueType = vtkm::Pair<vtkm::Id, vtkm::Float32>;
+    // Unary form: promote a single id to the pair result type.
+    VTKM_EXEC ValueType operator()(const vtkm::Id& value) const { return Pack(value); }
+    // Two raw ids.
+    VTKM_EXEC ValueType operator()(const vtkm::Id& lhs, const vtkm::Id& rhs) const
+    {
+      return Pack(vtkm::Max(lhs, rhs));
+    }
+    // Two partial results.
+    VTKM_EXEC ValueType operator()(const ValueType& lhs, const ValueType& rhs) const
+    {
+      return Pack(vtkm::Max(lhs.first, rhs.first));
+    }
+    // Raw id on the left, partial result on the right.
+    VTKM_EXEC ValueType operator()(const vtkm::Id& lhs, const ValueType& rhs) const
+    {
+      return Pack(vtkm::Max(lhs, rhs.first));
+    }
+    // Partial result on the left, raw id on the right.
+    VTKM_EXEC ValueType operator()(const ValueType& lhs, const vtkm::Id& rhs) const
+    {
+      return Pack(vtkm::Max(lhs.first, rhs));
+    }
+
+  private:
+    // Builds the result pair from an id, with the Float32 component fixed at 0.0f.
+    VTKM_EXEC static ValueType Pack(const vtkm::Id& largest) { return ValueType(largest, 0.0f); }
+  };
+
  struct CustomTForReduce
  {
    constexpr CustomTForReduce()
@ -425,20 +457,18 @@ public:
    VTKM_EXEC_CONT
    constexpr float value() const { return this->Value; }

-    //required due to how the CUDA::Reduction is implemented when
-    //the return Type of Reduction is different than the input type
-    VTKM_EXEC_CONT
-    constexpr explicit operator vtkm::Vec<float, 2>() const
-    {
-      return vtkm::Vec<float, 2>(this->Value);
-    }
-
    float Value;
  };

  template <typename T>
  struct CustomMinAndMax
  {
+    VTKM_EXEC_CONT
+    vtkm::Vec<float, 2> operator()(const T& a) const
+    {
+      return vtkm::make_Vec(a.value(), a.value());
+    }
+
    VTKM_EXEC_CONT
    vtkm::Vec<float, 2> operator()(const T& a, const T& b) const
    {
@ -1299,10 +1329,21 @@ private:
      Algorithm::Reduce(input, vtkm::Vec<vtkm::Id, 2>(0, 0), vtkm::MinAndMax<vtkm::Id>());

    VTKM_TEST_ASSERT(maxValue == range[1], "Got bad value from Reduce with comparison object");
-
    VTKM_TEST_ASSERT(0 == range[0], "Got bad value from Reduce with comparison object");


+    std::cout << "  Reduce vtkm::Id array with custom functor that returns vtkm::Pair<>."
+              << std::endl;
+    auto pairInit = vtkm::Pair<vtkm::Id, vtkm::Float32>(0, 0.0f);
+    vtkm::Pair<vtkm::Id, vtkm::Float32> pairRange =
+      Algorithm::Reduce(input, pairInit, CustomPairOp());
+
+    VTKM_TEST_ASSERT(maxValue == pairRange.first,
+                     "Got bad value from Reduce with pair comparison object");
+    VTKM_TEST_ASSERT(0.0f == pairRange.second,
+                     "Got bad value from Reduce with pair comparison object");
+
+
    std::cout << "  Reduce bool array with vtkm::BitwiseAnd to see if all values are true."
              << std::endl;
    //construct an array of bools and verify that they aren't all true