Reduction on CUDA handles different input and output types better

When reducing an input type that differs from the output type
you need to write a custom binary operator that also implements
how to do the unary transformation.
This commit is contained in:
Robert Maynard 2019-04-10 14:31:33 -04:00
parent fda02b3a91
commit 89ec4aae2f
4 changed files with 115 additions and 14 deletions

@ -0,0 +1,53 @@
# DeviceAdapter Reduction supports differing input and output types
It is common to want to perform a reduction where the input and output types
differ. A basic example would be when the input is `vtkm::UInt8`
but the output is `vtkm::UInt64`. This has been supported since v1.2, because the input
type is implicitly convertible to the output type.
What we now support is when the input type is not implicitly convertible to the output type,
such as when the output type is `vtkm::Pair<vtkm::UInt64, vtkm::UInt64>`. For this to work
we require that the custom binary operator also implements a single-argument `operator()` that
handles the unary transformation of input to output.
An example of a custom reduction operator for differing input and output types is:
```cxx
// Reduction operator that consumes vtkm::Float64 values and produces a
// (minimum, maximum) pair. Because the input and output types differ, it
// supplies a unary overload plus a binary overload for every combination of
// input and output operands.
struct CustomMinAndMax
{
  using OutputType = vtkm::Pair<vtkm::Float64, vtkm::Float64>;

  // Unary transformation: a lone input value is both the minimum and the maximum.
  VTKM_EXEC_CONT
  OutputType operator()(vtkm::Float64 value) const
  {
    return OutputType(value, value);
  }

  // input (op) input
  VTKM_EXEC_CONT
  OutputType operator()(vtkm::Float64 lhs, vtkm::Float64 rhs) const
  {
    const vtkm::Float64 lowest = vtkm::Min(lhs, rhs);
    const vtkm::Float64 highest = vtkm::Max(lhs, rhs);
    return OutputType(lowest, highest);
  }

  // output (op) output: merge two partial (min, max) results.
  VTKM_EXEC_CONT
  OutputType operator()(const OutputType& lhs, const OutputType& rhs) const
  {
    const vtkm::Float64 lowest = vtkm::Min(lhs.first, rhs.first);
    const vtkm::Float64 highest = vtkm::Max(lhs.second, rhs.second);
    return OutputType(lowest, highest);
  }

  // input (op) output: fold a raw value into a partial result on its right.
  VTKM_EXEC_CONT
  OutputType operator()(vtkm::Float64 lhs, const OutputType& rhs) const
  {
    const vtkm::Float64 lowest = vtkm::Min(lhs, rhs.first);
    const vtkm::Float64 highest = vtkm::Max(lhs, rhs.second);
    return OutputType(lowest, highest);
  }

  // output (op) input: fold a raw value into a partial result on its left.
  VTKM_EXEC_CONT
  OutputType operator()(const OutputType& lhs, vtkm::Float64 rhs) const
  {
    const vtkm::Float64 lowest = vtkm::Min(lhs.first, rhs);
    const vtkm::Float64 highest = vtkm::Max(lhs.second, rhs);
    return OutputType(lowest, highest);
  }
};
```

@ -97,6 +97,9 @@ struct Minimum
template <typename T>
struct MinAndMax
{
VTKM_EXEC_CONT
vtkm::Vec<T, 2> operator()(const T& a) const { return vtkm::make_Vec(a, a); }
VTKM_EXEC_CONT
vtkm::Vec<T, 2> operator()(const T& a, const T& b) const
{

@ -126,16 +126,19 @@ __global__ void SumExclusiveScan(T a, T b, T result, BinaryOperationType binary_
#pragma GCC diagnostic pop
#endif
template <typename PortalType, typename OutValueType>
template <typename PortalType, typename BinaryAndUnaryFunctor>
struct CastPortal
{
using ValueType = OutValueType;
using InputType = typename PortalType::ValueType;
using ValueType = decltype(std::declval<BinaryAndUnaryFunctor>()(std::declval<InputType>()));
PortalType Portal;
BinaryAndUnaryFunctor Functor;
VTKM_CONT
CastPortal(const PortalType& portal)
CastPortal(const PortalType& portal, const BinaryAndUnaryFunctor& functor)
: Portal(portal)
, Functor(functor)
{
}
@ -143,7 +146,7 @@ struct CastPortal
vtkm::Id GetNumberOfValues() const { return this->Portal.GetNumberOfValues(); }
VTKM_EXEC
ValueType Get(vtkm::Id index) const { return static_cast<OutValueType>(this->Portal.Get(index)); }
ValueType Get(vtkm::Id index) const { return this->Functor(this->Portal.Get(index)); }
};
}
} // end namespace cuda::internal
@ -332,7 +335,8 @@ private:
//The portal type and the initial value AREN'T the same type so we have
//to use a slower approach, where we wrap the input portal inside a cast
//portal
vtkm::cont::cuda::internal::CastPortal<InputPortal, T> castPortal(input);
vtkm::cont::cuda::internal::CastPortal<InputPortal, BinaryFunctor> castPortal(input,
binary_functor);
vtkm::exec::cuda::internal::WrappedBinaryOperator<T, BinaryFunctor> bop(binary_functor);

@ -410,6 +410,38 @@ public:
IdPortalType Result;
};
// Test reduction functor whose result type (a vtkm::Pair) is not the same as
// its vtkm::Id input type. The first component of the result holds the input
// value (unary form) or the larger of the two operands (binary forms); the
// second component is always 0.0f.
struct CustomPairOp
{
  using ValueType = vtkm::Pair<vtkm::Id, vtkm::Float32>;

  // Unary transformation: lift a single input value into the result type.
  VTKM_EXEC
  ValueType operator()(const vtkm::Id& value) const
  {
    return ValueType(value, 0.0f);
  }

  // input (op) input
  VTKM_EXEC
  ValueType operator()(const vtkm::Id& lhs, const vtkm::Id& rhs) const
  {
    const vtkm::Id larger = vtkm::Max(lhs, rhs);
    return ValueType(larger, 0.0f);
  }

  // output (op) output: combine two partial results.
  VTKM_EXEC
  ValueType operator()(const ValueType& lhs, const ValueType& rhs) const
  {
    const vtkm::Id larger = vtkm::Max(lhs.first, rhs.first);
    return ValueType(larger, 0.0f);
  }

  // input (op) output
  VTKM_EXEC
  ValueType operator()(const vtkm::Id& lhs, const ValueType& rhs) const
  {
    const vtkm::Id larger = vtkm::Max(lhs, rhs.first);
    return ValueType(larger, 0.0f);
  }

  // output (op) input
  VTKM_EXEC
  ValueType operator()(const ValueType& lhs, const vtkm::Id& rhs) const
  {
    const vtkm::Id larger = vtkm::Max(lhs.first, rhs);
    return ValueType(larger, 0.0f);
  }
};
struct CustomTForReduce
{
constexpr CustomTForReduce()
@ -425,20 +457,18 @@ public:
VTKM_EXEC_CONT
constexpr float value() const { return this->Value; }
//required due to how the CUDA::Reduction is implemented when
//the return Type of Reduction is different than the input type
VTKM_EXEC_CONT
constexpr explicit operator vtkm::Vec<float, 2>() const
{
return vtkm::Vec<float, 2>(this->Value);
}
float Value;
};
template <typename T>
struct CustomMinAndMax
{
VTKM_EXEC_CONT
vtkm::Vec<float, 2> operator()(const T& a) const
{
return vtkm::make_Vec(a.value(), a.value());
}
VTKM_EXEC_CONT
vtkm::Vec<float, 2> operator()(const T& a, const T& b) const
{
@ -1299,10 +1329,21 @@ private:
Algorithm::Reduce(input, vtkm::Vec<vtkm::Id, 2>(0, 0), vtkm::MinAndMax<vtkm::Id>());
VTKM_TEST_ASSERT(maxValue == range[1], "Got bad value from Reduce with comparison object");
VTKM_TEST_ASSERT(0 == range[0], "Got bad value from Reduce with comparison object");
std::cout << " Reduce vtkm::Id array with custom functor that returns vtkm::Pair<>."
<< std::endl;
auto pairInit = vtkm::Pair<vtkm::Id, vtkm::Float32>(0, 0.0f);
vtkm::Pair<vtkm::Id, vtkm::Float32> pairRange =
Algorithm::Reduce(input, pairInit, CustomPairOp());
VTKM_TEST_ASSERT(maxValue == pairRange.first,
"Got bad value from Reduce with pair comparison object");
VTKM_TEST_ASSERT(0.0f == pairRange.second,
"Got bad value from Reduce with pair comparison object");
std::cout << " Reduce bool array with vtkm::BitwiseAnd to see if all values are true."
<< std::endl;
//construct an array of bools and verify that they aren't all true