Merge topic 'fix-optimizing-threadindices'

d5906eccd add ThreadIndicesTopologyMap optimized specializations

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Merge-request: !2027
This commit is contained in:
Vicente Bolea 2020-05-12 22:08:11 +00:00 committed by Kitware Robot
commit ba77f36347
7 changed files with 247 additions and 60 deletions

@ -30,10 +30,11 @@ template <typename FetchType, typename ExecObjectType>
struct Fetch<FetchType, vtkm::exec::arg::AspectTagIncidentElementIndices, ExecObjectType>
{
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename Device>
VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap<
vtkm::exec::ConnectivityExtrude<Device>>& indices,
const ExecObjectType&) const -> vtkm::Vec<vtkm::Id, 6>
template <typename Device, typename ScatterAndMaskMode>
VTKM_EXEC auto Load(
const vtkm::exec::arg::ThreadIndicesTopologyMap<vtkm::exec::ConnectivityExtrude<Device>,
ScatterAndMaskMode>& indices,
const ExecObjectType&) const -> vtkm::Vec<vtkm::Id, 6>
{
// std::cout << "optimized fetch for point ids" << std::endl;
const auto& xgcidx = indices.GetIndicesIncident();
@ -50,9 +51,10 @@ struct Fetch<FetchType, vtkm::exec::arg::AspectTagIncidentElementIndices, ExecOb
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename ConnectivityType>
VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap<ConnectivityType>& indices,
const ExecObjectType&) const -> decltype(indices.GetIndicesIncident())
template <typename ConnectivityType, typename ScatterAndMaskMode>
VTKM_EXEC auto Load(
const vtkm::exec::arg::ThreadIndicesTopologyMap<ConnectivityType, ScatterAndMaskMode>& indices,
const ExecObjectType&) const -> decltype(indices.GetIndicesIncident())
{
return indices.GetIndicesIncident();
}
@ -133,10 +135,11 @@ struct Fetch<vtkm::exec::arg::FetchTagArrayDirectIn,
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename Device>
VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap<
vtkm::exec::ReverseConnectivityExtrude<Device>>& indices,
const vtkm::exec::ArrayPortalExtrude<T>& points)
template <typename Device, typename ScatterAndMaskMode>
VTKM_EXEC auto Load(
const vtkm::exec::arg::ThreadIndicesTopologyMap<vtkm::exec::ReverseConnectivityExtrude<Device>,
ScatterAndMaskMode>& indices,
const vtkm::exec::ArrayPortalExtrude<T>& points)
-> decltype(points.Get(indices.GetIndexLogical()))
{
// std::cout << "optimized fetch for point coordinates" << std::endl;

@ -213,11 +213,11 @@ struct Fetch<vtkm::exec::arg::FetchTagArrayTopologyMapIn,
//Optimized fetch for point arrays when iterating the cells ConnectivityExtrude
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename Device>
VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap<
vtkm::exec::ConnectivityExtrude<Device>>& indices,
const ExecObjectType& portal)
-> vtkm::Vec<typename ExecObjectType::ValueType, 6>
template <typename Device, typename ScatterAndMaskMode>
VTKM_EXEC auto Load(
const vtkm::exec::arg::ThreadIndicesTopologyMap<vtkm::exec::ConnectivityExtrude<Device>,
ScatterAndMaskMode>& indices,
const ExecObjectType& portal) -> vtkm::Vec<typename ExecObjectType::ValueType, 6>
{
// std::cout << "optimized fetch for point values" << std::endl;
const auto& xgcidx = indices.GetIndicesIncident();

@ -21,8 +21,8 @@ namespace arg
{
// Specialization for extrude types.
template <typename Device>
class ThreadIndicesTopologyMap<vtkm::exec::ConnectivityExtrude<Device>>
template <typename Device, typename ScatterAndMaskMode>
class ThreadIndicesTopologyMap<vtkm::exec::ConnectivityExtrude<Device>, ScatterAndMaskMode>
{
using ConnectivityType = vtkm::exec::ConnectivityExtrude<Device>;
@ -175,8 +175,8 @@ private:
};
// Specialization for extrude types.
template <typename Device>
class ThreadIndicesTopologyMap<vtkm::exec::ReverseConnectivityExtrude<Device>>
template <typename Device, typename ScatterAndMaskMode>
class ThreadIndicesTopologyMap<vtkm::exec::ReverseConnectivityExtrude<Device>, ScatterAndMaskMode>
{
using ConnectivityType = vtkm::exec::ReverseConnectivityExtrude<Device>;

@ -69,6 +69,17 @@ static inline VTKM_EXEC vtkm::Id2 Deflate(const vtkm::Id3& index, vtkm::Id2)
} // namespace detail
/// \brief Tag selecting the space-optimized thread indices for MaskNone and ScatterIdentity.
///
/// Passed as the second template argument of \c ThreadIndicesTopologyMap. For
/// structured connectivity it picks the specialization that does not store the
/// input, output and visit indices.
///
struct DefaultScatterAndMaskTag
{
};
/// \brief Tag used when the worklet is not using both MaskNone and ScatterIdentity.
///
/// Passed as the second template argument of \c ThreadIndicesTopologyMap. For
/// structured connectivity it picks the specialization that stores explicit
/// input, output and visit indices.
///
struct CustomScatterOrMaskTag
{
};
/// \brief Container for thread indices in a topology map
///
@ -80,7 +91,7 @@ static inline VTKM_EXEC vtkm::Id2 Deflate(const vtkm::Id3& index, vtkm::Id2)
/// This class is templated on the type that stores the connectivity (such
/// as \c ConnectivityExplicit or \c ConnectivityStructured).
///
template <typename ConnectivityType>
template <typename ConnectivityType, typename ScatterAndMaskMode>
class ThreadIndicesTopologyMap : public vtkm::exec::arg::ThreadIndicesBasic
{
using Superclass = vtkm::exec::arg::ThreadIndicesBasic;
@ -144,10 +155,11 @@ private:
CellShapeTag CellShape;
};
// Specialization for structured connectivity types.
/// \brief Specialization for CustomScatterOrMaskTag
template <typename VisitTopology, typename IncidentTopology, vtkm::IdComponent Dimension>
class ThreadIndicesTopologyMap<
vtkm::exec::ConnectivityStructured<VisitTopology, IncidentTopology, Dimension>>
vtkm::exec::ConnectivityStructured<VisitTopology, IncidentTopology, Dimension>,
CustomScatterOrMaskTag>
{
using ConnectivityType =
vtkm::exec::ConnectivityStructured<VisitTopology, IncidentTopology, Dimension>;
@ -160,15 +172,15 @@ public:
vtkm::exec::ConnectivityStructured<VisitTopology, IncidentTopology, Dimension>;
VTKM_EXEC ThreadIndicesTopologyMap(vtkm::Id threadIndex,
vtkm::Id inIndex,
vtkm::Id inputIndex,
vtkm::IdComponent visitIndex,
vtkm::Id outIndex,
vtkm::Id outputIndex,
const ConnectivityType& connectivity)
{
this->ThreadIndex = threadIndex;
this->InputIndex = inIndex;
this->InputIndex = inputIndex;
this->VisitIndex = visitIndex;
this->OutputIndex = outIndex;
this->OutputIndex = outputIndex;
this->LogicalIndex = connectivity.FlatToLogicalToIndex(this->InputIndex);
this->IndicesIncident = connectivity.GetIndices(this->LogicalIndex);
this->CellShape = connectivity.GetCellShape(this->InputIndex);
@ -181,9 +193,6 @@ public:
// This constructor handles multidimensional indices on one-to-one input-to-output
auto logicalIndex = detail::Deflate(threadIndex3D, LogicalIndexType());
this->ThreadIndex = threadIndex1D;
this->InputIndex = threadIndex1D;
this->OutputIndex = threadIndex1D;
this->VisitIndex = 0;
this->LogicalIndex = logicalIndex;
this->IndicesIncident = connectivity.GetIndices(logicalIndex);
this->CellShape = connectivity.GetCellShape(threadIndex1D);
@ -207,6 +216,7 @@ public:
this->CellShape = connectivity.GetCellShape(threadIndex1D);
}
/// \brief The index of the thread or work invocation.
///
/// This index refers to which instance of the worklet is being invoked. Every invocation of the
@ -292,20 +302,176 @@ public:
private:
vtkm::Id ThreadIndex;
vtkm::Id InputIndex;
vtkm::IdComponent VisitIndex;
LogicalIndexType LogicalIndex;
IndicesIncidentType IndicesIncident;
CellShapeTag CellShape;
vtkm::Id InputIndex;
vtkm::Id OutputIndex;
};
/// \brief Thread indices for structured connectivity under \c DefaultScatterAndMaskTag.
///
/// This variant is intended for worklets whose Scatter is ScatterIdentity and
/// whose Mask is MaskNone. Neither of those performs any transformation on the
/// indices, so VisitIndex, InputIndex and OutputIndex are not stored. The input
/// and output indices are reported as the thread index and the visit index is
/// always reported as 0.
///
template <typename VisitTopology, typename IncidentTopology, vtkm::IdComponent Dimension>
class ThreadIndicesTopologyMap<
  vtkm::exec::ConnectivityStructured<VisitTopology, IncidentTopology, Dimension>,
  DefaultScatterAndMaskTag>
{
  using ConnectivityType =
    vtkm::exec::ConnectivityStructured<VisitTopology, IncidentTopology, Dimension>;

public:
  using IndicesIncidentType = typename ConnectivityType::IndicesType;
  using CellShapeTag = typename ConnectivityType::CellShapeTag;
  using LogicalIndexType = typename ConnectivityType::SchedulingRangeType;
  using Connectivity = ConnectivityType;

  /// \brief Builds the indices from flat (1D) indices.
  ///
  /// The visit and output indices are accepted only for signature
  /// compatibility and are ignored. The logical index and the cell shape are
  /// both looked up from \c inputIndex.
  ///
  VTKM_EXEC ThreadIndicesTopologyMap(vtkm::Id threadIndex,
                                     vtkm::Id inputIndex,
                                     vtkm::IdComponent vtkmNotUsed(visitIndex),
                                     vtkm::Id vtkmNotUsed(outputIndex),
                                     const ConnectivityType& connectivity)
    // Members are initialized in declaration order, so LogicalIndex is
    // already valid when IndicesIncident reads it.
    : ThreadIndex(threadIndex)
    , LogicalIndex(connectivity.FlatToLogicalToIndex(inputIndex))
    , IndicesIncident(connectivity.GetIndices(LogicalIndex))
    , CellShape(connectivity.GetCellShape(inputIndex))
  {
  }

  /// \brief Builds the indices from a multidimensional thread index.
  ///
  /// Handles multidimensional indices on one-to-one input-to-output. The 3D
  /// index is deflated to the dimensionality of the connectivity to obtain the
  /// logical index; the cell shape is looked up from \c threadIndex1D.
  ///
  VTKM_EXEC ThreadIndicesTopologyMap(const vtkm::Id3& threadIndex3D,
                                     vtkm::Id threadIndex1D,
                                     const ConnectivityType& connectivity)
    : ThreadIndex(threadIndex1D)
    , LogicalIndex(detail::Deflate(threadIndex3D, LogicalIndexType()))
    , IndicesIncident(connectivity.GetIndices(LogicalIndex))
    , CellShape(connectivity.GetCellShape(threadIndex1D))
  {
  }

  /// \brief Builds the indices from a multidimensional thread index (many-to-many signature).
  ///
  /// The input, visit and output indices are ignored, so this does exactly the
  /// same work as the three-argument constructor and simply forwards to it.
  ///
  VTKM_EXEC ThreadIndicesTopologyMap(const vtkm::Id3& threadIndex3D,
                                     vtkm::Id threadIndex1D,
                                     vtkm::Id vtkmNotUsed(inIndex),
                                     vtkm::IdComponent vtkmNotUsed(visitIndex),
                                     vtkm::Id vtkmNotUsed(outIndex),
                                     const ConnectivityType& connectivity)
    : ThreadIndicesTopologyMap(threadIndex3D, threadIndex1D, connectivity)
  {
  }

  /// \brief The index of the thread or work invocation.
  ///
  /// Identifies which instance of the worklet is being invoked. Every
  /// invocation of the worklet has a unique thread index. Depending on the
  /// context this is also called the work index.
  ///
  VTKM_EXEC
  vtkm::Id GetThreadIndex() const { return ThreadIndex; }

  /// \brief The logical index into the input domain.
  ///
  /// Similar to \c GetIndex3D, except that the Vec size matches the actual
  /// dimensions of the data.
  ///
  VTKM_EXEC
  LogicalIndexType GetIndexLogical() const { return LogicalIndex; }

  /// \brief The index into the input domain.
  ///
  /// Refers to the input element (array value, cell, etc.) that this thread
  /// is being invoked for; it is the typical index used during fetches. This
  /// specialization does not store it separately and returns the thread index.
  ///
  VTKM_EXEC
  vtkm::Id GetInputIndex() const { return ThreadIndex; }

  /// \brief The 3D index into the input domain.
  ///
  /// Returns the logical index inflated to three components.
  ///
  VTKM_EXEC
  vtkm::Id3 GetInputIndex3D() const { return detail::InflateTo3D(GetIndexLogical()); }

  /// \brief The index into the output domain.
  ///
  /// Refers to the output element (array value, cell, etc.) that this thread
  /// is creating; it is the typical index used during Fetch::Store. This
  /// specialization does not store it separately and returns the thread index.
  ///
  VTKM_EXEC
  vtkm::Id GetOutputIndex() const { return ThreadIndex; }

  /// \brief The visit index.
  ///
  /// When multiple output indices share the same input index, the visit index
  /// tells them apart. This specialization always returns 0.
  ///
  VTKM_EXEC
  vtkm::IdComponent GetVisitIndex() const { return 0; }

  /// \brief The indices of the incident elements.
  ///
  /// A topology map has "visited" and "incident" elements (e.g. points, cells,
  /// etc). Each worklet invocation has exactly one visited element but may
  /// have several incident elements. This method returns a Vec-like object
  /// holding the indices of the incident elements.
  ///
  VTKM_EXEC
  const IndicesIncidentType& GetIndicesIncident() const { return IndicesIncident; }

  /// \brief The indices of the incident elements in pointer form.
  ///
  /// Returns the same object as GetIndicesIncident, but as a pointer to the
  /// internally held object rather than a reference or a copy. Because the
  /// indices can be a sizeable Vec (8 entries is common), it is best not to
  /// make a bunch of copies, so a pointer can be passed around instead. Take
  /// care that this object does not go out of scope, since the returned
  /// pointer becomes invalid at that point.
  ///
  VTKM_EXEC
  const IndicesIncidentType* GetIndicesIncidentPointer() const { return &IndicesIncident; }

  /// \brief The shape of the input cell.
  ///
  /// In topology maps that map from points to something, the indices make up
  /// the structure of a cell. Although the shape tag is not technically an
  /// index, it defines the meaning of the indices, so it is kept here. (This
  /// class is also the only convenient place to store it.)
  ///
  VTKM_EXEC
  CellShapeTag GetCellShape() const { return CellShape; }

private:
  // The constructors' initializer lists rely on this declaration order.
  vtkm::Id ThreadIndex;
  LogicalIndexType LogicalIndex;
  IndicesIncidentType IndicesIncident;
  CellShapeTag CellShape;
};
// Specialization for permuted structured connectivity types.
/// \brief Specialization for permuted structured connectivity types.
template <typename PermutationPortal, vtkm::IdComponent Dimension>
class ThreadIndicesTopologyMap<vtkm::exec::ConnectivityPermutedVisitCellsWithPoints<
PermutationPortal,
vtkm::exec::
ConnectivityStructured<vtkm::TopologyElementTagCell, vtkm::TopologyElementTagPoint, Dimension>>>
PermutationPortal,
vtkm::exec::ConnectivityStructured<vtkm::TopologyElementTagCell,
vtkm::TopologyElementTagPoint,
Dimension>>,
CustomScatterOrMaskTag>
{
using PermutedConnectivityType = vtkm::exec::ConnectivityPermutedVisitCellsWithPoints<
PermutationPortal,

@ -87,7 +87,9 @@ struct FetchArrayTopologyMapInTests
void TryInvocation(const Invocation& invocation) const
{
using ConnectivityType = typename Invocation::InputDomainType;
using ThreadIndicesType = vtkm::exec::arg::ThreadIndicesTopologyMap<ConnectivityType>;
using ThreadIndicesType =
vtkm::exec::arg::ThreadIndicesTopologyMap<ConnectivityType,
vtkm::exec::arg::CustomScatterOrMaskTag>;
using FetchType = vtkm::exec::arg::Fetch<vtkm::exec::arg::FetchTagArrayTopologyMapIn,
vtkm::exec::arg::AspectTagDefault,
@ -166,7 +168,9 @@ template <vtkm::IdComponent NumDimensions, vtkm::IdComponent ParamIndex, typenam
void TryStructuredPointCoordinatesInvocation(const Invocation& invocation)
{
using ConnectivityType = typename Invocation::InputDomainType;
using ThreadIndicesType = vtkm::exec::arg::ThreadIndicesTopologyMap<ConnectivityType>;
using ThreadIndicesType =
vtkm::exec::arg::ThreadIndicesTopologyMap<ConnectivityType,
vtkm::exec::arg::CustomScatterOrMaskTag>;
vtkm::exec::arg::Fetch<vtkm::exec::arg::FetchTagArrayTopologyMapIn,
vtkm::exec::arg::AspectTagDefault,

@ -164,15 +164,17 @@ public:
typename VisitArrayType,
typename ThreadToOutArrayType,
typename InputDomainType>
VTKM_EXEC vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType> GetThreadIndices(
vtkm::Id threadIndex,
const OutToInArrayType& outToIn,
const VisitArrayType& visit,
const ThreadToOutArrayType& threadToOut,
const InputDomainType& connectivity) const
VTKM_EXEC vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType,
vtkm::exec::arg::CustomScatterOrMaskTag>
GetThreadIndices(vtkm::Id threadIndex,
const OutToInArrayType& outToIn,
const VisitArrayType& visit,
const ThreadToOutArrayType& threadToOut,
const InputDomainType& connectivity) const
{
const vtkm::Id outIndex = threadToOut.Get(threadIndex);
return vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType>(
return vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType,
vtkm::exec::arg::CustomScatterOrMaskTag>(
threadIndex, outToIn.Get(outIndex), visit.Get(outIndex), outIndex, connectivity);
}
@ -199,7 +201,10 @@ public:
typename InputDomainType,
bool S = IsScatterIdentity,
bool M = IsMaskNone>
VTKM_EXEC EnableFnWhen<S && M, vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType>>
VTKM_EXEC EnableFnWhen<
S && M,
vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType,
vtkm::exec::arg::DefaultScatterAndMaskTag>>
GetThreadIndices(vtkm::Id threadIndex1D,
const vtkm::Id3& threadIndex3D,
const OutToInArrayType& vtkmNotUsed(outToIn),
@ -207,7 +212,8 @@ public:
const ThreadToOutArrayType& vtkmNotUsed(threadToOut),
const InputDomainType& connectivity) const
{
return vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType>(
return vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType,
vtkm::exec::arg::DefaultScatterAndMaskTag>(
threadIndex3D, threadIndex1D, connectivity);
}
@ -219,21 +225,26 @@ public:
typename InputDomainType,
bool S = IsScatterIdentity,
bool M = IsMaskNone>
VTKM_EXEC EnableFnWhen<!(S && M), vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType>>
GetThreadIndices(vtkm::Id threadIndex1D,
const vtkm::Id3& threadIndex3D,
const OutToInArrayType& outToIn,
const VisitArrayType& visit,
const ThreadToOutArrayType& threadToOut,
const InputDomainType& connectivity) const
VTKM_EXEC
EnableFnWhen<!(S && M),
vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType,
vtkm::exec::arg::CustomScatterOrMaskTag>>
GetThreadIndices(vtkm::Id threadIndex1D,
const vtkm::Id3& threadIndex3D,
const OutToInArrayType& outToIn,
const VisitArrayType& visit,
const ThreadToOutArrayType& threadToOut,
const InputDomainType& connectivity) const
{
const vtkm::Id outIndex = threadToOut.Get(threadIndex1D);
return vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType>(threadIndex3D,
threadIndex1D,
outToIn.Get(outIndex),
visit.Get(outIndex),
outIndex,
connectivity);
return vtkm::exec::arg::ThreadIndicesTopologyMap<InputDomainType,
vtkm::exec::arg::CustomScatterOrMaskTag>(
threadIndex3D,
threadIndex1D,
outToIn.Get(outIndex),
visit.Get(outIndex),
outIndex,
connectivity);
}
};

@ -50,7 +50,10 @@ struct PointGradient : public vtkm::worklet::WorkletVisitPointsWithCells
const WholeFieldIn& inputField,
GradientOutType& outputGradient) const
{
using CellThreadIndices = vtkm::exec::arg::ThreadIndicesTopologyMap<CellSetInType>;
// Use optimized ThreadIndicesTopologyMap
using CellThreadIndices =
vtkm::exec::arg::ThreadIndicesTopologyMap<CellSetInType,
vtkm::exec::arg::DefaultScatterAndMaskTag>;
using ValueType = typename WholeFieldIn::ValueType;
using CellShapeTag = typename CellSetInType::CellShapeTag;