From d5906eccd96bc9e93b41993d4f85ecb84028c9fb Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Tue, 12 May 2020 13:45:43 -0400 Subject: [PATCH] add ThreadIndicesTopologyMap optimized specializations This commit splits ThreadIndicesTopologyMap into two different specializations which can be instantiated with the tags: DefaultScatterAndMaskTag and CustomScatterOrMaskTag. These specializations will allow ThreadIndicesTopologyMap instances to avoid holding in memory the InputIndex, OutputIndex and VisitIndex variables when Mask = MaskNone and Scatter = ScatterIdentity, which in this case are not needed since no transformations are done. Signed-off-by: Vicente Adolfo Bolea Sanchez --- vtkm/exec/arg/FetchExtrude.h | 25 ++- vtkm/exec/arg/FetchTagArrayTopologyMapIn.h | 10 +- vtkm/exec/arg/ThreadIndicesExtrude.h | 8 +- vtkm/exec/arg/ThreadIndicesTopologyMap.h | 196 ++++++++++++++++-- .../UnitTestFetchArrayTopologyMapIn.cxx | 8 +- vtkm/worklet/WorkletMapTopology.h | 55 +++-- vtkm/worklet/gradient/PointGradient.h | 5 +- 7 files changed, 247 insertions(+), 60 deletions(-) diff --git a/vtkm/exec/arg/FetchExtrude.h b/vtkm/exec/arg/FetchExtrude.h index 05bac2a33..4189328be 100644 --- a/vtkm/exec/arg/FetchExtrude.h +++ b/vtkm/exec/arg/FetchExtrude.h @@ -30,10 +30,11 @@ template struct Fetch { VTKM_SUPPRESS_EXEC_WARNINGS - template - VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap< - vtkm::exec::ConnectivityExtrude>& indices, - const ExecObjectType&) const -> vtkm::Vec + template + VTKM_EXEC auto Load( + const vtkm::exec::arg::ThreadIndicesTopologyMap, + ScatterAndMaskMode>& indices, + const ExecObjectType&) const -> vtkm::Vec { // std::cout << "opimized fetch for point ids" << std::endl; const auto& xgcidx = indices.GetIndicesIncident(); @@ -50,9 +51,10 @@ struct Fetch - VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap& indices, - const ExecObjectType&) const -> decltype(indices.GetIndicesIncident()) + template + VTKM_EXEC auto 
Load( + const vtkm::exec::arg::ThreadIndicesTopologyMap& indices, + const ExecObjectType&) const -> decltype(indices.GetIndicesIncident()) { return indices.GetIndicesIncident(); } @@ -133,10 +135,11 @@ struct Fetch - VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap< - vtkm::exec::ReverseConnectivityExtrude>& indices, - const vtkm::exec::ArrayPortalExtrude& points) + template + VTKM_EXEC auto Load( + const vtkm::exec::arg::ThreadIndicesTopologyMap, + ScatterAndMaskMode>& indices, + const vtkm::exec::ArrayPortalExtrude& points) -> decltype(points.Get(indices.GetIndexLogical())) { // std::cout << "optimized fetch for point coordinates" << std::endl; diff --git a/vtkm/exec/arg/FetchTagArrayTopologyMapIn.h b/vtkm/exec/arg/FetchTagArrayTopologyMapIn.h index 28af1c867..60074269f 100644 --- a/vtkm/exec/arg/FetchTagArrayTopologyMapIn.h +++ b/vtkm/exec/arg/FetchTagArrayTopologyMapIn.h @@ -213,11 +213,11 @@ struct Fetch - VTKM_EXEC auto Load(const vtkm::exec::arg::ThreadIndicesTopologyMap< - vtkm::exec::ConnectivityExtrude>& indices, - const ExecObjectType& portal) - -> vtkm::Vec + template + VTKM_EXEC auto Load( + const vtkm::exec::arg::ThreadIndicesTopologyMap, + ScatterAndMaskMode>& indices, + const ExecObjectType& portal) -> vtkm::Vec { // std::cout << "opimized fetch for point values" << std::endl; const auto& xgcidx = indices.GetIndicesIncident(); diff --git a/vtkm/exec/arg/ThreadIndicesExtrude.h b/vtkm/exec/arg/ThreadIndicesExtrude.h index 21f9f1f5c..bcc95abbb 100644 --- a/vtkm/exec/arg/ThreadIndicesExtrude.h +++ b/vtkm/exec/arg/ThreadIndicesExtrude.h @@ -21,8 +21,8 @@ namespace arg { // Specialization for extrude types. -template -class ThreadIndicesTopologyMap> +template +class ThreadIndicesTopologyMap, ScatterAndMaskMode> { using ConnectivityType = vtkm::exec::ConnectivityExtrude; @@ -175,8 +175,8 @@ private: }; // Specialization for extrude types. 
-template -class ThreadIndicesTopologyMap> +template +class ThreadIndicesTopologyMap, ScatterAndMaskMode> { using ConnectivityType = vtkm::exec::ReverseConnectivityExtrude; diff --git a/vtkm/exec/arg/ThreadIndicesTopologyMap.h b/vtkm/exec/arg/ThreadIndicesTopologyMap.h index a32f85a0b..6c5fb6ffe 100644 --- a/vtkm/exec/arg/ThreadIndicesTopologyMap.h +++ b/vtkm/exec/arg/ThreadIndicesTopologyMap.h @@ -69,6 +69,17 @@ static inline VTKM_EXEC vtkm::Id2 Deflate(const vtkm::Id3& index, vtkm::Id2) } // namespace detail +/// \brief Uses space optimizations when using MaskNone and ScatterIdentity. +/// +struct DefaultScatterAndMaskTag +{ +}; + +/// \brief Used when not using both MaskNone and ScatterIdentity. +/// +struct CustomScatterOrMaskTag +{ +}; /// \brief Container for thread indices in a topology map /// @@ -80,7 +91,7 @@ static inline VTKM_EXEC vtkm::Id2 Deflate(const vtkm::Id3& index, vtkm::Id2) /// This class is templated on the type that stores the connectivity (such /// as \c ConnectivityExplicit or \c ConnectivityStructured). /// -template +template class ThreadIndicesTopologyMap : public vtkm::exec::arg::ThreadIndicesBasic { using Superclass = vtkm::exec::arg::ThreadIndicesBasic; @@ -144,10 +155,11 @@ private: CellShapeTag CellShape; }; -// Specialization for structured connectivity types. 
+/// \brief Specialization for CustomScatterOrMaskTag template class ThreadIndicesTopologyMap< - vtkm::exec::ConnectivityStructured> + vtkm::exec::ConnectivityStructured, + CustomScatterOrMaskTag> { using ConnectivityType = vtkm::exec::ConnectivityStructured; @@ -160,15 +172,15 @@ public: vtkm::exec::ConnectivityStructured; VTKM_EXEC ThreadIndicesTopologyMap(vtkm::Id threadIndex, - vtkm::Id inIndex, + vtkm::Id inputIndex, vtkm::IdComponent visitIndex, - vtkm::Id outIndex, + vtkm::Id outputIndex, const ConnectivityType& connectivity) { this->ThreadIndex = threadIndex; - this->InputIndex = inIndex; + this->InputIndex = inputIndex; this->VisitIndex = visitIndex; - this->OutputIndex = outIndex; + this->OutputIndex = outputIndex; this->LogicalIndex = connectivity.FlatToLogicalToIndex(this->InputIndex); this->IndicesIncident = connectivity.GetIndices(this->LogicalIndex); this->CellShape = connectivity.GetCellShape(this->InputIndex); @@ -181,9 +193,6 @@ public: // This constructor handles multidimensional indices on one-to-one input-to-output auto logicalIndex = detail::Deflate(threadIndex3D, LogicalIndexType()); this->ThreadIndex = threadIndex1D; - this->InputIndex = threadIndex1D; - this->OutputIndex = threadIndex1D; - this->VisitIndex = 0; this->LogicalIndex = logicalIndex; this->IndicesIncident = connectivity.GetIndices(logicalIndex); this->CellShape = connectivity.GetCellShape(threadIndex1D); @@ -207,6 +216,7 @@ public: this->CellShape = connectivity.GetCellShape(threadIndex1D); } + /// \brief The index of the thread or work invocation. /// /// This index refers to which instance of the worklet is being invoked. 
Every invocation of the @@ -292,20 +302,176 @@ public: private: vtkm::Id ThreadIndex; - vtkm::Id InputIndex; vtkm::IdComponent VisitIndex; + LogicalIndexType LogicalIndex; + IndicesIncidentType IndicesIncident; + CellShapeTag CellShape; + vtkm::Id InputIndex; vtkm::Id OutputIndex; +}; + +/// \brief Specialization for DefaultScatterAndMaskTag +/// +/// It does not store VisitIndex, InputIndex and OutputIndex +/// since this is used only when Scatter is set as ScatterIdentity +/// and Mask is set as MaskNone, which do not perform any transformation on the +/// indices. +/// +template +class ThreadIndicesTopologyMap< + vtkm::exec::ConnectivityStructured, + DefaultScatterAndMaskTag> +{ + using ConnectivityType = + vtkm::exec::ConnectivityStructured; + +public: + using IndicesIncidentType = typename ConnectivityType::IndicesType; + using CellShapeTag = typename ConnectivityType::CellShapeTag; + using LogicalIndexType = typename ConnectivityType::SchedulingRangeType; + using Connectivity = ConnectivityType; + + VTKM_EXEC ThreadIndicesTopologyMap(vtkm::Id threadIndex, + vtkm::Id inputIndex, + vtkm::IdComponent vtkmNotUsed(visitIndex), + vtkm::Id vtkmNotUsed(outputIndex), + const ConnectivityType& connectivity) + { + this->ThreadIndex = threadIndex; + this->LogicalIndex = connectivity.FlatToLogicalToIndex(inputIndex); + this->IndicesIncident = connectivity.GetIndices(this->LogicalIndex); + this->CellShape = connectivity.GetCellShape(inputIndex); + } + + + + + VTKM_EXEC ThreadIndicesTopologyMap(const vtkm::Id3& threadIndex3D, + vtkm::Id threadIndex1D, + const ConnectivityType& connectivity) + { + // This constructor handles multidimensional indices on one-to-one input-to-output + auto logicalIndex = detail::Deflate(threadIndex3D, LogicalIndexType()); + this->ThreadIndex = threadIndex1D; + this->LogicalIndex = logicalIndex; + this->IndicesIncident = connectivity.GetIndices(logicalIndex); + this->CellShape = connectivity.GetCellShape(threadIndex1D); + } + + VTKM_EXEC 
ThreadIndicesTopologyMap(const vtkm::Id3& threadIndex3D, + vtkm::Id threadIndex1D, + vtkm::Id vtkmNotUsed(inIndex), + vtkm::IdComponent vtkmNotUsed(visitIndex), + vtkm::Id vtkmNotUsed(outIndex), + const ConnectivityType& connectivity) + { + // This constructor handles multidimensional indices on many-to-many input-to-output + auto logicalIndex = detail::Deflate(threadIndex3D, LogicalIndexType()); + this->ThreadIndex = threadIndex1D; + this->LogicalIndex = logicalIndex; + this->IndicesIncident = connectivity.GetIndices(logicalIndex); + this->CellShape = connectivity.GetCellShape(threadIndex1D); + } + + /// \brief The index of the thread or work invocation. + /// + /// This index refers to which instance of the worklet is being invoked. Every invocation of the + /// worklet has a unique thread index. This is also called the work index depending on the + /// context. + /// + VTKM_EXEC + vtkm::Id GetThreadIndex() const { return this->ThreadIndex; } + + /// \brief The logical index into the input domain. + /// + /// This is similar to \c GetIndex3D except the Vec size matches the actual + /// dimensions of the data. + /// + VTKM_EXEC + LogicalIndexType GetIndexLogical() const { return this->LogicalIndex; } + + /// \brief The index into the input domain. + /// + /// This index refers to the input element (array value, cell, etc.) that + /// this thread is being invoked for. This is the typical index used during + /// fetches. + /// + VTKM_EXEC + vtkm::Id GetInputIndex() const { return this->ThreadIndex; } + + /// \brief The 3D index into the input domain. + /// + /// Overloads the implementation in the base class to return the 3D index + /// for the input. + /// + VTKM_EXEC + vtkm::Id3 GetInputIndex3D() const { return detail::InflateTo3D(this->GetIndexLogical()); } + + /// \brief The index into the output domain. + /// + /// This index refers to the output element (array value, cell, etc.) that + /// this thread is creating. 
This is the typical index used during + /// Fetch::Store. + /// + VTKM_EXEC + vtkm::Id GetOutputIndex() const { return this->ThreadIndex; } + + /// \brief The visit index. + /// + /// When multiple output indices have the same input index, they are + /// distinguished using the visit index. + /// + VTKM_EXEC + vtkm::IdComponent GetVisitIndex() const { return 0; } + + /// \brief The indices of the incident elements. + /// + /// A topology map has "visited" and "incident" elements (e.g. points, cells, + /// etc). For each worklet invocation, there is exactly one visited element, + /// but there can be several incident elements. This method returns a + /// Vec-like object containing the indices to the incident elements. + /// + VTKM_EXEC + const IndicesIncidentType& GetIndicesIncident() const { return this->IndicesIncident; } + + /// \brief The input indices of the incident elements in pointer form. + /// + /// Returns the same object as GetIndicesIncident except that it returns a + /// pointer to the internally held object rather than a reference or copy. + /// Since the incident indices can be a sizeable Vec (8 entries is common), it is + /// best not to have a bunch of copies. Thus, you can pass around a pointer + /// instead. However, care should be taken to make sure that this object does + /// not go out of scope, at which time the returned pointer becomes invalid. + /// + VTKM_EXEC + const IndicesIncidentType* GetIndicesIncidentPointer() const { return &this->IndicesIncident; } + + /// \brief The shape of the input cell. + /// + /// In topology maps that map from points to something, the indices make up + /// the structure of a cell. Although the shape tag is not technically an + /// index, it defines the meaning of the indices, so we put it here. (That + /// and this class is the only convenient place to store it.) 
+ /// + VTKM_EXEC + CellShapeTag GetCellShape() const { return this->CellShape; } + +private: + vtkm::Id ThreadIndex; LogicalIndexType LogicalIndex; IndicesIncidentType IndicesIncident; CellShapeTag CellShape; }; -// Specialization for permuted structured connectivity types. + +/// \brief Specialization for permuted structured connectivity types. template class ThreadIndicesTopologyMap>> + PermutationPortal, + vtkm::exec::ConnectivityStructured>, + CustomScatterOrMaskTag> { using PermutedConnectivityType = vtkm::exec::ConnectivityPermutedVisitCellsWithPoints< PermutationPortal, diff --git a/vtkm/exec/arg/testing/UnitTestFetchArrayTopologyMapIn.cxx b/vtkm/exec/arg/testing/UnitTestFetchArrayTopologyMapIn.cxx index 1ab639ed1..db2d68364 100644 --- a/vtkm/exec/arg/testing/UnitTestFetchArrayTopologyMapIn.cxx +++ b/vtkm/exec/arg/testing/UnitTestFetchArrayTopologyMapIn.cxx @@ -87,7 +87,9 @@ struct FetchArrayTopologyMapInTests void TryInvocation(const Invocation& invocation) const { using ConnectivityType = typename Invocation::InputDomainType; - using ThreadIndicesType = vtkm::exec::arg::ThreadIndicesTopologyMap; + using ThreadIndicesType = + vtkm::exec::arg::ThreadIndicesTopologyMap; using FetchType = vtkm::exec::arg::Fetch; + using ThreadIndicesType = + vtkm::exec::arg::ThreadIndicesTopologyMap; vtkm::exec::arg::Fetch - VTKM_EXEC vtkm::exec::arg::ThreadIndicesTopologyMap GetThreadIndices( - vtkm::Id threadIndex, - const OutToInArrayType& outToIn, - const VisitArrayType& visit, - const ThreadToOutArrayType& threadToOut, - const InputDomainType& connectivity) const + VTKM_EXEC vtkm::exec::arg::ThreadIndicesTopologyMap + GetThreadIndices(vtkm::Id threadIndex, + const OutToInArrayType& outToIn, + const VisitArrayType& visit, + const ThreadToOutArrayType& threadToOut, + const InputDomainType& connectivity) const { const vtkm::Id outIndex = threadToOut.Get(threadIndex); - return vtkm::exec::arg::ThreadIndicesTopologyMap( + return vtkm::exec::arg::ThreadIndicesTopologyMap( 
threadIndex, outToIn.Get(outIndex), visit.Get(outIndex), outIndex, connectivity); } @@ -199,7 +201,10 @@ public: typename InputDomainType, bool S = IsScatterIdentity, bool M = IsMaskNone> - VTKM_EXEC EnableFnWhen> + VTKM_EXEC EnableFnWhen< + S && M, + vtkm::exec::arg::ThreadIndicesTopologyMap> GetThreadIndices(vtkm::Id threadIndex1D, const vtkm::Id3& threadIndex3D, const OutToInArrayType& vtkmNotUsed(outToIn), @@ -207,7 +212,8 @@ public: const ThreadToOutArrayType& vtkmNotUsed(threadToOut), const InputDomainType& connectivity) const { - return vtkm::exec::arg::ThreadIndicesTopologyMap( + return vtkm::exec::arg::ThreadIndicesTopologyMap( threadIndex3D, threadIndex1D, connectivity); } @@ -219,21 +225,26 @@ public: typename InputDomainType, bool S = IsScatterIdentity, bool M = IsMaskNone> - VTKM_EXEC EnableFnWhen> - GetThreadIndices(vtkm::Id threadIndex1D, - const vtkm::Id3& threadIndex3D, - const OutToInArrayType& outToIn, - const VisitArrayType& visit, - const ThreadToOutArrayType& threadToOut, - const InputDomainType& connectivity) const + VTKM_EXEC + EnableFnWhen> + GetThreadIndices(vtkm::Id threadIndex1D, + const vtkm::Id3& threadIndex3D, + const OutToInArrayType& outToIn, + const VisitArrayType& visit, + const ThreadToOutArrayType& threadToOut, + const InputDomainType& connectivity) const { const vtkm::Id outIndex = threadToOut.Get(threadIndex1D); - return vtkm::exec::arg::ThreadIndicesTopologyMap(threadIndex3D, - threadIndex1D, - outToIn.Get(outIndex), - visit.Get(outIndex), - outIndex, - connectivity); + return vtkm::exec::arg::ThreadIndicesTopologyMap( + threadIndex3D, + threadIndex1D, + outToIn.Get(outIndex), + visit.Get(outIndex), + outIndex, + connectivity); } }; diff --git a/vtkm/worklet/gradient/PointGradient.h b/vtkm/worklet/gradient/PointGradient.h index 8960c569c..033327bc9 100644 --- a/vtkm/worklet/gradient/PointGradient.h +++ b/vtkm/worklet/gradient/PointGradient.h @@ -50,7 +50,10 @@ struct PointGradient : public 
vtkm::worklet::WorkletVisitPointsWithCells const WholeFieldIn& inputField, GradientOutType& outputGradient) const { - using CellThreadIndices = vtkm::exec::arg::ThreadIndicesTopologyMap; + // Use optimized ThreadIndicesTopologyMap + using CellThreadIndices = + vtkm::exec::arg::ThreadIndicesTopologyMap; using ValueType = typename WholeFieldIn::ValueType; using CellShapeTag = typename CellSetInType::CellShapeTag;