diff --git a/vtkm/filter/scalar_topology/testing/UnitTestContourTreeUniformAugmentedFilter.cxx b/vtkm/filter/scalar_topology/testing/UnitTestContourTreeUniformAugmentedFilter.cxx
index dc197bad2..6f4ce2218 100644
--- a/vtkm/filter/scalar_topology/testing/UnitTestContourTreeUniformAugmentedFilter.cxx
+++ b/vtkm/filter/scalar_topology/testing/UnitTestContourTreeUniformAugmentedFilter.cxx
@@ -50,12 +50,18 @@
 // Oliver Ruebel (LBNL)
 //==============================================================================
+#include
 #include
-#include
 #include
 #include
+
+#ifdef VTKM_ENABLE_MPI
+#include "TestingContourTreeUniformDistributedFilter.h"
+#include
+#endif
+
 namespace
 {
@@ -67,6 +73,258 @@ using vtkm::cont::testing::MakeTestDataSet;
 
 class TestContourTreeUniformAugmented
 {
 private:
+#ifdef VTKM_ENABLE_MPI
+  void ShiftLogicalOriginToZero(vtkm::cont::PartitionedDataSet& pds) const
+  {
+    // Shift the logical origin (minimum of LocalPointIndexStart) to zero
+    // along each dimension
+
+    // Compute minimum global point index start for all data sets on this MPI rank
+    std::vector<vtkm::Id> minimumGlobalPointIndexStartThisRank;
+    using ds_const_iterator = vtkm::cont::PartitionedDataSet::const_iterator;
+    for (ds_const_iterator ds_it = pds.cbegin(); ds_it != pds.cend(); ++ds_it)
+    {
+      ds_it->GetCellSet().CastAndCallForTypes(
+        [&minimumGlobalPointIndexStartThisRank](const auto& css) {
+          minimumGlobalPointIndexStartThisRank.resize(css.Dimension,
+                                                      std::numeric_limits<vtkm::Id>::max());
+          for (vtkm::IdComponent d = 0; d < css.Dimension; ++d)
+          {
+            minimumGlobalPointIndexStartThisRank[d] =
+              std::min(minimumGlobalPointIndexStartThisRank[d], css.GetGlobalPointIndexStart()[d]);
+          }
+        });
+    }
+
+    // Perform global reduction to find the minimum GlobalPointIndexStart across all ranks
+    std::vector<vtkm::Id> minimumGlobalPointIndexStart;
+    auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
+    vtkmdiy::mpi::all_reduce(comm,
+                             minimumGlobalPointIndexStartThisRank,
+                             minimumGlobalPointIndexStart,
+                             vtkmdiy::mpi::minimum<vtkm::Id>{});
+
+    // Shift all cell sets so that the minimum global point index start
+    // along each dimension is zero
+    using ds_iterator = vtkm::cont::PartitionedDataSet::iterator;
+    for (ds_iterator ds_it = pds.begin(); ds_it != pds.end(); ++ds_it)
+    {
+      // This does not work, i.e., it does not really change the cell set for the DataSet
+      ds_it->GetCellSet().CastAndCallForTypes(
+        [&minimumGlobalPointIndexStart, &ds_it](auto& css) {
+          auto pointIndexStart = css.GetGlobalPointIndexStart();
+          typename std::remove_reference_t<decltype(css)>::SchedulingRangeType
+            shiftedPointIndexStart;
+          for (vtkm::IdComponent d = 0; d < css.Dimension; ++d)
+          {
+            shiftedPointIndexStart[d] = pointIndexStart[d] - minimumGlobalPointIndexStart[d];
+          }
+          css.SetGlobalPointIndexStart(shiftedPointIndexStart);
+          // Why is the following necessary? Shouldn't it be sufficient to update the
+          // CellSet through the reference?
+          ds_it->SetCellSet(css);
+        });
+    }
+  }
+
+  void ComputeGlobalPointSize(vtkm::cont::PartitionedDataSet& pds) const
+  {
+    // Compute GlobalPointDimensions as maximum of GlobalPointIndexStart + PointDimensions
+    // for each dimension across all blocks
+
+    // Compute GlobalPointDimensions for all data sets on this MPI rank
+    std::vector<vtkm::Id> globalPointDimensionsThisRank;
+    using ds_const_iterator = vtkm::cont::PartitionedDataSet::const_iterator;
+    for (ds_const_iterator ds_it = pds.cbegin(); ds_it != pds.cend(); ++ds_it)
+    {
+      ds_it->GetCellSet().CastAndCallForTypes(
+        [&globalPointDimensionsThisRank](const auto& css) {
+          globalPointDimensionsThisRank.resize(css.Dimension, -1);
+          for (vtkm::IdComponent d = 0; d < css.Dimension; ++d)
+          {
+            globalPointDimensionsThisRank[d] =
+              std::max(globalPointDimensionsThisRank[d],
+                       css.GetGlobalPointIndexStart()[d] + css.GetPointDimensions()[d]);
+          }
+        });
+    }
+
+    // Perform global reduction to find GlobalPointDimensions across all ranks
+    std::vector<vtkm::Id> globalPointDimensions;
+    auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
+    vtkmdiy::mpi::all_reduce(comm,
+                             globalPointDimensionsThisRank,
+                             globalPointDimensions,
+                             vtkmdiy::mpi::maximum<vtkm::Id>{});
+
+    // Set this information in all cell sets
+    using ds_iterator = vtkm::cont::PartitionedDataSet::iterator;
+    for (ds_iterator ds_it = pds.begin(); ds_it != pds.end(); ++ds_it)
+    {
+      // This does not work, i.e., it does not really change the cell set for the DataSet
+      ds_it->GetCellSet().CastAndCallForTypes(
+        [&globalPointDimensions, &ds_it](auto& css) {
+          typename std::remove_reference_t<decltype(css)>::SchedulingRangeType gpd;
+          for (vtkm::IdComponent d = 0; d < css.Dimension; ++d)
+          {
+            gpd[d] = globalPointDimensions[d];
+          }
+          css.SetGlobalPointDimensions(gpd);
+          // Why is the following necessary? Shouldn't it be sufficient to update the
+          // CellSet through the reference?
+          ds_it->SetCellSet(css);
+        });
+    }
+  }
+
+  void GetPartitionedDataSet(const vtkm::cont::DataSet& ds,
+                             const std::string& fieldName,
+                             const int numberOfBlocks,
+                             const int rank,
+                             const int numberOfRanks,
+                             vtkm::cont::PartitionedDataSet& pds) const
+  {
+    // Get dimensions of data set
+    vtkm::Id3 globalSize;
+    vtkm::cont::CastAndCall(
+      ds.GetCellSet(), vtkm::worklet::contourtree_augmented::GetPointDimensions(), globalSize);
+
+    // Determine split
+    vtkm::Id3 blocksPerAxis =
+      vtkm::filter::testing::contourtree_uniform_distributed::ComputeNumberOfBlocksPerAxis(
+        globalSize, numberOfBlocks);
+    vtkm::Id blocksPerRank = numberOfBlocks / numberOfRanks;
+    vtkm::Id numRanksWithExtraBlock = numberOfBlocks % numberOfRanks;
+    vtkm::Id blocksOnThisRank, startBlockNo;
+
+    if (rank < numRanksWithExtraBlock)
+    {
+      blocksOnThisRank = blocksPerRank + 1;
+      startBlockNo = (blocksPerRank + 1) * rank;
+    }
+    else
+    {
+      blocksOnThisRank = blocksPerRank;
+      startBlockNo = numRanksWithExtraBlock * (blocksPerRank + 1) +
+        (rank - numRanksWithExtraBlock) * blocksPerRank;
+    }
+
+    // Create partitioned (split) data set
+    //vtkm::cont::PartitionedDataSet pds;
+    vtkm::cont::ArrayHandle<vtkm::Id3> localBlockIndices;
+
+    localBlockIndices.Allocate(blocksOnThisRank);
+
+    auto localBlockIndicesPortal = localBlockIndices.WritePortal();
+
+    for (vtkm::Id blockNo = 0; blockNo < blocksOnThisRank; ++blockNo)
+    {
+      vtkm::Id3 blockOrigin, blockSize, blockIndex;
+      std::tie(blockIndex, blockOrigin, blockSize) =
+        vtkm::filter::testing::contourtree_uniform_distributed::ComputeBlockExtents(
+          globalSize, blocksPerAxis, startBlockNo + blockNo);
+      pds.AppendPartition(vtkm::filter::testing::contourtree_uniform_distributed::CreateSubDataSet(
+        ds, blockOrigin, blockSize, fieldName));
+      localBlockIndicesPortal.Set(blockNo, blockIndex);
+    }
+  }
+#endif
+
+  template <typename DataValueType>
+  void analysis(vtkm::filter::scalar_topology::ContourTreeAugmented& filter,
+                bool dataFieldIsSorted,
+                const vtkm::cont::UnknownArrayHandle& arr,
+                const vtkm::Id& levels,
+                std::vector<DataValueType>& isoValues) const
+  {
+    namespace caugmented_ns = vtkm::worklet::contourtree_augmented;
+
+    DataValueType eps = 0.00001f;  // Distance away from critical point
+    vtkm::Id numComp = levels + 1; // Number of components the tree should be simplified to
+    bool usePersistenceSorter = true;
+
+    // Compute the branch decomposition
+    // Compute the volume for each hyperarc and superarc
+    caugmented_ns::IdArrayType superarcIntrinsicWeight;
+    caugmented_ns::IdArrayType superarcDependentWeight;
+    caugmented_ns::IdArrayType supernodeTransferWeight;
+    caugmented_ns::IdArrayType hyperarcDependentWeight;
+
+    caugmented_ns::ProcessContourTree::ComputeVolumeWeightsSerial(
+      filter.GetContourTree(),
+      filter.GetNumIterations(),
+      superarcIntrinsicWeight,  // (output)
+      superarcDependentWeight,  // (output)
+      supernodeTransferWeight,  // (output)
+      hyperarcDependentWeight); // (output)
+
+    // Compute the branch decomposition by volume
+    caugmented_ns::IdArrayType whichBranch;
+    caugmented_ns::IdArrayType branchMinimum;
+    caugmented_ns::IdArrayType branchMaximum;
+    caugmented_ns::IdArrayType branchSaddle;
+    caugmented_ns::IdArrayType branchParent;
+
+#ifdef DEBUG
+    PrintArrayHandle(superarcIntrinsicWeight, "superarcIntrinsicWeight");
+    PrintArrayHandle(superarcDependentWeight, "superarcDependentWeight");
+    PrintArrayHandle(supernodeTransferWeight, "supernodeTransferWeight");
+    PrintArrayHandle(hyperarcDependentWeight, "hyperarcDependentWeight");
+#endif // DEBUG
+
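+    // whichBranch maps each supernode to the branch containing it; branchMinimum, branchMaximum,
+    // branchSaddle, and branchParent record the extrema, attachment saddle, and parent of each
+    // branch in the decomposition computed below.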
+    caugmented_ns::ProcessContourTree::ComputeVolumeBranchDecompositionSerial(
+      filter.GetContourTree(),
+      superarcDependentWeight,
+      superarcIntrinsicWeight,
+      whichBranch,   // (output)
+      branchMinimum, // (output)
+      branchMaximum, // (output)
+      branchSaddle,  // (output)
+      branchParent); // (output)
+
+    // Create explicit representation of the branch decomposition from the array representation
+    using ValueArray = vtkm::cont::ArrayHandle<DataValueType>;
+    ValueArray dataField;
+
+    arr.AsArrayHandle(dataField);
+
+    using BranchType =
+      vtkm::worklet::contourtree_augmented::process_contourtree_inc::Branch<DataValueType>;
+
+    BranchType* branchDecompositionRoot =
+      caugmented_ns::ProcessContourTree::ComputeBranchDecomposition(
+        filter.GetContourTree().Superparents,
+        filter.GetContourTree().Supernodes,
+        whichBranch,
+        branchMinimum,
+        branchMaximum,
+        branchSaddle,
+        branchParent,
+        filter.GetSortOrder(),
+        dataField,
+        dataFieldIsSorted);
+
+    // Simplify the branch decomposition to the requested number of components
+    branchDecompositionRoot->SimplifyToSize(numComp, usePersistenceSorter);
+
+    int contourType = 0;
+
+    branchDecompositionRoot->GetRelevantValues(contourType, eps, isoValues);
+
+    // Sort the computed isovalues
+    std::sort(isoValues.begin(), isoValues.end());
+
+    // Remove duplicate isovalues
+    auto it = std::unique(isoValues.begin(), isoValues.end());
+    isoValues.resize(std::distance(isoValues.begin(), it));
+
+    if (branchDecompositionRoot)
+    {
+      delete branchDecompositionRoot;
+    }
+  }
+
   //
   // Internal helper function to execute the contour tree and save repeat code in tests
   //
@@ -437,6 +695,79 @@ public:
                     "Wrong result for ContourTree filter");
   }
 
+  void TestAnalysis() const
+  {
+    std::cout << "Testing ContourTree_Augmented With Analysis" << std::endl;
+
+    using ValueType = vtkm::Float32;
+    vtkm::cont::DataSet ds = MakeTestDataSet().Make3DUniformDataSet1();
+
+    std::string fieldName = "pointvar";
+    bool useMarchingCubes = false;
+    bool computeRegularStructure = true;
+    vtkm::filter::scalar_topology::ContourTreeAugmented filter(useMarchingCubes,
+                                                               computeRegularStructure);
+    filter.SetActiveField(fieldName);
+
+#ifdef VTKM_ENABLE_MPI
+    vtkm::cont::PartitionedDataSet pds;
+    int mpiRank = 0;
+    int mpiSize = 1;
+
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    GetPartitionedDataSet(ds, fieldName, mpiSize, mpiRank, mpiSize, pds);
+    ShiftLogicalOriginToZero(pds);
+    ComputeGlobalPointSize(pds);
+    auto result = filter.Execute(pds);
+#else
+    filter.Execute(ds);
+#endif
+
+    // Compute the saddle peaks to make sure the contour tree is correct
+    vtkm::worklet::contourtree_augmented::EdgePairArray saddlePeak;
+    vtkm::worklet::contourtree_augmented::ProcessContourTree::CollectSortedSuperarcs(
+      filter.GetContourTree(), filter.GetSortOrder(), saddlePeak);
+
+    // Print the contour tree we computed
+    std::cout << "Computed Contour Tree" << std::endl;
+    vtkm::worklet::contourtree_augmented::PrintEdgePairArrayColumnLayout(saddlePeak);
+
+    // Do analysis.
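+    // In the MPI build the analysis runs on rank 0 only: with a single rank the input field of
+    // the partitioned data set is used directly; with multiple ranks the field of the filter
+    // output (which is already in sorted order) is used instead.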
+    bool dataFieldIsSorted = false;
+    std::vector<ValueType> isoValues;
+
+#ifdef VTKM_ENABLE_MPI
+    if (mpiRank != 0)
+      return;
+
+    if (mpiSize == 1)
+    {
+      analysis(filter,
+               dataFieldIsSorted,
+               pds.GetPartitions()[0].GetField(fieldName).GetData(),
+               3,
+               isoValues);
+    }
+    else
+    {
+      dataFieldIsSorted = true;
+      analysis(
+        filter, dataFieldIsSorted, result.GetPartitions()[0].GetField(0).GetData(), 3, isoValues);
+    }
+#else
+    analysis(filter, dataFieldIsSorted, ds.GetField(fieldName).GetData(), 3, isoValues);
+#endif
+
+    std::ostringstream os;
+
+    os << "[" << isoValues[0] << "," << isoValues[1] << "," << isoValues[2] << "]";
+    std::cout << "COMPUTED_ISOVALUES:" << os.str() << std::endl;
+    std::cout << "EXPECTED ISOVALUES:"
+              << "[40,75,87]" << std::endl;
+    VTKM_TEST_ASSERT(os.str() == "[40,75,87]");
+  }
+
   void operator()() const
   {
     // Test 2D Freudenthal with augmentation
@@ -480,6 +811,9 @@ public:
     this->TestContourTree_Mesh3D_MarchingCubes_NonCubicExtents(0);
     // Make sure the contour tree does not change when we use boundary augmentation
     this->TestContourTree_Mesh3D_MarchingCubes_NonCubicExtents(2);
+
+    // Test Analysis
+    this->TestAnalysis();
   }
 };
 }
diff --git a/vtkm/filter/scalar_topology/worklet/contourtree_augmented/ProcessContourTree.h b/vtkm/filter/scalar_topology/worklet/contourtree_augmented/ProcessContourTree.h
index 26a56e165..cc36eed87 100644
--- a/vtkm/filter/scalar_topology/worklet/contourtree_augmented/ProcessContourTree.h
+++ b/vtkm/filter/scalar_topology/worklet/contourtree_augmented/ProcessContourTree.h
@@ -436,7 +436,7 @@ public:
     vtkm::cont::ArrayHandle<EdgePair> superarcList;
     vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleConstant<EdgePair>(EdgePair(-1, -1), nSuperarcs),
                           superarcList);
-    auto superarcListPortal = superarcList.WritePortal();
+    auto superarcListWritePortal = superarcList.WritePortal();
     vtkm::Id totalVolume = contourTree.Nodes.GetNumberOfValues();
 #ifdef DEBUG_PRINT
     std::cout << "Total Volume: " << totalVolume << std::endl;
@@ -449,8 +449,8 @@
     { // per superarc
       if (IsAscending(superarcsPortal.Get(superarc)))
       { // ascending superarc
-        superarcListPortal.Set(superarc,
-                               EdgePair(superarc, MaskedIndex(superarcsPortal.Get(superarc))));
+        superarcListWritePortal.Set(superarc,
+                                    EdgePair(superarc, MaskedIndex(superarcsPortal.Get(superarc))));
         upWeightPortal.Set(superarc, superarcDependentWeightPortal.Get(superarc));
         // at the inner end, dependent weight is the total in the subtree. Then there are vertices along the edge itself (intrinsic weight), including the supernode at the outer end
         // So, to get the "dependent" weight in the other direction, we start with totalVolume - dependent, then subtract (intrinsic - 1)
@@ -460,8 +460,8 @@
       } // ascending superarc
       else
       { // descending superarc
-        superarcListPortal.Set(superarc,
-                               EdgePair(MaskedIndex(superarcsPortal.Get(superarc)), superarc));
+        superarcListWritePortal.Set(superarc,
+                                    EdgePair(MaskedIndex(superarcsPortal.Get(superarc)), superarc));
         downWeightPortal.Set(superarc, superarcDependentWeightPortal.Get(superarc));
         // at the inner end, dependent weight is the total in the subtree. Then there are vertices along the edge itself (intrinsic weight), including the supernode at the outer end
         // So, to get the "dependent" weight in the other direction, we start with totalVolume - dependent, then subtract (intrinsic - 1)
@@ -493,17 +493,21 @@
       superarcSorter,
       process_contourtree_inc_ns::SuperArcVolumetricComparator(upWeight, superarcList, false));
 
+    // Initialize the read portal after the in-place sort algorithm (needed for the Kokkos backend)
+    auto superarcSorterReadPortal = superarcSorter.ReadPortal();
+
     // II B 2.  Per segment, best superarc writes to the best upward array
     for (vtkm::Id superarc = 0; superarc < nSuperarcs; superarc++)
     { // per superarc
-      vtkm::Id superarcID = superarcSorterPortal.Get(superarc);
-      const EdgePair& edge = superarcListPortal.Get(superarcID);
+      vtkm::Id superarcID = superarcSorterReadPortal.Get(superarc);
+      const EdgePair& edge = superarcListWritePortal.Get(superarcID);
       // if it's the last one
       if (superarc == nSuperarcs - 1)
         bestDownwardPortal.Set(edge.second, edge.first);
       else
       { // not the last one
-        const EdgePair& nextEdge = superarcListPortal.Get(superarcSorterPortal.Get(superarc + 1));
+        const EdgePair& nextEdge =
+          superarcListWritePortal.Get(superarcSorterReadPortal.Get(superarc + 1));
         // if the next edge belongs to another, we're the highest
         if (nextEdge.second != edge.second)
           bestDownwardPortal.Set(edge.second, edge.first);
@@ -515,17 +519,21 @@
       superarcSorter,
       process_contourtree_inc_ns::SuperArcVolumetricComparator(downWeight, superarcList, true));
 
+    // Re-initialize the read portal after the in-place sort algorithm (needed for the Kokkos backend)
+    superarcSorterReadPortal = superarcSorter.ReadPortal();
+
     // II B 2.  Per segment, best superarc writes to the best upward array
     for (vtkm::Id superarc = 0; superarc < nSuperarcs; superarc++)
     { // per superarc
-      vtkm::Id superarcID = superarcSorterPortal.Get(superarc);
-      const EdgePair& edge = superarcListPortal.Get(superarcID);
+      vtkm::Id superarcID = superarcSorterReadPortal.Get(superarc);
+      const EdgePair& edge = superarcListWritePortal.Get(superarcID);
       // if it's the last one
       if (superarc == nSuperarcs - 1)
         bestUpwardPortal.Set(edge.first, edge.second);
       else
       { // not the last one
-        const EdgePair& nextEdge = superarcListPortal.Get(superarcSorterPortal.Get(superarc + 1));
+        const EdgePair& nextEdge =
+          superarcListWritePortal.Get(superarcSorterReadPortal.Get(superarc + 1));
         // if the next edge belongs to another, we're the highest
         if (nextEdge.first != edge.first)
           bestUpwardPortal.Set(edge.first, edge.second);