diff --git a/docs/changelog/statistics-filter.md b/docs/changelog/statistics-filter.md index d20cfa74d..0601c5045 100644 --- a/docs/changelog/statistics-filter.md +++ b/docs/changelog/statistics-filter.md @@ -1,3 +1,4 @@ # New Statistics filter -The statistics filter computes the descriptive statistics of the fields specified by users based on `DescriptiveStatistics`. Users can set `RequiredStatsList` to specify which statistics will be stored in the output data set. +The statistics filter computes the descriptive statistics of the fields specified by users based on `DescriptiveStatistics`. Users can set `RequiredStatsList` to specify which statistics will be stored in the output data set. The statistics filter supports the distributed memory case based on the vtkmdiy, and the process with rank 0 will return the correct final reduced results. + diff --git a/vtkm/filter/density_estimate/Statistics.cxx b/vtkm/filter/density_estimate/Statistics.cxx index 2b9a459f8..4d1c61128 100644 --- a/vtkm/filter/density_estimate/Statistics.cxx +++ b/vtkm/filter/density_estimate/Statistics.cxx @@ -7,9 +7,15 @@ // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. See the above copyright notice for more information. //============================================================================ +#include #include #include +#include #include +#ifdef VTKM_ENABLE_MPI +#include +#include +#endif namespace vtkm { @@ -17,98 +23,193 @@ namespace filter { namespace density_estimate { +//refer to this paper https://www.osti.gov/servlets/purl/1028931 +//for the math of computing distributed statistics +//using anonymous namespace +namespace +{ +using StatValueType = vtkm::worklet::DescriptiveStatistics::StatState; +class DistributedStatistics +{ + vtkm::cont::ArrayHandle localStatisticsValues; + +public: + DistributedStatistics(vtkm::Id numLocalBlocks) + { + this->localStatisticsValues.Allocate(numLocalBlocks); + } + + void SetLocalStatistics(vtkm::Id index, StatValueType& value) + { + this->localStatisticsValues.WritePortal().Set(index, value); + } + + StatValueType ReduceStatisticsDiy() const + { + using Algorithm = vtkm::cont::Algorithm; + // The StatValueType struct overloads the + operator. Reduce is using to properly + // combine statistical measures such as mean, standard deviation, and others. So, + // the Reduce is computing the global statistics over partitions rather than a + // simple sum. + StatValueType statePerRank = Algorithm::Reduce(this->localStatisticsValues, StatValueType{}); + StatValueType stateResult = statePerRank; +#ifdef VTKM_ENABLE_MPI + auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator(); + if (comm.size() == 1) + { + return statePerRank; + } + + vtkmdiy::Master master( + comm, + 1, + -1, + []() -> void* { return new StatValueType(); }, + [](void* ptr) { delete static_cast(ptr); }); + + vtkmdiy::ContiguousAssigner assigner(/*num ranks*/ comm.size(), + /*global-num-blocks*/ comm.size()); + vtkmdiy::RegularDecomposer decomposer( + /*dims*/ 1, vtkmdiy::interval(0, assigner.nblocks() - 1), assigner.nblocks()); + decomposer.decompose(comm.rank(), assigner, master); + VTKM_ASSERT(static_cast(master.size()) == 1); + + //adding data into master + *master.block(0) = statePerRank; + auto callback = [](StatValueType* result, + const vtkmdiy::ReduceProxy& srp, + const vtkmdiy::RegularMergePartners&) { + const auto selfid = srp.gid(); + // 1. dequeue. + std::vector incoming; + srp.incoming(incoming); + for (const int gid : incoming) + { + if (gid != selfid) + { + StatValueType inData; + srp.dequeue(gid, inData); + *result = *result + inData; + } + } + // 2. enqueue + for (int cc = 0; cc < srp.out_link().size(); ++cc) + { + auto target = srp.out_link().target(cc); + if (target.gid != selfid) + { + srp.enqueue(target, *result); + } + } + }; + + vtkmdiy::RegularMergePartners partners(decomposer, /*k=*/2); + vtkmdiy::reduce(master, assigner, partners, callback); + + //only rank 0 process returns the correct results + if (master.local(0)) + { + stateResult = *master.block(0); + } + else + { + stateResult = StatValueType(); + } +#endif + return stateResult; + } +}; +} + +vtkm::FloatDefault ExtractVariable(vtkm::cont::DataSet dataset, const std::string& statName) +{ + vtkm::cont::ArrayHandle array; + dataset.GetField(statName).GetData().AsArrayHandle(array); + vtkm::cont::ArrayHandle::ReadPortalType portal = array.ReadPortal(); + vtkm::FloatDefault value = portal.Get(0); + return value; +} + +template +VTKM_CONT vtkm::cont::ArrayHandle SaveDataIntoArray(const T value) +{ + vtkm::cont::ArrayHandle stat; + stat.Allocate(1); + stat.WritePortal().Set(0, static_cast(value)); + return stat; +} + +VTKM_CONT StatValueType GetStatValueFromDataSet(const vtkm::cont::DataSet& data) +{ + vtkm::FloatDefault N = ExtractVariable(data, "N"); + vtkm::FloatDefault Min = ExtractVariable(data, "Min"); + vtkm::FloatDefault Max = ExtractVariable(data, "Max"); + vtkm::FloatDefault Sum = ExtractVariable(data, "Sum"); + vtkm::FloatDefault Mean = ExtractVariable(data, "Mean"); + vtkm::FloatDefault M2 = ExtractVariable(data, "M2"); + vtkm::FloatDefault M3 = ExtractVariable(data, "M3"); + vtkm::FloatDefault M4 = ExtractVariable(data, "M4"); + return StatValueType(N, Min, Max, Sum, Mean, M2, M3, M4); +} + +template +VTKM_CONT void SaveIntoDataSet(StatValueType& statValue, + DataSetType& output, + vtkm::cont::Field::Association association) +{ + output.AddField({ "N", association, SaveDataIntoArray(statValue.N()) }); + output.AddField({ "Min", association, SaveDataIntoArray(statValue.Min()) }); + output.AddField({ "Max", association, SaveDataIntoArray(statValue.Max()) }); + output.AddField({ "Sum", association, SaveDataIntoArray(statValue.Sum()) }); + output.AddField({ "Mean", association, SaveDataIntoArray(statValue.Mean()) }); + output.AddField({ "M2", association, SaveDataIntoArray(statValue.M2()) }); + output.AddField({ "M3", association, SaveDataIntoArray(statValue.M3()) }); + output.AddField({ "M4", association, SaveDataIntoArray(statValue.M4()) }); + output.AddField({ "SampleStddev", association, SaveDataIntoArray(statValue.SampleStddev()) }); + output.AddField( + { "PopulationStddev", association, SaveDataIntoArray(statValue.PopulationStddev()) }); + output.AddField({ "SampleVariance", association, SaveDataIntoArray(statValue.SampleVariance()) }); + output.AddField( + { "PopulationVariance", association, SaveDataIntoArray(statValue.PopulationVariance()) }); + output.AddField({ "Skewness", association, SaveDataIntoArray(statValue.Skewness()) }); + output.AddField({ "Kurtosis", association, SaveDataIntoArray(statValue.Kurtosis()) }); +} VTKM_CONT vtkm::cont::DataSet Statistics::DoExecute(const vtkm::cont::DataSet& inData) { vtkm::worklet::DescriptiveStatistics worklet; vtkm::cont::DataSet output; - - auto resolveType = [&](const auto& concrete) { - auto result = worklet.Run(concrete); - - for (size_t i = 0; i < RequiredStatsList.size(); i++) - { - vtkm::cont::ArrayHandle stat; - stat.Allocate(1); - Stats statEnum = RequiredStatsList[i]; - - switch (statEnum) - { - case Stats::N: - { - stat.WritePortal().Set(0, static_cast(result.N())); - break; - } - case Stats::Min: - { - stat.WritePortal().Set(0, static_cast(result.Min())); - break; - } - case Stats::Max: - { - stat.WritePortal().Set(0, static_cast(result.Max())); - break; - } - case Stats::Sum: - { - stat.WritePortal().Set(0, static_cast(result.Sum())); - break; - } - case Stats::Mean: - { - stat.WritePortal().Set(0, static_cast(result.Mean())); - break; - } - case Stats::SampleStdDev: - { - stat.WritePortal().Set(0, static_cast(result.SampleStddev())); - break; - } - case Stats::PopulationStdDev: - { - stat.WritePortal().Set(0, static_cast(result.PopulationStddev())); - break; - } - case Stats::SampleVariance: - { - stat.WritePortal().Set(0, static_cast(result.SampleVariance())); - break; - } - case Stats::PopulationVariance: - { - stat.WritePortal().Set(0, static_cast(result.PopulationVariance())); - break; - } - case Stats::Skewness: - { - stat.WritePortal().Set(0, static_cast(result.Skewness())); - break; - } - case Stats::Kurtosis: - { - stat.WritePortal().Set(0, static_cast(result.Kurtosis())); - break; - } - default: - { - throw vtkm::cont::ErrorFilterExecution( - "Unsupported statistics variable in statistics filter."); - } - } - - output.AddField({ this->StatsName[static_cast(statEnum)], - vtkm::cont::Field::Association::WholeDataSet, - stat }); - } - }; - const auto& fieldArray = this->GetFieldFromDataSet(inData).GetData(); - fieldArray - .CastAndCallForTypesWithFloatFallback( - resolveType); - + vtkm::cont::ArrayHandle input; + //TODO: GetFieldFromDataSet will throw an exception if the targeted Field does not exist in the data set + ArrayCopyShallowIfPossible(this->GetFieldFromDataSet(inData).GetData(), input); + StatValueType result = worklet.Run(input); + SaveIntoDataSet( + result, output, vtkm::cont::Field::Association::WholeDataSet); return output; } +VTKM_CONT vtkm::cont::PartitionedDataSet Statistics::DoExecutePartitions( + const vtkm::cont::PartitionedDataSet& input) +{ + // This operation will create a partitioned data set with a partition matching each input partition + // containing the local statistics. It will iterate through each partition in the input and call the + // DoExecute function. This is the same behavior as if we did not implement `DoExecutePartitions`. + // It has the added benefit of optimizations for concurrently executing small blocks. + vtkm::cont::PartitionedDataSet localOutput = this->FilterField::DoExecutePartitions(input); + vtkm::Id numPartitions = input.GetNumberOfPartitions(); + DistributedStatistics helper(numPartitions); + for (vtkm::Id i = 0; i < numPartitions; ++i) + { + const vtkm::cont::DataSet& localDS = localOutput.GetPartition(i); + StatValueType localStatisticsValues = GetStatValueFromDataSet(localDS); + helper.SetLocalStatistics(i, localStatisticsValues); + } + StatValueType result = helper.ReduceStatisticsDiy(); + vtkm::cont::PartitionedDataSet output; + SaveIntoDataSet( + result, output, vtkm::cont::Field::Association::Global); + return output; +} } // namespace density_estimate } // namespace filter } // namespace vtkm diff --git a/vtkm/filter/density_estimate/Statistics.h b/vtkm/filter/density_estimate/Statistics.h index 769fcb00f..a10afeaac 100644 --- a/vtkm/filter/density_estimate/Statistics.h +++ b/vtkm/filter/density_estimate/Statistics.h @@ -25,54 +25,10 @@ namespace density_estimate /// class VTKM_FILTER_DENSITY_ESTIMATE_EXPORT Statistics : public vtkm::filter::FilterField { -public: - enum struct Stats - { - N = 0, - Min, - Max, - Sum, - Mean, - SampleStdDev, - PopulationStdDev, - SampleVariance, - PopulationVariance, - Skewness, - Kurtosis - }; - - - /// \{ - /// \brief The output statistical variables for executing the statistics filter. - /// - void SetRequiredStats(const std::vector StatsList) { RequiredStatsList = StatsList; } - const std::vector& GetRequiredStats() const { return this->RequiredStatsList; } - /// \} private: VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input) override; - std::vector RequiredStatsList{ Stats::N, - Stats::Min, - Stats::Max, - Stats::Sum, - Stats::Mean, - Stats::SampleStdDev, - Stats::PopulationStdDev, - Stats::SampleVariance, - Stats::PopulationVariance, - Stats::Skewness, - Stats::Kurtosis }; - // This string vector stores variables names stored in the output dataset - std::vector StatsName{ "N", - "Min", - "Max", - "Sum", - "Mean", - "SampleStddev", - "PopulationStdDev", - "SampleVariance", - "PopulationVariance", - "Skewness", - "Kurtosis" }; + VTKM_CONT vtkm::cont::PartitionedDataSet DoExecutePartitions( + const vtkm::cont::PartitionedDataSet& inData) override; }; } // namespace density_estimate } // namespace filter diff --git a/vtkm/filter/density_estimate/testing/CMakeLists.txt b/vtkm/filter/density_estimate/testing/CMakeLists.txt index d36470d03..21b8ce4d9 100644 --- a/vtkm/filter/density_estimate/testing/CMakeLists.txt +++ b/vtkm/filter/density_estimate/testing/CMakeLists.txt @@ -15,6 +15,7 @@ set(unit_tests UnitTestNDHistogramFilter.cxx UnitTestPartitionedDataSetHistogramFilter.cxx UnitTestStatisticsFilter.cxx + UnitTestStatisticsFilterMPI.cxx ) set(unit_tests_device @@ -31,3 +32,14 @@ vtkm_unit_tests( LIBRARIES ${libraries} USE_VTKM_JOB_POOL ) + +if (VTKm_ENABLE_MPI) + set(mpi_unit_tests + UnitTestStatisticsFilterMPI.cxx + ) + vtkm_unit_tests( + MPI + DEVICE_SOURCES ${mpi_unit_tests} + USE_VTKM_JOB_POOL + ) +endif() diff --git a/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilter.cxx b/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilter.cxx index 15dade562..8a5fa557b 100644 --- a/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilter.cxx +++ b/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilter.cxx @@ -8,16 +8,16 @@ // PURPOSE. See the above copyright notice for more information. //============================================================================ -#include - +#include #include #include - +#include +#include namespace { - -vtkm::FloatDefault getStatsFromArray(vtkm::cont::DataSet dataset, const std::string statName) +template +vtkm::FloatDefault getStatsFromDataSet(const DataSetType& dataset, const std::string statName) { vtkm::cont::ArrayHandle array; dataset.GetField(statName).GetData().AsArrayHandle(array); @@ -30,56 +30,127 @@ void TestStatisticsPartial() { vtkm::cont::DataSet dataSet; constexpr vtkm::FloatDefault N = 1000; - // the number is from 0 to 999 - auto scalaArray = + auto scalarArrayCounting = vtkm::cont::ArrayHandleCounting(0.0f, 1.0f, static_cast(N)); - dataSet.AddPointField("scalarField", scalaArray); + vtkm::cont::ArrayHandle scalarArray; + vtkm::cont::ArrayCopy(scalarArrayCounting, scalarArray); + dataSet.AddPointField("scalarField", scalarArray); using STATS = vtkm::filter::density_estimate::Statistics; STATS statisticsFilter; - - //set required states - std::vector RequiredStatsList{ STATS::Stats::N, STATS::Stats::Sum, - STATS::Stats::Mean, STATS::Stats::SampleVariance, - STATS::Stats::Skewness, STATS::Stats::Kurtosis }; - - //default RequiredStatsList contains all statistics variables - statisticsFilter.SetRequiredStats(RequiredStatsList); using AsscoType = vtkm::cont::Field::Association; - statisticsFilter.SetActiveField("scalarField", AsscoType::Points); - - // We use the same test cases with the UnitTestDescriptiveStatistics.h vtkm::cont::DataSet resultDataSet = statisticsFilter.Execute(dataSet); - vtkm::FloatDefault NValueFromFilter = getStatsFromArray(resultDataSet, "N"); + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(resultDataSet, "N"); VTKM_TEST_ASSERT(test_equal(NValueFromFilter, N)); - vtkm::FloatDefault SumFromFilter = getStatsFromArray(resultDataSet, "Sum"); + vtkm::FloatDefault MinValueFromFilter = getStatsFromDataSet(resultDataSet, "Min"); + VTKM_TEST_ASSERT(test_equal(MinValueFromFilter, 0)); + + vtkm::FloatDefault MaxValueFromFilter = getStatsFromDataSet(resultDataSet, "Max"); + VTKM_TEST_ASSERT(test_equal(MaxValueFromFilter, N - 1)); + + vtkm::FloatDefault SumFromFilter = getStatsFromDataSet(resultDataSet, "Sum"); VTKM_TEST_ASSERT(test_equal(SumFromFilter, N * (N - 1) / 2)); - vtkm::FloatDefault MeanFromFilter = getStatsFromArray(resultDataSet, "Mean"); + vtkm::FloatDefault MeanFromFilter = getStatsFromDataSet(resultDataSet, "Mean"); VTKM_TEST_ASSERT(test_equal(MeanFromFilter, (N - 1) / 2)); - vtkm::FloatDefault SVFromFilter = getStatsFromArray(resultDataSet, "SampleVariance"); + vtkm::FloatDefault SVFromFilter = getStatsFromDataSet(resultDataSet, "SampleVariance"); VTKM_TEST_ASSERT(test_equal(SVFromFilter, 83416.66)); - vtkm::FloatDefault SkewnessFromFilter = getStatsFromArray(resultDataSet, "Skewness"); + vtkm::FloatDefault SstddevFromFilter = getStatsFromDataSet(resultDataSet, "SampleStddev"); + VTKM_TEST_ASSERT(test_equal(SstddevFromFilter, 288.819)); + + vtkm::FloatDefault SkewnessFromFilter = getStatsFromDataSet(resultDataSet, "Skewness"); VTKM_TEST_ASSERT(test_equal(SkewnessFromFilter, 0)); // we use fisher=False when computing the Kurtosis value - vtkm::FloatDefault KurtosisFromFilter = getStatsFromArray(resultDataSet, "Kurtosis"); + vtkm::FloatDefault KurtosisFromFilter = getStatsFromDataSet(resultDataSet, "Kurtosis"); VTKM_TEST_ASSERT(test_equal(KurtosisFromFilter, 1.8)); + + vtkm::FloatDefault PopulationStddev = getStatsFromDataSet(resultDataSet, "PopulationStddev"); + VTKM_TEST_ASSERT(test_equal(PopulationStddev, 288.675)); + + vtkm::FloatDefault PopulationVariance = getStatsFromDataSet(resultDataSet, "PopulationVariance"); + VTKM_TEST_ASSERT(test_equal(PopulationVariance, 83333.3)); } +void TestStatisticsPartition() +{ + std::vector dataSetList; + constexpr vtkm::FloatDefault N = 1000; + + for (vtkm::Id i = 0; i < 10; i++) + { + vtkm::cont::DataSet dataSet; + constexpr vtkm::FloatDefault localN = N / 10; + vtkm::cont::ArrayHandle scalarArray; + scalarArray.Allocate(static_cast(localN)); + auto writePortal = scalarArray.WritePortal(); + for (vtkm::Id j = 0; j < static_cast(localN); j++) + { + writePortal.Set(j, static_cast(i * localN + j)); + } + dataSet.AddPointField("scalarField", scalarArray); + dataSetList.push_back(dataSet); + } + + //adding data sets for testing edge cases + vtkm::cont::DataSet dataSetEmptyField; + dataSetEmptyField.AddPointField("scalarField", vtkm::cont::ArrayHandle()); + dataSetList.push_back(dataSetEmptyField); + + vtkm::cont::PartitionedDataSet pds(dataSetList); + using STATS = vtkm::filter::density_estimate::Statistics; + STATS statisticsFilter; + using AsscoType = vtkm::cont::Field::Association; + statisticsFilter.SetActiveField("scalarField", AsscoType::Points); + vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds); + + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N"); + VTKM_TEST_ASSERT(test_equal(NValueFromFilter, N)); + + vtkm::FloatDefault MinValueFromFilter = getStatsFromDataSet(outputPDS, "Min"); + VTKM_TEST_ASSERT(test_equal(MinValueFromFilter, 0)); + + vtkm::FloatDefault MaxValueFromFilter = getStatsFromDataSet(outputPDS, "Max"); + VTKM_TEST_ASSERT(test_equal(MaxValueFromFilter, N - 1)); + + vtkm::FloatDefault SumFromFilter = getStatsFromDataSet(outputPDS, "Sum"); + VTKM_TEST_ASSERT(test_equal(SumFromFilter, N * (N - 1) / 2)); + + vtkm::FloatDefault MeanFromFilter = getStatsFromDataSet(outputPDS, "Mean"); + VTKM_TEST_ASSERT(test_equal(MeanFromFilter, (N - 1) / 2)); + + vtkm::FloatDefault SVFromFilter = getStatsFromDataSet(outputPDS, "SampleVariance"); + VTKM_TEST_ASSERT(test_equal(SVFromFilter, 83416.66)); + + vtkm::FloatDefault SstddevFromFilter = getStatsFromDataSet(outputPDS, "SampleStddev"); + VTKM_TEST_ASSERT(test_equal(SstddevFromFilter, 288.819)); + + vtkm::FloatDefault SkewnessFromFilter = getStatsFromDataSet(outputPDS, "Skewness"); + VTKM_TEST_ASSERT(test_equal(SkewnessFromFilter, 0)); + + // we use fisher=False when computing the Kurtosis value + vtkm::FloatDefault KurtosisFromFilter = getStatsFromDataSet(outputPDS, "Kurtosis"); + VTKM_TEST_ASSERT(test_equal(KurtosisFromFilter, 1.8)); + + vtkm::FloatDefault PopulationStddev = getStatsFromDataSet(outputPDS, "PopulationStddev"); + VTKM_TEST_ASSERT(test_equal(PopulationStddev, 288.675)); + + vtkm::FloatDefault PopulationVariance = getStatsFromDataSet(outputPDS, "PopulationVariance"); + VTKM_TEST_ASSERT(test_equal(PopulationVariance, 83333.3)); +} void TestStatistics() { TestStatisticsPartial(); + TestStatisticsPartition(); } // TestFieldStatistics } -//More deatiled tests can be found in the UnitTestStatisticsFilter int UnitTestStatisticsFilter(int argc, char* argv[]) { return vtkm::cont::testing::Testing::Run(TestStatistics, argc, argv); diff --git a/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilterMPI.cxx b/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilterMPI.cxx new file mode 100644 index 000000000..414c24f84 --- /dev/null +++ b/vtkm/filter/density_estimate/testing/UnitTestStatisticsFilterMPI.cxx @@ -0,0 +1,267 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +//============================================================================ + +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +vtkm::FloatDefault getStatsFromDataSet(const vtkm::cont::PartitionedDataSet& dataset, + const std::string statName) +{ + vtkm::cont::ArrayHandle array; + dataset.GetField(statName).GetData().AsArrayHandle(array); + vtkm::cont::ArrayHandle::ReadPortalType portal = array.ReadPortal(); + vtkm::FloatDefault value = portal.Get(0); + return value; +} + +void checkResulst(const vtkm::cont::PartitionedDataSet& outputPDS, vtkm::FloatDefault N) +{ + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N"); + VTKM_TEST_ASSERT(test_equal(NValueFromFilter, N)); + + vtkm::FloatDefault MinValueFromFilter = getStatsFromDataSet(outputPDS, "Min"); + VTKM_TEST_ASSERT(test_equal(MinValueFromFilter, 0)); + + vtkm::FloatDefault MaxValueFromFilter = getStatsFromDataSet(outputPDS, "Max"); + VTKM_TEST_ASSERT(test_equal(MaxValueFromFilter, N - 1)); + + vtkm::FloatDefault SumFromFilter = getStatsFromDataSet(outputPDS, "Sum"); + VTKM_TEST_ASSERT(test_equal(SumFromFilter, N * (N - 1) / 2)); + + vtkm::FloatDefault MeanFromFilter = getStatsFromDataSet(outputPDS, "Mean"); + VTKM_TEST_ASSERT(test_equal(MeanFromFilter, (N - 1) / 2)); + + vtkm::FloatDefault SVFromFilter = getStatsFromDataSet(outputPDS, "SampleVariance"); + VTKM_TEST_ASSERT(test_equal(SVFromFilter, 83416.66)); + + vtkm::FloatDefault SstddevFromFilter = getStatsFromDataSet(outputPDS, "SampleStddev"); + VTKM_TEST_ASSERT(test_equal(SstddevFromFilter, 288.819)); + + vtkm::FloatDefault SkewnessFromFilter = getStatsFromDataSet(outputPDS, "Skewness"); + VTKM_TEST_ASSERT(test_equal(SkewnessFromFilter, 0)); + + // we use fisher=False when computing the Kurtosis value + vtkm::FloatDefault KurtosisFromFilter = getStatsFromDataSet(outputPDS, "Kurtosis"); + VTKM_TEST_ASSERT(test_equal(KurtosisFromFilter, 1.8)); + + vtkm::FloatDefault PopulationStddev = getStatsFromDataSet(outputPDS, "PopulationStddev"); + VTKM_TEST_ASSERT(test_equal(PopulationStddev, 288.675)); + + vtkm::FloatDefault PopulationVariance = getStatsFromDataSet(outputPDS, "PopulationVariance"); + VTKM_TEST_ASSERT(test_equal(PopulationVariance, 83333.3)); +} + +void TestStatisticsMPISingleDataSet() +{ + auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator(); + + constexpr vtkm::FloatDefault N = 1000; + vtkm::Id numProcs = comm.size(); + + vtkm::Id workloadBase = static_cast(N / numProcs); + vtkm::Id workloadActual = workloadBase; + if (static_cast(N) % numProcs != 0) + { + //updating the workload for last one + if (comm.rank() == numProcs - 1) + { + workloadActual = workloadActual + (static_cast(N) % numProcs); + } + } + + vtkm::cont::ArrayHandle scalarArray; + scalarArray.Allocate(static_cast(workloadActual)); + auto writePortal = scalarArray.WritePortal(); + for (vtkm::Id i = 0; i < static_cast(workloadActual); i++) + { + writePortal.Set(i, static_cast(workloadBase * comm.rank() + i)); + } + + vtkm::cont::DataSet dataSet; + dataSet.AddPointField("scalarField", scalarArray); + + using STATS = vtkm::filter::density_estimate::Statistics; + STATS statisticsFilter; + + using AsscoType = vtkm::cont::Field::Association; + statisticsFilter.SetActiveField("scalarField", AsscoType::Points); + std::vector dataSetList; + dataSetList.push_back(dataSet); + auto pds = vtkm::cont::PartitionedDataSet(dataSetList); + vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds); + + if (comm.rank() == 0) + { + checkResulst(outputPDS, N); + } + else + { + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N"); + VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0)); + } +} + +void TestStatisticsMPIPartitionDataSets() +{ + auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator(); + constexpr vtkm::FloatDefault N = 1000; + vtkm::Id numProcs = comm.size(); + + vtkm::Id workloadPerRankBase = static_cast(N / numProcs); + vtkm::Id workloadPerRankActual = workloadPerRankBase; + + if (static_cast(N) % numProcs != 0) + { + //updating the workload for last one + if (comm.rank() == numProcs - 1) + { + workloadPerRankActual = workloadPerRankActual + (static_cast(N) % numProcs); + } + } + + vtkm::Id numPartitions = 2; + vtkm::Id workloadPerPartition0 = workloadPerRankActual / numPartitions; + vtkm::Id workloadPerPartition1 = workloadPerRankActual - workloadPerPartition0; + + vtkm::Id offsetRank = workloadPerRankBase * comm.rank(); + std::vector dataSetList; + + vtkm::cont::ArrayHandle scalarArray0; + scalarArray0.Allocate(static_cast(workloadPerPartition0)); + auto writePortal0 = scalarArray0.WritePortal(); + vtkm::cont::DataSet dataSet0; + + for (vtkm::Id i = 0; i < workloadPerPartition0; i++) + { + writePortal0.Set(i, static_cast(offsetRank + i)); + } + + dataSet0.AddPointField("scalarField", scalarArray0); + dataSetList.push_back(dataSet0); + + vtkm::cont::ArrayHandle scalarArray1; + scalarArray1.Allocate(static_cast(workloadPerPartition1)); + auto writePortal1 = scalarArray1.WritePortal(); + vtkm::cont::DataSet dataSet1; + + for (vtkm::Id i = 0; i < workloadPerPartition1; i++) + { + writePortal1.Set(i, static_cast(offsetRank + workloadPerPartition0 + i)); + } + + dataSet1.AddPointField("scalarField", scalarArray1); + dataSetList.push_back(dataSet1); + + auto pds = vtkm::cont::PartitionedDataSet(dataSetList); + + using STATS = vtkm::filter::density_estimate::Statistics; + STATS statisticsFilter; + using AsscoType = vtkm::cont::Field::Association; + statisticsFilter.SetActiveField("scalarField", AsscoType::Points); + + vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds); + if (comm.rank() == 0) + { + checkResulst(outputPDS, N); + } + else + { + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N"); + VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0)); + } +} + +void TestStatisticsMPIDataSetEmpty() +{ + auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator(); + constexpr vtkm::FloatDefault N = 1000; + vtkm::Id numProcs = comm.size(); + vtkm::Id numEmptyBlock = 1; + vtkm::Id numProcsWithWork = numProcs; + if (numProcs > 1) + { + numProcsWithWork = numProcsWithWork - numEmptyBlock; + } + + vtkm::Id workloadBase = static_cast(N / (numProcsWithWork)); + vtkm::Id workloadActual = workloadBase; + if (static_cast(N) % numProcsWithWork != 0) + { + //updating the workload for last one + if (comm.rank() == numProcsWithWork - 1) + { + workloadActual = workloadActual + (static_cast(N) % numProcsWithWork); + } + } + + vtkm::cont::DataSet dataSet; + vtkm::cont::ArrayHandle scalarArray; + //for the proc with actual work + if (comm.rank() != numProcs - 1) + { + scalarArray.Allocate(static_cast(workloadActual)); + auto writePortal = scalarArray.WritePortal(); + for (vtkm::Id i = 0; i < static_cast(workloadActual); i++) + { + writePortal.Set(i, static_cast(workloadBase * comm.rank() + i)); + } + } + dataSet.AddPointField("scalarField", scalarArray); + + using STATS = vtkm::filter::density_estimate::Statistics; + STATS statisticsFilter; + + using AsscoType = vtkm::cont::Field::Association; + statisticsFilter.SetActiveField("scalarField", AsscoType::Points); + std::vector dataSetList; + dataSetList.push_back(dataSet); + auto pds = vtkm::cont::PartitionedDataSet(dataSetList); + vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds); + + if (comm.size() == 1) + { + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N"); + VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0)); + return; + } + + if (comm.rank() == 0) + { + checkResulst(outputPDS, N); + } + else + { + vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N"); + VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0)); + } +} + +void TestStatistics() +{ + TestStatisticsMPISingleDataSet(); + TestStatisticsMPIPartitionDataSets(); + TestStatisticsMPIDataSetEmpty(); +} // TestFieldStatistics +} + +//More deatiled tests can be found in the UnitTestStatisticsFilter +int UnitTestStatisticsFilterMPI(int argc, char* argv[]) +{ + vtkmdiy::mpi::environment env(argc, argv); + vtkmdiy::mpi::communicator world; + return vtkm::cont::testing::Testing::Run(TestStatistics, argc, argv); +} diff --git a/vtkm/filter/density_estimate/vtkm.module b/vtkm/filter/density_estimate/vtkm.module index 2bdced389..c66fc406e 100644 --- a/vtkm/filter/density_estimate/vtkm.module +++ b/vtkm/filter/density_estimate/vtkm.module @@ -6,6 +6,8 @@ DEPENDS vtkm_filter_core PRIVATE_DEPENDS vtkm_worklet +OPTIONAL_DEPENDS + MPI::MPI_CXX TEST_DEPENDS vtkm_filter_density_estimate vtkm_source diff --git a/vtkm/worklet/DescriptiveStatistics.h b/vtkm/worklet/DescriptiveStatistics.h index a2ba0cac0..65787b582 100644 --- a/vtkm/worklet/DescriptiveStatistics.h +++ b/vtkm/worklet/DescriptiveStatistics.h @@ -51,12 +51,33 @@ public: { } + VTKM_EXEC_CONT + StatState(T n, T min, T max, T sum, T mean, T M2, T M3, T M4) + : n_(n) + , min_(min) + , max_(max) + , sum_(sum) + , mean_(mean) + , M2_(M2) + , M3_(M3) + , M4_(M4) + { + } + VTKM_EXEC_CONT StatState operator+(const StatState& y) const { const StatState& x = *this; - StatState result; + if (y.n_ == 0) + { + return x; + } + if (x.n_ == 0) + { + return y; + } + StatState result; result.n_ = x.n_ + y.n_; result.min_ = vtkm::Min(x.min_, y.min_); @@ -97,8 +118,7 @@ public: return result; } - VTKM_EXEC_CONT - T N() const { return this->n_; } + VTKM_EXEC_CONT T N() const { return this->n_; } VTKM_EXEC_CONT T Min() const { return this->min_; } @@ -112,6 +132,15 @@ public: VTKM_EXEC_CONT T Mean() const { return this->mean_; } + VTKM_EXEC_CONT + T M2() const { return this->M2_; } + + VTKM_EXEC_CONT + T M3() const { return this->M3_; } + + VTKM_EXEC_CONT + T M4() const { return this->M4_; } + VTKM_EXEC_CONT T SampleStddev() const { return vtkm::Sqrt(this->SampleVariance()); } @@ -121,17 +150,27 @@ public: VTKM_EXEC_CONT T SampleVariance() const { - VTKM_ASSERT(n_ != 1); + if (this->n_ <= 1) + { + return 0; + } return this->M2_ / (this->n_ - 1); } VTKM_EXEC_CONT - T PopulationVariance() const { return this->M2_ / this->n_; } + T PopulationVariance() const + { + if (this->M2_ == 0 || this->n_ == 0) + { + return T(0); + } + return this->M2_ / this->n_; + } VTKM_EXEC_CONT T Skewness() const { - if (this->M2_ == 0) + if (this->M2_ == 0 || this->n_ == 0) // Shamelessly swiped from Boost Math // The limit is technically undefined, but the interpretation here is clear: // A constant dataset has no skewness. @@ -143,7 +182,7 @@ public: VTKM_EXEC_CONT T Kurtosis() const { - if (this->M2_ == 0) + if (this->M2_ == 0 || this->n_ == 0) // Shamelessly swiped from Boost Math // The limit is technically undefined, but the interpretation here is clear: // A constant dataset has no kurtosis. diff --git a/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx b/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx index 1f6550215..cf5c06f51 100644 --- a/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx +++ b/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx @@ -238,6 +238,53 @@ void TestMomentsByKey() } } +void TestEdgeCases() +{ + using StatValueType = vtkm::worklet::DescriptiveStatistics::StatState; + StatValueType state1(42); + StatValueType state2; + + StatValueType result = state1 + state2; + VTKM_TEST_ASSERT(test_equal(result.N(), 1)); + VTKM_TEST_ASSERT(test_equal(result.Min(), 42)); + VTKM_TEST_ASSERT(test_equal(result.Max(), 42)); + VTKM_TEST_ASSERT(test_equal(result.Mean(), 42)); + VTKM_TEST_ASSERT(test_equal(result.SampleVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0)); + VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 0)); + + result = state2 + state1; + VTKM_TEST_ASSERT(test_equal(result.N(), 1)); + VTKM_TEST_ASSERT(test_equal(result.Min(), 42)); + VTKM_TEST_ASSERT(test_equal(result.Max(), 42)); + VTKM_TEST_ASSERT(test_equal(result.Mean(), 42)); + VTKM_TEST_ASSERT(test_equal(result.SampleVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0)); + VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 0)); + + StatValueType empty; + VTKM_TEST_ASSERT(test_equal(empty.N(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.Min(), std::numeric_limits::max())); + VTKM_TEST_ASSERT(test_equal(empty.Max(), std::numeric_limits::lowest())); + VTKM_TEST_ASSERT(test_equal(empty.Mean(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.SampleVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.PopulationVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.Skewness(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.Kurtosis(), 0)); + + result = empty + empty; + VTKM_TEST_ASSERT(test_equal(empty.N(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.Min(), std::numeric_limits::max())); + VTKM_TEST_ASSERT(test_equal(empty.Max(), std::numeric_limits::lowest())); + VTKM_TEST_ASSERT(test_equal(empty.Mean(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.SampleVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.PopulationVariance(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.Skewness(), 0)); + VTKM_TEST_ASSERT(test_equal(empty.Kurtosis(), 0)); +} + void TestDescriptiveStatistics() { TestSingle(); @@ -249,6 +296,7 @@ void TestDescriptiveStatistics() TestMeanProperties(); TestVarianceProperty(); TestMomentsByKey(); + TestEdgeCases(); } int UnitTestDescriptiveStatistics(int argc, char* argv[])