add distributed statistics

This commit is contained in:
Jay 2023-03-17 09:59:27 +00:00
parent f79538bf85
commit d8f3e68d1d
9 changed files with 661 additions and 164 deletions

@ -1,3 +1,4 @@
# New Statistics filter
The statistics filter computes the descriptive statistics of the fields specified by users based on `DescriptiveStatistics`. Users can set `RequiredStatsList` to specify which statistics will be stored in the output data set.
The statistics filter computes the descriptive statistics of the fields specified by users based on `DescriptiveStatistics`. Users can set `RequiredStatsList` to specify which statistics will be stored in the output data set. The statistics filter supports the distributed memory case based on the vtkmdiy, and the process with rank 0 will return the correct final reduced results.

@ -7,9 +7,15 @@
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/ErrorFilterExecution.h>
#include <vtkm/filter/density_estimate/Statistics.h>
#include <vtkm/thirdparty/diy/diy.h>
#include <vtkm/worklet/DescriptiveStatistics.h>
#ifdef VTKM_ENABLE_MPI
#include <mpi.h>
#include <vtkm/thirdparty/diy/mpi-cast.h>
#endif
namespace vtkm
{
@ -17,98 +23,193 @@ namespace filter
{
namespace density_estimate
{
//refer to this paper https://www.osti.gov/servlets/purl/1028931
//for the math of computing distributed statistics
//using anonymous namespace
namespace
{
using StatValueType = vtkm::worklet::DescriptiveStatistics::StatState<vtkm::FloatDefault>;
class DistributedStatistics
{
vtkm::cont::ArrayHandle<StatValueType> localStatisticsValues;
public:
DistributedStatistics(vtkm::Id numLocalBlocks)
{
this->localStatisticsValues.Allocate(numLocalBlocks);
}
void SetLocalStatistics(vtkm::Id index, StatValueType& value)
{
this->localStatisticsValues.WritePortal().Set(index, value);
}
StatValueType ReduceStatisticsDiy() const
{
using Algorithm = vtkm::cont::Algorithm;
// The StatValueType struct overloads the + operator. Reduce is using to properly
// combine statistical measures such as mean, standard deviation, and others. So,
// the Reduce is computing the global statistics over partitions rather than a
// simple sum.
StatValueType statePerRank = Algorithm::Reduce(this->localStatisticsValues, StatValueType{});
StatValueType stateResult = statePerRank;
#ifdef VTKM_ENABLE_MPI
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
if (comm.size() == 1)
{
return statePerRank;
}
vtkmdiy::Master master(
comm,
1,
-1,
[]() -> void* { return new StatValueType(); },
[](void* ptr) { delete static_cast<StatValueType*>(ptr); });
vtkmdiy::ContiguousAssigner assigner(/*num ranks*/ comm.size(),
/*global-num-blocks*/ comm.size());
vtkmdiy::RegularDecomposer<vtkmdiy::DiscreteBounds> decomposer(
/*dims*/ 1, vtkmdiy::interval(0, assigner.nblocks() - 1), assigner.nblocks());
decomposer.decompose(comm.rank(), assigner, master);
VTKM_ASSERT(static_cast<vtkm::Id>(master.size()) == 1);
//adding data into master
*master.block<StatValueType>(0) = statePerRank;
auto callback = [](StatValueType* result,
const vtkmdiy::ReduceProxy& srp,
const vtkmdiy::RegularMergePartners&) {
const auto selfid = srp.gid();
// 1. dequeue.
std::vector<int> incoming;
srp.incoming(incoming);
for (const int gid : incoming)
{
if (gid != selfid)
{
StatValueType inData;
srp.dequeue(gid, inData);
*result = *result + inData;
}
}
// 2. enqueue
for (int cc = 0; cc < srp.out_link().size(); ++cc)
{
auto target = srp.out_link().target(cc);
if (target.gid != selfid)
{
srp.enqueue(target, *result);
}
}
};
vtkmdiy::RegularMergePartners partners(decomposer, /*k=*/2);
vtkmdiy::reduce(master, assigner, partners, callback);
//only rank 0 process returns the correct results
if (master.local(0))
{
stateResult = *master.block<StatValueType>(0);
}
else
{
stateResult = StatValueType();
}
#endif
return stateResult;
}
};
}
vtkm::FloatDefault ExtractVariable(vtkm::cont::DataSet dataset, const std::string& statName)
{
vtkm::cont::ArrayHandle<vtkm::FloatDefault> array;
dataset.GetField(statName).GetData().AsArrayHandle(array);
vtkm::cont::ArrayHandle<vtkm::FloatDefault>::ReadPortalType portal = array.ReadPortal();
vtkm::FloatDefault value = portal.Get(0);
return value;
}
template <typename T>
VTKM_CONT vtkm::cont::ArrayHandle<vtkm::FloatDefault> SaveDataIntoArray(const T value)
{
vtkm::cont::ArrayHandle<vtkm::FloatDefault> stat;
stat.Allocate(1);
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(value));
return stat;
}
VTKM_CONT StatValueType GetStatValueFromDataSet(const vtkm::cont::DataSet& data)
{
vtkm::FloatDefault N = ExtractVariable(data, "N");
vtkm::FloatDefault Min = ExtractVariable(data, "Min");
vtkm::FloatDefault Max = ExtractVariable(data, "Max");
vtkm::FloatDefault Sum = ExtractVariable(data, "Sum");
vtkm::FloatDefault Mean = ExtractVariable(data, "Mean");
vtkm::FloatDefault M2 = ExtractVariable(data, "M2");
vtkm::FloatDefault M3 = ExtractVariable(data, "M3");
vtkm::FloatDefault M4 = ExtractVariable(data, "M4");
return StatValueType(N, Min, Max, Sum, Mean, M2, M3, M4);
}
template <typename DataSetType>
VTKM_CONT void SaveIntoDataSet(StatValueType& statValue,
DataSetType& output,
vtkm::cont::Field::Association association)
{
output.AddField({ "N", association, SaveDataIntoArray(statValue.N()) });
output.AddField({ "Min", association, SaveDataIntoArray(statValue.Min()) });
output.AddField({ "Max", association, SaveDataIntoArray(statValue.Max()) });
output.AddField({ "Sum", association, SaveDataIntoArray(statValue.Sum()) });
output.AddField({ "Mean", association, SaveDataIntoArray(statValue.Mean()) });
output.AddField({ "M2", association, SaveDataIntoArray(statValue.M2()) });
output.AddField({ "M3", association, SaveDataIntoArray(statValue.M3()) });
output.AddField({ "M4", association, SaveDataIntoArray(statValue.M4()) });
output.AddField({ "SampleStddev", association, SaveDataIntoArray(statValue.SampleStddev()) });
output.AddField(
{ "PopulationStddev", association, SaveDataIntoArray(statValue.PopulationStddev()) });
output.AddField({ "SampleVariance", association, SaveDataIntoArray(statValue.SampleVariance()) });
output.AddField(
{ "PopulationVariance", association, SaveDataIntoArray(statValue.PopulationVariance()) });
output.AddField({ "Skewness", association, SaveDataIntoArray(statValue.Skewness()) });
output.AddField({ "Kurtosis", association, SaveDataIntoArray(statValue.Kurtosis()) });
}
VTKM_CONT vtkm::cont::DataSet Statistics::DoExecute(const vtkm::cont::DataSet& inData)
{
vtkm::worklet::DescriptiveStatistics worklet;
vtkm::cont::DataSet output;
auto resolveType = [&](const auto& concrete) {
auto result = worklet.Run(concrete);
for (size_t i = 0; i < RequiredStatsList.size(); i++)
{
vtkm::cont::ArrayHandle<vtkm::FloatDefault> stat;
stat.Allocate(1);
Stats statEnum = RequiredStatsList[i];
switch (statEnum)
{
case Stats::N:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.N()));
break;
}
case Stats::Min:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.Min()));
break;
}
case Stats::Max:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.Max()));
break;
}
case Stats::Sum:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.Sum()));
break;
}
case Stats::Mean:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.Mean()));
break;
}
case Stats::SampleStdDev:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.SampleStddev()));
break;
}
case Stats::PopulationStdDev:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.PopulationStddev()));
break;
}
case Stats::SampleVariance:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.SampleVariance()));
break;
}
case Stats::PopulationVariance:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.PopulationVariance()));
break;
}
case Stats::Skewness:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.Skewness()));
break;
}
case Stats::Kurtosis:
{
stat.WritePortal().Set(0, static_cast<vtkm::FloatDefault>(result.Kurtosis()));
break;
}
default:
{
throw vtkm::cont::ErrorFilterExecution(
"Unsupported statistics variable in statistics filter.");
}
}
output.AddField({ this->StatsName[static_cast<int>(statEnum)],
vtkm::cont::Field::Association::WholeDataSet,
stat });
}
};
const auto& fieldArray = this->GetFieldFromDataSet(inData).GetData();
fieldArray
.CastAndCallForTypesWithFloatFallback<vtkm::TypeListFieldScalar, VTKM_DEFAULT_STORAGE_LIST>(
resolveType);
vtkm::cont::ArrayHandle<vtkm::FloatDefault> input;
//TODO: GetFieldFromDataSet will throw an exception if the targeted Field does not exist in the data set
ArrayCopyShallowIfPossible(this->GetFieldFromDataSet(inData).GetData(), input);
StatValueType result = worklet.Run(input);
SaveIntoDataSet<vtkm::cont::DataSet>(
result, output, vtkm::cont::Field::Association::WholeDataSet);
return output;
}
VTKM_CONT vtkm::cont::PartitionedDataSet Statistics::DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& input)
{
// This operation will create a partitioned data set with a partition matching each input partition
// containing the local statistics. It will iterate through each partition in the input and call the
// DoExecute function. This is the same behavior as if we did not implement `DoExecutePartitions`.
// It has the added benefit of optimizations for concurrently executing small blocks.
vtkm::cont::PartitionedDataSet localOutput = this->FilterField::DoExecutePartitions(input);
vtkm::Id numPartitions = input.GetNumberOfPartitions();
DistributedStatistics helper(numPartitions);
for (vtkm::Id i = 0; i < numPartitions; ++i)
{
const vtkm::cont::DataSet& localDS = localOutput.GetPartition(i);
StatValueType localStatisticsValues = GetStatValueFromDataSet(localDS);
helper.SetLocalStatistics(i, localStatisticsValues);
}
StatValueType result = helper.ReduceStatisticsDiy();
vtkm::cont::PartitionedDataSet output;
SaveIntoDataSet<vtkm::cont::PartitionedDataSet>(
result, output, vtkm::cont::Field::Association::Global);
return output;
}
} // namespace density_estimate
} // namespace filter
} // namespace vtkm

@ -25,54 +25,10 @@ namespace density_estimate
///
class VTKM_FILTER_DENSITY_ESTIMATE_EXPORT Statistics : public vtkm::filter::FilterField
{
public:
enum struct Stats
{
N = 0,
Min,
Max,
Sum,
Mean,
SampleStdDev,
PopulationStdDev,
SampleVariance,
PopulationVariance,
Skewness,
Kurtosis
};
/// \{
/// \brief The output statistical variables for executing the statistics filter.
///
void SetRequiredStats(const std::vector<Stats> StatsList) { RequiredStatsList = StatsList; }
const std::vector<Stats>& GetRequiredStats() const { return this->RequiredStatsList; }
/// \}
private:
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input) override;
std::vector<Stats> RequiredStatsList{ Stats::N,
Stats::Min,
Stats::Max,
Stats::Sum,
Stats::Mean,
Stats::SampleStdDev,
Stats::PopulationStdDev,
Stats::SampleVariance,
Stats::PopulationVariance,
Stats::Skewness,
Stats::Kurtosis };
// This string vector stores variables names stored in the output dataset
std::vector<std::string> StatsName{ "N",
"Min",
"Max",
"Sum",
"Mean",
"SampleStddev",
"PopulationStdDev",
"SampleVariance",
"PopulationVariance",
"Skewness",
"Kurtosis" };
VTKM_CONT vtkm::cont::PartitionedDataSet DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& inData) override;
};
} // namespace density_estimate
} // namespace filter

@ -15,6 +15,7 @@ set(unit_tests
UnitTestNDHistogramFilter.cxx
UnitTestPartitionedDataSetHistogramFilter.cxx
UnitTestStatisticsFilter.cxx
UnitTestStatisticsFilterMPI.cxx
)
set(unit_tests_device
@ -31,3 +32,14 @@ vtkm_unit_tests(
LIBRARIES ${libraries}
USE_VTKM_JOB_POOL
)
if (VTKm_ENABLE_MPI)
set(mpi_unit_tests
UnitTestStatisticsFilterMPI.cxx
)
vtkm_unit_tests(
MPI
DEVICE_SOURCES ${mpi_unit_tests}
USE_VTKM_JOB_POOL
)
endif()

@ -8,16 +8,16 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/filter/density_estimate/Statistics.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/filter/density_estimate/Statistics.h>
#include <vtkm/thirdparty/diy/environment.h>
namespace
{
vtkm::FloatDefault getStatsFromArray(vtkm::cont::DataSet dataset, const std::string statName)
template <typename DataSetType>
vtkm::FloatDefault getStatsFromDataSet(const DataSetType& dataset, const std::string statName)
{
vtkm::cont::ArrayHandle<vtkm::FloatDefault> array;
dataset.GetField(statName).GetData().AsArrayHandle(array);
@ -30,56 +30,127 @@ void TestStatisticsPartial()
{
vtkm::cont::DataSet dataSet;
constexpr vtkm::FloatDefault N = 1000;
// the number is from 0 to 999
auto scalaArray =
auto scalarArrayCounting =
vtkm::cont::ArrayHandleCounting<vtkm::FloatDefault>(0.0f, 1.0f, static_cast<vtkm::Id>(N));
dataSet.AddPointField("scalarField", scalaArray);
vtkm::cont::ArrayHandle<vtkm::FloatDefault> scalarArray;
vtkm::cont::ArrayCopy(scalarArrayCounting, scalarArray);
dataSet.AddPointField("scalarField", scalarArray);
using STATS = vtkm::filter::density_estimate::Statistics;
STATS statisticsFilter;
//set required states
std::vector<STATS::Stats> RequiredStatsList{ STATS::Stats::N, STATS::Stats::Sum,
STATS::Stats::Mean, STATS::Stats::SampleVariance,
STATS::Stats::Skewness, STATS::Stats::Kurtosis };
//default RequiredStatsList contains all statistics variables
statisticsFilter.SetRequiredStats(RequiredStatsList);
using AsscoType = vtkm::cont::Field::Association;
statisticsFilter.SetActiveField("scalarField", AsscoType::Points);
// We use the same test cases with the UnitTestDescriptiveStatistics.h
vtkm::cont::DataSet resultDataSet = statisticsFilter.Execute(dataSet);
vtkm::FloatDefault NValueFromFilter = getStatsFromArray(resultDataSet, "N");
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(resultDataSet, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, N));
vtkm::FloatDefault SumFromFilter = getStatsFromArray(resultDataSet, "Sum");
vtkm::FloatDefault MinValueFromFilter = getStatsFromDataSet(resultDataSet, "Min");
VTKM_TEST_ASSERT(test_equal(MinValueFromFilter, 0));
vtkm::FloatDefault MaxValueFromFilter = getStatsFromDataSet(resultDataSet, "Max");
VTKM_TEST_ASSERT(test_equal(MaxValueFromFilter, N - 1));
vtkm::FloatDefault SumFromFilter = getStatsFromDataSet(resultDataSet, "Sum");
VTKM_TEST_ASSERT(test_equal(SumFromFilter, N * (N - 1) / 2));
vtkm::FloatDefault MeanFromFilter = getStatsFromArray(resultDataSet, "Mean");
vtkm::FloatDefault MeanFromFilter = getStatsFromDataSet(resultDataSet, "Mean");
VTKM_TEST_ASSERT(test_equal(MeanFromFilter, (N - 1) / 2));
vtkm::FloatDefault SVFromFilter = getStatsFromArray(resultDataSet, "SampleVariance");
vtkm::FloatDefault SVFromFilter = getStatsFromDataSet(resultDataSet, "SampleVariance");
VTKM_TEST_ASSERT(test_equal(SVFromFilter, 83416.66));
vtkm::FloatDefault SkewnessFromFilter = getStatsFromArray(resultDataSet, "Skewness");
vtkm::FloatDefault SstddevFromFilter = getStatsFromDataSet(resultDataSet, "SampleStddev");
VTKM_TEST_ASSERT(test_equal(SstddevFromFilter, 288.819));
vtkm::FloatDefault SkewnessFromFilter = getStatsFromDataSet(resultDataSet, "Skewness");
VTKM_TEST_ASSERT(test_equal(SkewnessFromFilter, 0));
// we use fisher=False when computing the Kurtosis value
vtkm::FloatDefault KurtosisFromFilter = getStatsFromArray(resultDataSet, "Kurtosis");
vtkm::FloatDefault KurtosisFromFilter = getStatsFromDataSet(resultDataSet, "Kurtosis");
VTKM_TEST_ASSERT(test_equal(KurtosisFromFilter, 1.8));
vtkm::FloatDefault PopulationStddev = getStatsFromDataSet(resultDataSet, "PopulationStddev");
VTKM_TEST_ASSERT(test_equal(PopulationStddev, 288.675));
vtkm::FloatDefault PopulationVariance = getStatsFromDataSet(resultDataSet, "PopulationVariance");
VTKM_TEST_ASSERT(test_equal(PopulationVariance, 83333.3));
}
void TestStatisticsPartition()
{
std::vector<vtkm::cont::DataSet> dataSetList;
constexpr vtkm::FloatDefault N = 1000;
for (vtkm::Id i = 0; i < 10; i++)
{
vtkm::cont::DataSet dataSet;
constexpr vtkm::FloatDefault localN = N / 10;
vtkm::cont::ArrayHandle<vtkm::FloatDefault> scalarArray;
scalarArray.Allocate(static_cast<vtkm::Id>(localN));
auto writePortal = scalarArray.WritePortal();
for (vtkm::Id j = 0; j < static_cast<vtkm::Id>(localN); j++)
{
writePortal.Set(j, static_cast<vtkm::FloatDefault>(i * localN + j));
}
dataSet.AddPointField("scalarField", scalarArray);
dataSetList.push_back(dataSet);
}
//adding data sets for testing edge cases
vtkm::cont::DataSet dataSetEmptyField;
dataSetEmptyField.AddPointField("scalarField", vtkm::cont::ArrayHandle<vtkm::FloatDefault>());
dataSetList.push_back(dataSetEmptyField);
vtkm::cont::PartitionedDataSet pds(dataSetList);
using STATS = vtkm::filter::density_estimate::Statistics;
STATS statisticsFilter;
using AsscoType = vtkm::cont::Field::Association;
statisticsFilter.SetActiveField("scalarField", AsscoType::Points);
vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds);
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, N));
vtkm::FloatDefault MinValueFromFilter = getStatsFromDataSet(outputPDS, "Min");
VTKM_TEST_ASSERT(test_equal(MinValueFromFilter, 0));
vtkm::FloatDefault MaxValueFromFilter = getStatsFromDataSet(outputPDS, "Max");
VTKM_TEST_ASSERT(test_equal(MaxValueFromFilter, N - 1));
vtkm::FloatDefault SumFromFilter = getStatsFromDataSet(outputPDS, "Sum");
VTKM_TEST_ASSERT(test_equal(SumFromFilter, N * (N - 1) / 2));
vtkm::FloatDefault MeanFromFilter = getStatsFromDataSet(outputPDS, "Mean");
VTKM_TEST_ASSERT(test_equal(MeanFromFilter, (N - 1) / 2));
vtkm::FloatDefault SVFromFilter = getStatsFromDataSet(outputPDS, "SampleVariance");
VTKM_TEST_ASSERT(test_equal(SVFromFilter, 83416.66));
vtkm::FloatDefault SstddevFromFilter = getStatsFromDataSet(outputPDS, "SampleStddev");
VTKM_TEST_ASSERT(test_equal(SstddevFromFilter, 288.819));
vtkm::FloatDefault SkewnessFromFilter = getStatsFromDataSet(outputPDS, "Skewness");
VTKM_TEST_ASSERT(test_equal(SkewnessFromFilter, 0));
// we use fisher=False when computing the Kurtosis value
vtkm::FloatDefault KurtosisFromFilter = getStatsFromDataSet(outputPDS, "Kurtosis");
VTKM_TEST_ASSERT(test_equal(KurtosisFromFilter, 1.8));
vtkm::FloatDefault PopulationStddev = getStatsFromDataSet(outputPDS, "PopulationStddev");
VTKM_TEST_ASSERT(test_equal(PopulationStddev, 288.675));
vtkm::FloatDefault PopulationVariance = getStatsFromDataSet(outputPDS, "PopulationVariance");
VTKM_TEST_ASSERT(test_equal(PopulationVariance, 83333.3));
}
void TestStatistics()
{
TestStatisticsPartial();
TestStatisticsPartition();
} // TestFieldStatistics
}
//More deatiled tests can be found in the UnitTestStatisticsFilter
int UnitTestStatisticsFilter(int argc, char* argv[])
{
return vtkm::cont::testing::Testing::Run(TestStatistics, argc, argv);

@ -0,0 +1,267 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/ErrorFilterExecution.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/filter/density_estimate/Statistics.h>
#include <vtkm/thirdparty/diy/diy.h>
#include <vtkm/thirdparty/diy/environment.h>
namespace
{
vtkm::FloatDefault getStatsFromDataSet(const vtkm::cont::PartitionedDataSet& dataset,
const std::string statName)
{
vtkm::cont::ArrayHandle<vtkm::FloatDefault> array;
dataset.GetField(statName).GetData().AsArrayHandle(array);
vtkm::cont::ArrayHandle<vtkm::FloatDefault>::ReadPortalType portal = array.ReadPortal();
vtkm::FloatDefault value = portal.Get(0);
return value;
}
void checkResulst(const vtkm::cont::PartitionedDataSet& outputPDS, vtkm::FloatDefault N)
{
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, N));
vtkm::FloatDefault MinValueFromFilter = getStatsFromDataSet(outputPDS, "Min");
VTKM_TEST_ASSERT(test_equal(MinValueFromFilter, 0));
vtkm::FloatDefault MaxValueFromFilter = getStatsFromDataSet(outputPDS, "Max");
VTKM_TEST_ASSERT(test_equal(MaxValueFromFilter, N - 1));
vtkm::FloatDefault SumFromFilter = getStatsFromDataSet(outputPDS, "Sum");
VTKM_TEST_ASSERT(test_equal(SumFromFilter, N * (N - 1) / 2));
vtkm::FloatDefault MeanFromFilter = getStatsFromDataSet(outputPDS, "Mean");
VTKM_TEST_ASSERT(test_equal(MeanFromFilter, (N - 1) / 2));
vtkm::FloatDefault SVFromFilter = getStatsFromDataSet(outputPDS, "SampleVariance");
VTKM_TEST_ASSERT(test_equal(SVFromFilter, 83416.66));
vtkm::FloatDefault SstddevFromFilter = getStatsFromDataSet(outputPDS, "SampleStddev");
VTKM_TEST_ASSERT(test_equal(SstddevFromFilter, 288.819));
vtkm::FloatDefault SkewnessFromFilter = getStatsFromDataSet(outputPDS, "Skewness");
VTKM_TEST_ASSERT(test_equal(SkewnessFromFilter, 0));
// we use fisher=False when computing the Kurtosis value
vtkm::FloatDefault KurtosisFromFilter = getStatsFromDataSet(outputPDS, "Kurtosis");
VTKM_TEST_ASSERT(test_equal(KurtosisFromFilter, 1.8));
vtkm::FloatDefault PopulationStddev = getStatsFromDataSet(outputPDS, "PopulationStddev");
VTKM_TEST_ASSERT(test_equal(PopulationStddev, 288.675));
vtkm::FloatDefault PopulationVariance = getStatsFromDataSet(outputPDS, "PopulationVariance");
VTKM_TEST_ASSERT(test_equal(PopulationVariance, 83333.3));
}
void TestStatisticsMPISingleDataSet()
{
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
constexpr vtkm::FloatDefault N = 1000;
vtkm::Id numProcs = comm.size();
vtkm::Id workloadBase = static_cast<vtkm::Id>(N / numProcs);
vtkm::Id workloadActual = workloadBase;
if (static_cast<vtkm::Id>(N) % numProcs != 0)
{
//updating the workload for last one
if (comm.rank() == numProcs - 1)
{
workloadActual = workloadActual + (static_cast<vtkm::Id>(N) % numProcs);
}
}
vtkm::cont::ArrayHandle<vtkm::FloatDefault> scalarArray;
scalarArray.Allocate(static_cast<vtkm::Id>(workloadActual));
auto writePortal = scalarArray.WritePortal();
for (vtkm::Id i = 0; i < static_cast<vtkm::Id>(workloadActual); i++)
{
writePortal.Set(i, static_cast<vtkm::FloatDefault>(workloadBase * comm.rank() + i));
}
vtkm::cont::DataSet dataSet;
dataSet.AddPointField("scalarField", scalarArray);
using STATS = vtkm::filter::density_estimate::Statistics;
STATS statisticsFilter;
using AsscoType = vtkm::cont::Field::Association;
statisticsFilter.SetActiveField("scalarField", AsscoType::Points);
std::vector<vtkm::cont::DataSet> dataSetList;
dataSetList.push_back(dataSet);
auto pds = vtkm::cont::PartitionedDataSet(dataSetList);
vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds);
if (comm.rank() == 0)
{
checkResulst(outputPDS, N);
}
else
{
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0));
}
}
void TestStatisticsMPIPartitionDataSets()
{
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
constexpr vtkm::FloatDefault N = 1000;
vtkm::Id numProcs = comm.size();
vtkm::Id workloadPerRankBase = static_cast<vtkm::Id>(N / numProcs);
vtkm::Id workloadPerRankActual = workloadPerRankBase;
if (static_cast<vtkm::Id>(N) % numProcs != 0)
{
//updating the workload for last one
if (comm.rank() == numProcs - 1)
{
workloadPerRankActual = workloadPerRankActual + (static_cast<vtkm::Id>(N) % numProcs);
}
}
vtkm::Id numPartitions = 2;
vtkm::Id workloadPerPartition0 = workloadPerRankActual / numPartitions;
vtkm::Id workloadPerPartition1 = workloadPerRankActual - workloadPerPartition0;
vtkm::Id offsetRank = workloadPerRankBase * comm.rank();
std::vector<vtkm::cont::DataSet> dataSetList;
vtkm::cont::ArrayHandle<vtkm::FloatDefault> scalarArray0;
scalarArray0.Allocate(static_cast<vtkm::Id>(workloadPerPartition0));
auto writePortal0 = scalarArray0.WritePortal();
vtkm::cont::DataSet dataSet0;
for (vtkm::Id i = 0; i < workloadPerPartition0; i++)
{
writePortal0.Set(i, static_cast<vtkm::FloatDefault>(offsetRank + i));
}
dataSet0.AddPointField("scalarField", scalarArray0);
dataSetList.push_back(dataSet0);
vtkm::cont::ArrayHandle<vtkm::FloatDefault> scalarArray1;
scalarArray1.Allocate(static_cast<vtkm::Id>(workloadPerPartition1));
auto writePortal1 = scalarArray1.WritePortal();
vtkm::cont::DataSet dataSet1;
for (vtkm::Id i = 0; i < workloadPerPartition1; i++)
{
writePortal1.Set(i, static_cast<vtkm::FloatDefault>(offsetRank + workloadPerPartition0 + i));
}
dataSet1.AddPointField("scalarField", scalarArray1);
dataSetList.push_back(dataSet1);
auto pds = vtkm::cont::PartitionedDataSet(dataSetList);
using STATS = vtkm::filter::density_estimate::Statistics;
STATS statisticsFilter;
using AsscoType = vtkm::cont::Field::Association;
statisticsFilter.SetActiveField("scalarField", AsscoType::Points);
vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds);
if (comm.rank() == 0)
{
checkResulst(outputPDS, N);
}
else
{
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0));
}
}
void TestStatisticsMPIDataSetEmpty()
{
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
constexpr vtkm::FloatDefault N = 1000;
vtkm::Id numProcs = comm.size();
vtkm::Id numEmptyBlock = 1;
vtkm::Id numProcsWithWork = numProcs;
if (numProcs > 1)
{
numProcsWithWork = numProcsWithWork - numEmptyBlock;
}
vtkm::Id workloadBase = static_cast<vtkm::Id>(N / (numProcsWithWork));
vtkm::Id workloadActual = workloadBase;
if (static_cast<vtkm::Id>(N) % numProcsWithWork != 0)
{
//updating the workload for last one
if (comm.rank() == numProcsWithWork - 1)
{
workloadActual = workloadActual + (static_cast<vtkm::Id>(N) % numProcsWithWork);
}
}
vtkm::cont::DataSet dataSet;
vtkm::cont::ArrayHandle<vtkm::FloatDefault> scalarArray;
//for the proc with actual work
if (comm.rank() != numProcs - 1)
{
scalarArray.Allocate(static_cast<vtkm::Id>(workloadActual));
auto writePortal = scalarArray.WritePortal();
for (vtkm::Id i = 0; i < static_cast<vtkm::Id>(workloadActual); i++)
{
writePortal.Set(i, static_cast<vtkm::FloatDefault>(workloadBase * comm.rank() + i));
}
}
dataSet.AddPointField("scalarField", scalarArray);
using STATS = vtkm::filter::density_estimate::Statistics;
STATS statisticsFilter;
using AsscoType = vtkm::cont::Field::Association;
statisticsFilter.SetActiveField("scalarField", AsscoType::Points);
std::vector<vtkm::cont::DataSet> dataSetList;
dataSetList.push_back(dataSet);
auto pds = vtkm::cont::PartitionedDataSet(dataSetList);
vtkm::cont::PartitionedDataSet outputPDS = statisticsFilter.Execute(pds);
if (comm.size() == 1)
{
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0));
return;
}
if (comm.rank() == 0)
{
checkResulst(outputPDS, N);
}
else
{
vtkm::FloatDefault NValueFromFilter = getStatsFromDataSet(outputPDS, "N");
VTKM_TEST_ASSERT(test_equal(NValueFromFilter, 0));
}
}
void TestStatistics()
{
TestStatisticsMPISingleDataSet();
TestStatisticsMPIPartitionDataSets();
TestStatisticsMPIDataSetEmpty();
} // TestFieldStatistics
}
//More deatiled tests can be found in the UnitTestStatisticsFilter
int UnitTestStatisticsFilterMPI(int argc, char* argv[])
{
vtkmdiy::mpi::environment env(argc, argv);
vtkmdiy::mpi::communicator world;
return vtkm::cont::testing::Testing::Run(TestStatistics, argc, argv);
}

@ -6,6 +6,8 @@ DEPENDS
vtkm_filter_core
PRIVATE_DEPENDS
vtkm_worklet
OPTIONAL_DEPENDS
MPI::MPI_CXX
TEST_DEPENDS
vtkm_filter_density_estimate
vtkm_source

@ -51,12 +51,33 @@ public:
{
}
VTKM_EXEC_CONT
StatState(T n, T min, T max, T sum, T mean, T M2, T M3, T M4)
: n_(n)
, min_(min)
, max_(max)
, sum_(sum)
, mean_(mean)
, M2_(M2)
, M3_(M3)
, M4_(M4)
{
}
VTKM_EXEC_CONT
StatState operator+(const StatState<T>& y) const
{
const StatState<T>& x = *this;
StatState result;
if (y.n_ == 0)
{
return x;
}
if (x.n_ == 0)
{
return y;
}
StatState result;
result.n_ = x.n_ + y.n_;
result.min_ = vtkm::Min(x.min_, y.min_);
@ -97,8 +118,7 @@ public:
return result;
}
VTKM_EXEC_CONT
T N() const { return this->n_; }
VTKM_EXEC_CONT T N() const { return this->n_; }
VTKM_EXEC_CONT
T Min() const { return this->min_; }
@ -112,6 +132,15 @@ public:
VTKM_EXEC_CONT
T Mean() const { return this->mean_; }
VTKM_EXEC_CONT
T M2() const { return this->M2_; }
VTKM_EXEC_CONT
T M3() const { return this->M3_; }
VTKM_EXEC_CONT
T M4() const { return this->M4_; }
VTKM_EXEC_CONT
T SampleStddev() const { return vtkm::Sqrt(this->SampleVariance()); }
@ -121,17 +150,27 @@ public:
VTKM_EXEC_CONT
T SampleVariance() const
{
VTKM_ASSERT(n_ != 1);
if (this->n_ <= 1)
{
return 0;
}
return this->M2_ / (this->n_ - 1);
}
VTKM_EXEC_CONT
T PopulationVariance() const { return this->M2_ / this->n_; }
T PopulationVariance() const
{
if (this->M2_ == 0 || this->n_ == 0)
{
return T(0);
}
return this->M2_ / this->n_;
}
VTKM_EXEC_CONT
T Skewness() const
{
if (this->M2_ == 0)
if (this->M2_ == 0 || this->n_ == 0)
// Shamelessly swiped from Boost Math
// The limit is technically undefined, but the interpretation here is clear:
// A constant dataset has no skewness.
@ -143,7 +182,7 @@ public:
VTKM_EXEC_CONT
T Kurtosis() const
{
if (this->M2_ == 0)
if (this->M2_ == 0 || this->n_ == 0)
// Shamelessly swiped from Boost Math
// The limit is technically undefined, but the interpretation here is clear:
// A constant dataset has no kurtosis.

@ -238,6 +238,53 @@ void TestMomentsByKey()
}
}
void TestEdgeCases()
{
using StatValueType = vtkm::worklet::DescriptiveStatistics::StatState<vtkm::FloatDefault>;
StatValueType state1(42);
StatValueType state2;
StatValueType result = state1 + state2;
VTKM_TEST_ASSERT(test_equal(result.N(), 1));
VTKM_TEST_ASSERT(test_equal(result.Min(), 42));
VTKM_TEST_ASSERT(test_equal(result.Max(), 42));
VTKM_TEST_ASSERT(test_equal(result.Mean(), 42));
VTKM_TEST_ASSERT(test_equal(result.SampleVariance(), 0));
VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 0));
VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0));
VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 0));
result = state2 + state1;
VTKM_TEST_ASSERT(test_equal(result.N(), 1));
VTKM_TEST_ASSERT(test_equal(result.Min(), 42));
VTKM_TEST_ASSERT(test_equal(result.Max(), 42));
VTKM_TEST_ASSERT(test_equal(result.Mean(), 42));
VTKM_TEST_ASSERT(test_equal(result.SampleVariance(), 0));
VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 0));
VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0));
VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 0));
StatValueType empty;
VTKM_TEST_ASSERT(test_equal(empty.N(), 0));
VTKM_TEST_ASSERT(test_equal(empty.Min(), std::numeric_limits<vtkm::FloatDefault>::max()));
VTKM_TEST_ASSERT(test_equal(empty.Max(), std::numeric_limits<vtkm::FloatDefault>::lowest()));
VTKM_TEST_ASSERT(test_equal(empty.Mean(), 0));
VTKM_TEST_ASSERT(test_equal(empty.SampleVariance(), 0));
VTKM_TEST_ASSERT(test_equal(empty.PopulationVariance(), 0));
VTKM_TEST_ASSERT(test_equal(empty.Skewness(), 0));
VTKM_TEST_ASSERT(test_equal(empty.Kurtosis(), 0));
result = empty + empty;
VTKM_TEST_ASSERT(test_equal(empty.N(), 0));
VTKM_TEST_ASSERT(test_equal(empty.Min(), std::numeric_limits<vtkm::FloatDefault>::max()));
VTKM_TEST_ASSERT(test_equal(empty.Max(), std::numeric_limits<vtkm::FloatDefault>::lowest()));
VTKM_TEST_ASSERT(test_equal(empty.Mean(), 0));
VTKM_TEST_ASSERT(test_equal(empty.SampleVariance(), 0));
VTKM_TEST_ASSERT(test_equal(empty.PopulationVariance(), 0));
VTKM_TEST_ASSERT(test_equal(empty.Skewness(), 0));
VTKM_TEST_ASSERT(test_equal(empty.Kurtosis(), 0));
}
void TestDescriptiveStatistics()
{
TestSingle();
@ -249,6 +296,7 @@ void TestDescriptiveStatistics()
TestMeanProperties();
TestVarianceProperty();
TestMomentsByKey();
TestEdgeCases();
}
int UnitTestDescriptiveStatistics(int argc, char* argv[])