0a4317709e
Some methods were marked as `VTKM_CONT` when they should have been marked `VTKM_EXEC_CONT`.
227 lines
7.1 KiB
C++
227 lines
7.1 KiB
C++
//============================================================================
|
|
// Copyright (c) Kitware, Inc.
|
|
// All rights reserved.
|
|
// See LICENSE.txt for details.
|
|
//
|
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
// PURPOSE. See the above copyright notice for more information.
|
|
//============================================================================
|
|
#ifndef vtk_m_worklet_DescriptiveStatistics_h
|
|
#define vtk_m_worklet_DescriptiveStatistics_h
|
|
|
|
#include <vtkm/cont/Algorithm.h>
|
|
#include <vtkm/cont/ArrayCopy.h>
|
|
#include <vtkm/cont/ArrayHandleTransform.h>
|
|
#include <vtkm/cont/ArrayHandleZip.h>
|
|
|
|
namespace vtkm
|
|
{
|
|
namespace worklet
|
|
{
|
|
class DescriptiveStatistics
|
|
{
|
|
public:
|
|
template <typename T>
|
|
struct StatState
|
|
{
|
|
VTKM_EXEC_CONT
|
|
StatState()
|
|
: n_(0)
|
|
, min_(std::numeric_limits<T>::max())
|
|
, max_(std::numeric_limits<T>::lowest())
|
|
, sum_(0)
|
|
, mean_(0)
|
|
, M2_(0)
|
|
, M3_(0)
|
|
, M4_(0)
|
|
{
|
|
}
|
|
|
|
VTKM_EXEC_CONT
|
|
StatState(T value)
|
|
: n_(1)
|
|
, min_(value)
|
|
, max_(value)
|
|
, sum_(value)
|
|
, mean_(value)
|
|
, M2_(0)
|
|
, M3_(0)
|
|
, M4_(0)
|
|
{
|
|
}
|
|
|
|
VTKM_EXEC_CONT
|
|
StatState operator+(const StatState<T>& y) const
|
|
{
|
|
const StatState<T>& x = *this;
|
|
StatState result;
|
|
|
|
result.n_ = x.n_ + y.n_;
|
|
|
|
result.min_ = vtkm::Min(x.min_, y.min_);
|
|
result.max_ = vtkm::Max(x.max_, y.max_);
|
|
|
|
// TODO: consider implementing compensated sum
|
|
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
|
|
result.sum_ = x.sum_ + y.sum_;
|
|
|
|
// It is tempting to try to deviate from the literature and calculate
|
|
// mean in each "reduction" from sum and n. This saves one multiplication.
|
|
// However, RESIST THE TEMPTATION!!! This takes us back to the naive
|
|
// algorithm (mean = sum of a bunch of numbers / N) that actually
|
|
// accumulates more error and causes problem when calculating M2
|
|
// (and thus variance).
|
|
// TODO: Verify that FieldStatistics exhibits the same problem since
|
|
// it is using a "parallel" version of the naive algorithm as well.
|
|
// TODO: or better, just deprecate FieldStatistics.
|
|
T delta = y.mean_ - x.mean_;
|
|
result.mean_ = x.mean_ + delta * y.n_ / result.n_;
|
|
|
|
T delta2 = delta * delta;
|
|
result.M2_ = x.M2_ + y.M2_ + delta2 * x.n_ * y.n_ / result.n_;
|
|
|
|
T delta3 = delta * delta2;
|
|
T n2 = result.n_ * result.n_;
|
|
result.M3_ = x.M3_ + y.M3_;
|
|
result.M3_ += delta3 * x.n_ * y.n_ * (x.n_ - y.n_) / n2;
|
|
result.M3_ += T(3.0) * delta * (x.n_ * y.M2_ - y.n_ * x.M2_) / result.n_;
|
|
|
|
T delta4 = delta2 * delta2;
|
|
T n3 = result.n_ * n2;
|
|
result.M4_ = x.M4_ + y.M4_;
|
|
result.M4_ += delta4 * x.n_ * y.n_ * (x.n_ * x.n_ - x.n_ * y.n_ + y.n_ * y.n_) / n3;
|
|
result.M4_ += T(6.0) * delta2 * (x.n_ * x.n_ * y.M2_ + y.n_ * y.n_ * x.M2_) / n2;
|
|
result.M4_ += T(4.0) * delta * (x.n_ * y.M3_ - y.n_ * x.M3_) / result.n_;
|
|
|
|
return result;
|
|
}
|
|
|
|
VTKM_EXEC_CONT
|
|
T N() const { return this->n_; }
|
|
|
|
VTKM_EXEC_CONT
|
|
T Min() const { return this->min_; }
|
|
|
|
VTKM_EXEC_CONT
|
|
T Max() const { return this->max_; }
|
|
|
|
VTKM_EXEC_CONT
|
|
T Sum() const { return this->sum_; }
|
|
|
|
VTKM_EXEC_CONT
|
|
T Mean() const { return this->mean_; }
|
|
|
|
VTKM_EXEC_CONT
|
|
T SampleStddev() const { return vtkm::Sqrt(this->SampleVariance()); }
|
|
|
|
VTKM_EXEC_CONT
|
|
T PopulationStddev() const { return vtkm::Sqrt(this->PopulationVariance()); }
|
|
|
|
VTKM_EXEC_CONT
|
|
T SampleVariance() const
|
|
{
|
|
VTKM_ASSERT(n_ != 1);
|
|
return this->M2_ / (this->n_ - 1);
|
|
}
|
|
|
|
VTKM_EXEC_CONT
|
|
T PopulationVariance() const { return this->M2_ / this->n_; }
|
|
|
|
VTKM_EXEC_CONT
|
|
T Skewness() const
|
|
{
|
|
if (this->M2_ == 0)
|
|
// Shamelessly swiped from Boost Math
|
|
// The limit is technically undefined, but the interpretation here is clear:
|
|
// A constant dataset has no skewness.
|
|
return T(0);
|
|
else
|
|
return vtkm::Sqrt(this->n_) * this->M3_ / vtkm::Pow(this->M2_, T{ 1.5 });
|
|
}
|
|
|
|
VTKM_EXEC_CONT
|
|
T Kurtosis() const
|
|
{
|
|
if (this->M2_ == 0)
|
|
// Shamelessly swiped from Boost Math
|
|
// The limit is technically undefined, but the interpretation here is clear:
|
|
// A constant dataset has no kurtosis.
|
|
return T(0);
|
|
else
|
|
return this->n_ * this->M4_ / (this->M2_ * this->M2_);
|
|
}
|
|
|
|
private:
|
|
// GCC4.8 is not happy about initializing data members here.
|
|
T n_;
|
|
T min_;
|
|
T max_;
|
|
T sum_;
|
|
T mean_;
|
|
T M2_;
|
|
T M3_;
|
|
T M4_;
|
|
}; // StatState
|
|
|
|
struct MakeStatState
|
|
{
|
|
template <typename T>
|
|
VTKM_EXEC_CONT vtkm::worklet::DescriptiveStatistics::StatState<T> operator()(T value) const
|
|
{
|
|
return vtkm::worklet::DescriptiveStatistics::StatState<T>{ value };
|
|
}
|
|
};
|
|
|
|
/// \brief Calculate various summary statistics for the input ArrayHandle
|
|
///
|
|
/// Reference:
|
|
/// [1] Wikipeida, parallel algorithm for calculating variance
|
|
/// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
|
|
/// [2] Implementation of [1] in the Trust library
|
|
/// https://github.com/thrust/thrust/blob/master/examples/summary_statistics.cu
|
|
/// [3] Bennett, Janine, et al. "Numerically stable, single-pass, parallel statistics algorithms."
|
|
/// 2009 IEEE International Conference on Cluster Computing and Workshops. IEEE, 2009.
|
|
template <typename FieldType, typename Storage>
|
|
VTKM_CONT static StatState<FieldType> Run(
|
|
const vtkm::cont::ArrayHandle<FieldType, Storage>& field)
|
|
{
|
|
using Algorithm = vtkm::cont::Algorithm;
|
|
|
|
// Essentially a TransformReduce. Do we have that convenience in VTKm?
|
|
auto states = vtkm::cont::make_ArrayHandleTransform(field, MakeStatState{});
|
|
return Algorithm::Reduce(states, StatState<FieldType>{});
|
|
}
|
|
|
|
template <typename KeyType, typename ValueType, typename KeyInStorage, typename ValueInStorage>
|
|
VTKM_CONT static auto Run(const vtkm::cont::ArrayHandle<KeyType, KeyInStorage>& keys,
|
|
const vtkm::cont::ArrayHandle<ValueType, ValueInStorage>& values)
|
|
-> vtkm::cont::ArrayHandleZip<vtkm::cont::ArrayHandle<KeyType>,
|
|
vtkm::cont::ArrayHandle<StatState<ValueType>>>
|
|
{
|
|
using Algorithm = vtkm::cont::Algorithm;
|
|
|
|
// Make a copy of the input arrays so we don't modify them
|
|
vtkm::cont::ArrayHandle<KeyType> keys_copy;
|
|
vtkm::cont::ArrayCopy(keys, keys_copy);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> values_copy;
|
|
vtkm::cont::ArrayCopy(values, values_copy);
|
|
|
|
// Gather values of the same key by sorting them according to keys
|
|
Algorithm::SortByKey(keys_copy, values_copy);
|
|
|
|
auto states = vtkm::cont::make_ArrayHandleTransform(values_copy, MakeStatState{});
|
|
vtkm::cont::ArrayHandle<KeyType> keys_out;
|
|
|
|
vtkm::cont::ArrayHandle<StatState<ValueType>> results;
|
|
Algorithm::ReduceByKey(keys_copy, states, keys_out, results, vtkm::Add{});
|
|
|
|
return vtkm::cont::make_ArrayHandleZip(keys_out, results);
|
|
}
|
|
}; // DescriptiveStatistics
|
|
|
|
} // worklet
|
|
} // vtkm
|
|
#endif // vtk_m_worklet_DescriptiveStatistics_h
|