diff --git a/vtkm/worklet/DescriptiveStatistics.h b/vtkm/worklet/DescriptiveStatistics.h index ac63d8d76..783485de1 100644 --- a/vtkm/worklet/DescriptiveStatistics.h +++ b/vtkm/worklet/DescriptiveStatistics.h @@ -25,9 +25,19 @@ public: template struct StatState { - StatState() = default; + VTKM_EXEC_CONT + StatState() + : n(0) + , min_(std::numeric_limits::max()) + , max_(std::numeric_limits::lowest()) + , sum(0) + , mean_(0) + , M2(0) + , M3(0) + , M4(0) + { + } - // FIXME: is the constructor actually called on the device side? VTKM_EXEC_CONT StatState(T value) : n(1) @@ -35,6 +45,9 @@ public: , max_(value) , sum(value) , mean_(value) + , M2(0) + , M3(0) + , M4(0) { } @@ -55,7 +68,7 @@ public: // It is tempting to try to deviate from the literature and calculate // mean in each "reduction" from sum and n. This saves one multiplication. - // However, RESIST THE TEMPTATION!!! This takes us back to the two-pass + // However, RESIST THE TEMPTATION!!! This takes us back to the naive // algorithm (mean = sum of a bunch of numbers / N) that actually // accumulates more error and causes problem when calculating M2 // (and thus variance). @@ -115,20 +128,39 @@ public: T PopulationVariance() const { return this->M2 / this->n; } VTKM_CONT - T Skewness() const { return vtkm::Sqrt(this->n) * this->M3 / vtkm::Pow(this->M2, T{ 1.5 }); } + T Skewness() const + { + if (this->M2 == 0) + // Shamelessly swiped from Boost Math + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + return T{}; + else + return vtkm::Sqrt(this->n) * this->M3 / vtkm::Pow(this->M2, T{ 1.5 }); + } VTKM_CONT - T Kurtosis() const { return this->n * this->M4 / (this->M2 * this->M2); } + T Kurtosis() const + { + if (this->M2 == 0) + // Shamelessly swiped from Boost Math + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no kurtosis. + return T{}; + else + return this->n * this->M4 / (this->M2 * this->M2); + } private: - T n = T{}; - T min_ = std::numeric_limits::max(); - T max_ = std::numeric_limits::lowest(); - T sum = T{}; - T mean_ = T{}; - T M2 = T{}; - T M3 = T{}; - T M4 = T{}; + // GCC4.8 is not happy about initializing data members here. + T n; + T min_; + T max_; + T sum; + T mean_; + T M2; + T M3; + T M4; }; // StatState struct MakeStatState diff --git a/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx b/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx index 7718e469b..c68a6ec12 100644 --- a/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx +++ b/vtkm/worklet/testing/UnitTestDescriptiveStatistics.cxx @@ -25,7 +25,9 @@ void TestSingle() // TODO: what is VTKm way of saying EXPECT_EXCEPTION? //VTKM_TEST_FAIL(result.sample_variance()); - // TODO: what are the skewness and kurtosis of a single element? Zero, Nada? + // A single number does not have skewness nor kurtosis + VTKM_TEST_ASSERT(result.Skewness() == 0); + VTKM_TEST_ASSERT(result.Kurtosis() == 0); } void TestConstant() @@ -36,6 +38,8 @@ void TestConstant() VTKM_TEST_ASSERT(result.N() == 10000); VTKM_TEST_ASSERT(result.Sum() == 12340000); VTKM_TEST_ASSERT(result.PopulationVariance() == 0); + VTKM_TEST_ASSERT(result.Skewness() == 0); + VTKM_TEST_ASSERT(result.Kurtosis() == 0); } void TestIntegerSequence() @@ -49,6 +53,33 @@ void TestIntegerSequence() VTKM_TEST_ASSERT(result.N() == N); VTKM_TEST_ASSERT(result.Sum() == N * (N - 1) / 2); VTKM_TEST_ASSERT(test_equal(result.Mean(), (N - 1) / 2)); + + // Expected values are from Numpy/SciPy + VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 83333.25)); + VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0)); + // We are using the Pearson's definition, with fisher = False when calling + // numpy. + VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 1.8)); +} + +void TestStandardNormal() +{ + // Draw random numbers from the Standard Normal distribution, with mean = 0, stddev = 1 + std::mt19937 gen(0xceed); + std::normal_distribution dis(0.0f, 1.0f); + + std::vector x(1000000); + std::generate(x.begin(), x.end(), [&gen, &dis]() { return dis(gen); }); + + auto array = vtkm::cont::make_ArrayHandle(x); + auto result = vtkm::worklet::DescriptiveStatistics::Run(array); + + // Variance should be positive + VTKM_TEST_ASSERT(result.SampleVariance() >= 0); + // SampleStddev should be very close to 1.0, Skewness ~= 0 and Kurtosis ~= 3.0 + VTKM_TEST_ASSERT(test_equal(result.SampleStddev(), 1.0f, 1.0f / 100)); + VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0.0f, 1.0 / 100)); + VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 3.0f, 1.0 / 100)); } void TestCatastrophicCancellation() @@ -194,6 +225,7 @@ void TestDescriptiveStatistics() TestSingle(); TestConstant(); TestIntegerSequence(); + TestStandardNormal(); TestCatastrophicCancellation(); TestGeneGolub(); TestMeanProperties();