258 lines
9.7 KiB
C++
258 lines
9.7 KiB
C++
//============================================================================
|
|
// Copyright (c) Kitware, Inc.
|
|
// All rights reserved.
|
|
// See LICENSE.txt for details.
|
|
//
|
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
// PURPOSE. See the above copyright notice for more information.
|
|
//============================================================================
|
|
|
|
#include <vtkm/cont/testing/Testing.h>
|
|
#include <vtkm/worklet/DescriptiveStatistics.h>
|
|
|
|
#include <random>
|
|
|
|
void TestSingle()
|
|
{
|
|
auto single_array = vtkm::cont::make_ArrayHandle<vtkm::Float32>({ 42 });
|
|
auto result = vtkm::worklet::DescriptiveStatistics::Run(single_array);
|
|
|
|
VTKM_TEST_ASSERT(test_equal(result.N(), 1));
|
|
VTKM_TEST_ASSERT(test_equal(result.Mean(), 42));
|
|
VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 0));
|
|
|
|
// A single number does not have skewness nor kurtosis
|
|
VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0));
|
|
VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 0));
|
|
}
|
|
|
|
void TestConstant()
|
|
{
|
|
auto constants = vtkm::cont::make_ArrayHandleConstant(1234.f, 10000);
|
|
auto result = vtkm::worklet::DescriptiveStatistics::Run(constants);
|
|
|
|
VTKM_TEST_ASSERT(test_equal(result.N(), 10000));
|
|
VTKM_TEST_ASSERT(test_equal(result.Sum(), 12340000));
|
|
VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 0));
|
|
VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0));
|
|
VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 0));
|
|
}
|
|
|
|
void TestIntegerSequence()
|
|
{
|
|
// We only have 23 bits for FloatInt in Float32. This limits N to 11 bits.
|
|
constexpr vtkm::Float32 N = 1000;
|
|
|
|
auto integers =
|
|
vtkm::cont::ArrayHandleCounting<vtkm::Float32>(0.0f, 1.0f, static_cast<vtkm::Id>(N));
|
|
auto result = vtkm::worklet::DescriptiveStatistics::Run(integers);
|
|
|
|
VTKM_TEST_ASSERT(test_equal(result.N(), N));
|
|
VTKM_TEST_ASSERT(test_equal(result.Sum(), N * (N - 1) / 2));
|
|
VTKM_TEST_ASSERT(test_equal(result.Mean(), (N - 1) / 2));
|
|
|
|
// Expected values are from Numpy/SciPy
|
|
VTKM_TEST_ASSERT(test_equal(result.PopulationVariance(), 83333.25));
|
|
VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0));
|
|
// We are using the Pearson's definition, with fisher = False when calling
|
|
// numpy.
|
|
VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 1.8));
|
|
}
|
|
|
|
void TestStandardNormal()
|
|
{
|
|
// Draw random numbers from the Standard Normal distribution, with mean = 0, stddev = 1
|
|
std::mt19937 gen(0xceed);
|
|
std::normal_distribution<vtkm::Float32> dis(0.0f, 1.0f);
|
|
|
|
std::vector<vtkm::Float32> x(1000000);
|
|
std::generate(x.begin(), x.end(), [&gen, &dis]() { return dis(gen); });
|
|
|
|
auto array = vtkm::cont::make_ArrayHandle(x, vtkm::CopyFlag::Off);
|
|
auto result = vtkm::worklet::DescriptiveStatistics::Run(array);
|
|
|
|
// Variance should be positive
|
|
VTKM_TEST_ASSERT(result.SampleVariance() >= 0);
|
|
// SampleStddev should be very close to 1.0, Skewness ~= 0 and Kurtosis ~= 3.0
|
|
VTKM_TEST_ASSERT(test_equal(result.SampleStddev(), 1.0f, 1.0f / 100));
|
|
VTKM_TEST_ASSERT(test_equal(result.Skewness(), 0.0f, 1.0f / 100));
|
|
VTKM_TEST_ASSERT(test_equal(result.Kurtosis(), 3.0f, 1.0f / 100));
|
|
}
|
|
|
|
void TestCatastrophicCancellation()
|
|
{
|
|
// Good examples of the effect of catastrophic cancellation from Wikipedia.
|
|
auto arrayOK =
|
|
vtkm::cont::make_ArrayHandle<vtkm::Float64>({ 1e8 + 4, 1e8 + 7, 1e8 + 13, 1.0e8 + 16 });
|
|
auto resultOK = vtkm::worklet::DescriptiveStatistics::Run(arrayOK);
|
|
|
|
VTKM_TEST_ASSERT(test_equal(resultOK.N(), 4));
|
|
VTKM_TEST_ASSERT(test_equal(resultOK.Sum(), 4.0e8 + 40));
|
|
VTKM_TEST_ASSERT(test_equal(resultOK.Min(), 1.0e8 + 4));
|
|
VTKM_TEST_ASSERT(test_equal(resultOK.Max(), 1.0e8 + 16));
|
|
VTKM_TEST_ASSERT(test_equal(resultOK.SampleVariance(), 30));
|
|
VTKM_TEST_ASSERT(test_equal(resultOK.PopulationVariance(), 22.5));
|
|
|
|
// Bad examples of the effect of catastrophic cancellation from Wikipedia.
|
|
// A naive algorithm will fail in calculating the correct variance
|
|
auto arrayEvil =
|
|
vtkm::cont::make_ArrayHandle<vtkm::Float64>({ 1e9 + 4, 1e9 + 7, 1e9 + 13, 1.0e9 + 16 });
|
|
auto resultEvil = vtkm::worklet::DescriptiveStatistics::Run(arrayEvil);
|
|
|
|
VTKM_TEST_ASSERT(test_equal(resultEvil.N(), 4));
|
|
VTKM_TEST_ASSERT(test_equal(resultEvil.Sum(), 4.0e9 + 40));
|
|
VTKM_TEST_ASSERT(test_equal(resultEvil.Min(), 1.0e9 + 4));
|
|
VTKM_TEST_ASSERT(test_equal(resultEvil.Max(), 1.0e9 + 16));
|
|
VTKM_TEST_ASSERT(test_equal(resultEvil.SampleVariance(), 30));
|
|
VTKM_TEST_ASSERT(test_equal(resultEvil.PopulationVariance(), 22.5));
|
|
}
|
|
|
|
void TestGeneGolub()
|
|
{
|
|
// Bad case example proposed by Gene Golub, the variance may come out
|
|
// as negative due to numerical precision. Thanks to Nick Thompson for
|
|
// providing this unit test.
|
|
|
|
// Draw random numbers from the Normal distribution, with mean = 500, stddev = 0.01
|
|
std::mt19937 gen(0xceed);
|
|
std::normal_distribution<vtkm::Float32> dis(500.0f, 0.01f);
|
|
|
|
std::vector<vtkm::Float32> v(50000);
|
|
for (float& i : v)
|
|
{
|
|
i = dis(gen);
|
|
}
|
|
|
|
auto array = vtkm::cont::make_ArrayHandle(v, vtkm::CopyFlag::Off);
|
|
auto result = vtkm::worklet::DescriptiveStatistics::Run(array);
|
|
|
|
// Variance should be positive
|
|
VTKM_TEST_ASSERT(result.SampleVariance() >= 0);
|
|
}
|
|
|
|
void TestMeanProperties()
|
|
{
|
|
// Draw random numbers from the Normal distribution, with mean = 500, stddev = 0.01
|
|
std::mt19937 gen(0xceed);
|
|
std::normal_distribution<vtkm::Float32> dis(500.0f, 0.01f);
|
|
|
|
std::vector<vtkm::Float32> x(50000);
|
|
std::generate(x.begin(), x.end(), [&gen, &dis]() { return dis(gen); });
|
|
|
|
// 1. Linearity, Mean(a * x + b) = a * Mean(x) + b
|
|
std::vector<vtkm::Float32> axpb(x.size());
|
|
std::transform(
|
|
x.begin(), x.end(), axpb.begin(), [](vtkm::Float32 value) { return 4.0f * value + 1000.f; });
|
|
|
|
auto x_array = vtkm::cont::make_ArrayHandle(x, vtkm::CopyFlag::Off);
|
|
auto axpb_array = vtkm::cont::make_ArrayHandle(axpb, vtkm::CopyFlag::Off);
|
|
|
|
auto mean_x = vtkm::worklet::DescriptiveStatistics::Run(x_array).Mean();
|
|
auto mean_axpb = vtkm::worklet::DescriptiveStatistics::Run(axpb_array).Mean();
|
|
|
|
VTKM_TEST_ASSERT(test_equal(4.0f * mean_x + 1000.f, mean_axpb, 0.01f));
|
|
|
|
// 2. Random shuffle
|
|
std::vector<vtkm::Float32> px = x;
|
|
std::shuffle(px.begin(), px.end(), gen);
|
|
|
|
auto px_array = vtkm::cont::make_ArrayHandle(px, vtkm::CopyFlag::Off);
|
|
auto mean_px = vtkm::worklet::DescriptiveStatistics::Run(px_array).Mean();
|
|
|
|
VTKM_TEST_ASSERT(test_equal(mean_x, mean_px, 0.01f));
|
|
}
|
|
|
|
void TestVarianceProperty()
|
|
{
|
|
// Draw random numbers from the Normal distribution, with mean = 500, stddev = 0.01
|
|
std::mt19937 gen(0xceed);
|
|
std::normal_distribution<vtkm::Float32> dis(500.0f, 0.01f);
|
|
|
|
std::vector<vtkm::Float32> v(50000);
|
|
std::generate(v.begin(), v.end(), [&gen, &dis]() { return dis(gen); });
|
|
|
|
// 1. Linearity, Var(a * x + b) = a^2 * Var(x)
|
|
std::vector<vtkm::Float32> kv(v.size());
|
|
std::transform(
|
|
v.begin(), v.end(), kv.begin(), [](vtkm::Float32 value) { return 4.0f * value + 5.0f; });
|
|
|
|
auto array_v = vtkm::cont::make_ArrayHandle(v, vtkm::CopyFlag::Off);
|
|
auto array_kv = vtkm::cont::make_ArrayHandle(kv, vtkm::CopyFlag::Off);
|
|
auto result_v = vtkm::worklet::DescriptiveStatistics::Run(array_v);
|
|
auto result_kv = vtkm::worklet::DescriptiveStatistics::Run(array_kv);
|
|
auto mean_v = result_v.Mean();
|
|
auto mean_kv = result_kv.Mean();
|
|
auto var_v = result_v.SampleVariance();
|
|
auto var_kv = result_kv.SampleVariance();
|
|
|
|
vtkm::Float32 condition_number_kv = 0;
|
|
auto rp = array_kv.ReadPortal();
|
|
for (vtkm::Id i = 0; i < rp.GetNumberOfValues(); ++i)
|
|
{
|
|
condition_number_kv += vtkm::Abs(rp.Get(i) - mean_kv) * vtkm::Abs(rp.Get(i));
|
|
}
|
|
condition_number_kv *= (2.0f / (static_cast<float>(rp.GetNumberOfValues() - 1) * var_kv));
|
|
VTKM_TEST_ASSERT(test_equal(var_kv,
|
|
4.0 * 4.0 * var_v,
|
|
condition_number_kv * std::numeric_limits<vtkm::Float32>::epsilon()));
|
|
|
|
// Random shuffle
|
|
std::vector<vtkm::Float32> px = v;
|
|
std::shuffle(px.begin(), px.end(), gen);
|
|
|
|
auto px_array = vtkm::cont::make_ArrayHandle(px, vtkm::CopyFlag::Off);
|
|
auto var_px = vtkm::worklet::DescriptiveStatistics::Run(px_array).SampleVariance();
|
|
|
|
vtkm::Float32 condition_number_v = 0;
|
|
rp = px_array.ReadPortal();
|
|
for (vtkm::Id i = 0; i < rp.GetNumberOfValues(); ++i)
|
|
{
|
|
condition_number_v += vtkm::Abs(rp.Get(i) - mean_v) * vtkm::Abs(rp.Get(i));
|
|
}
|
|
condition_number_v *= (2.0f / (static_cast<float>(rp.GetNumberOfValues() - 1) * var_v));
|
|
|
|
VTKM_TEST_ASSERT(
|
|
test_equal(var_v, var_px, condition_number_v * std::numeric_limits<vtkm::Float32>::epsilon()));
|
|
}
|
|
|
|
void TestMomentsByKey()
|
|
{
|
|
auto keys_array = vtkm::cont::make_ArrayHandle<vtkm::UInt32>({ 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4 });
|
|
auto values_array = vtkm::cont::make_ArrayHandleConstant(1.0f, keys_array.GetNumberOfValues());
|
|
|
|
auto results = vtkm::worklet::DescriptiveStatistics::Run(keys_array, values_array);
|
|
VTKM_TEST_ASSERT(results.GetNumberOfValues() == 5);
|
|
|
|
std::vector<vtkm::UInt32> expected_ns{ 1, 1, 2, 3, 4 };
|
|
std::vector<vtkm::Float32> expected_sums{ 1, 1, 2, 3, 4 };
|
|
std::vector<vtkm::Float32> expected_means{ 1, 1, 1, 1, 1 };
|
|
|
|
auto resultsPortal = results.ReadPortal();
|
|
for (vtkm::Id i = 0; i < results.GetNumberOfValues(); ++i)
|
|
{
|
|
auto result = resultsPortal.Get(i);
|
|
VTKM_TEST_ASSERT(test_equal(result.first, i));
|
|
VTKM_TEST_ASSERT(test_equal(result.second.N(), expected_ns[static_cast<std::size_t>(i)]));
|
|
VTKM_TEST_ASSERT(test_equal(result.second.PopulationVariance(), 0));
|
|
}
|
|
}
|
|
|
|
void TestDescriptiveStatistics()
|
|
{
|
|
TestSingle();
|
|
TestConstant();
|
|
TestIntegerSequence();
|
|
TestStandardNormal();
|
|
TestCatastrophicCancellation();
|
|
TestGeneGolub();
|
|
TestMeanProperties();
|
|
TestVarianceProperty();
|
|
TestMomentsByKey();
|
|
}
|
|
|
|
int UnitTestDescriptiveStatistics(int argc, char* argv[])
|
|
{
|
|
return vtkm::cont::testing::Testing::Run(TestDescriptiveStatistics, argc, argv);
|
|
}
|