diff --git a/include/xsf/stats.h b/include/xsf/stats.h index 2c78c9aed..9e65c5a52 100644 --- a/include/xsf/stats.h +++ b/include/xsf/stats.h @@ -300,6 +300,85 @@ inline float tukeylambdacdf(float x, double lmbda) { return tukeylambdacdf(static_cast(x), static_cast(lmbda)); } +inline std::vector wilcoxon_pmf(int n) { + // PMF of the Wilcoxon signed-rank statistic. + // The returned vector has length n*(n+1)/2 + 1, and the k-th element contains + // the probability of observing a test statistic equal to k for n pairs of + // observations. + std::vector pmf = {1.0}; + for (int k = 1; k < n + 1; k++) { + std::vector tmp(k * (k + 1) / 2 + 1, 0.0); + int m = static_cast(pmf.size()); + for (int i = 0; i < m; i++) { + tmp[i] += 0.5 * pmf[i]; + tmp[i + k] += 0.5 * pmf[i]; + } + pmf = std::move(tmp); + } + return pmf; +} + +inline std::vector wilcoxon_cdf_table(const std::vector &pmf) { + // Cumulative probabilities for CDF/SF queries. + std::vector cdf_table(pmf.size()); + double sum = 0.0; + for (int i = 0; i < static_cast(pmf.size()); ++i) { + sum += pmf[i]; + cdf_table[i] = sum; + } + return cdf_table; +} + +inline double wilcoxon_cdf1(int k, const std::vector &cdf_table, int n) { + const int max_k = n * (n + 1) / 2; + if (k < 0) { + return 0.0; + } + if (k >= max_k) { + return 1.0; + } + return cdf_table[k]; +} + +inline double wilcoxon_sf1(int k, const std::vector &cdf_table, int n) { + const int max_k = n * (n + 1) / 2; + if (k < 0) { + return 1.0; + } + if (k > max_k) { + return 0.0; + } + return 1.0 - (k > 0 ? cdf_table[k - 1] : 0.0); +} + +inline double wilcoxon_cdf(int k, const std::vector &cdf_table, int n) { + // CDF of the Wilcoxon signed-rank statistic for integer k and n pairs of observations. + const double mn = n * (n + 1) / 4.0; + return (k <= mn) ? wilcoxon_cdf1(k, cdf_table, n) : 1.0 - wilcoxon_sf1(k + 1, cdf_table, n); +} + +inline double wilcoxon_cdf(double k, const std::vector &cdf_table, int n) { + // CDF of the Wilcoxon signed-rank statistic for real k and n pairs of observations. + if (std::isnan(k) || n < 0) { + return std::numeric_limits::quiet_NaN(); + } + return wilcoxon_cdf(static_cast(k), cdf_table, n); +} + +inline double wilcoxon_sf(int k, const std::vector &cdf_table, int n) { + // SF of the Wilcoxon signed-rank statistic for integer k and n pairs of observations. + const double mn = n * (n + 1) / 4.0; + return (k <= mn) ? wilcoxon_sf1(k, cdf_table, n) : 1.0 - wilcoxon_cdf1(k - 1, cdf_table, n); +} + +inline double wilcoxon_sf(double k, const std::vector &cdf_table, int n) { + // SF of the Wilcoxon signed-rank statistic for real k and n pairs of observations. + if (std::isnan(k) || n < 0) { + return std::numeric_limits::quiet_NaN(); + } + return wilcoxon_sf(static_cast(k), cdf_table, n); +} + namespace detail { template diff --git a/tests/xsf_tests/test_wilcoxon.cpp b/tests/xsf_tests/test_wilcoxon.cpp new file mode 100644 index 000000000..71f32af54 --- /dev/null +++ b/tests/xsf_tests/test_wilcoxon.cpp @@ -0,0 +1,132 @@ +#include "../testing_utils.h" +#include + +// Values computed from scipy.stats._wilcoxon +// +// # Example for CDF and n = 5 +// from scipy.stats._wilcoxon import WilcoxonDistribution +// import numpy as np +// np.set_printoptions(precision=17) +// n = 5 +// wd = WilcoxonDistribution(n) +// wd.cdf([5, 6, 7, 8, 9]) + +TEST_CASE("wilcoxon_cdf exact small cases", "[wilcoxon_cdf]") { + { + // n = 0 + const std::vector pmf = xsf::wilcoxon_pmf(0); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_cdf(0.0, cdf_table, 0) == 1.0); + } + + { // n = 1 + const std::vector pmf = xsf::wilcoxon_pmf(1); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_cdf(0.0, cdf_table, 1) == 0.5); + REQUIRE(xsf::wilcoxon_cdf(1.0, cdf_table, 1) == 1.0); + } + + { // n = 2 + const std::vector pmf = xsf::wilcoxon_pmf(2); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_cdf(1.0, cdf_table, 2) == 0.5); + REQUIRE(xsf::wilcoxon_cdf(2.0, cdf_table, 2) == 0.75); + REQUIRE(xsf::wilcoxon_cdf(3.0, cdf_table, 2) == 1.0); + REQUIRE(xsf::wilcoxon_cdf(4.0, cdf_table, 2) == 1.0); + } + + { // n = 3 + const std::vector pmf = xsf::wilcoxon_pmf(3); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_cdf(3.0, cdf_table, 3) == 0.625); + REQUIRE(xsf::wilcoxon_cdf(4.0, cdf_table, 3) == 0.75); + REQUIRE(xsf::wilcoxon_cdf(5.0, cdf_table, 3) == 0.875); + REQUIRE(xsf::wilcoxon_cdf(6.0, cdf_table, 3) == 1.0); + REQUIRE(xsf::wilcoxon_cdf(7.0, cdf_table, 3) == 1.0); + } + + { // n = 5 + const std::vector pmf = xsf::wilcoxon_pmf(5); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_cdf(5.0, cdf_table, 5) == 0.3125); + REQUIRE(xsf::wilcoxon_cdf(6.0, cdf_table, 5) == 0.40625); + REQUIRE(xsf::wilcoxon_cdf(7.0, cdf_table, 5) == 0.5); + REQUIRE(xsf::wilcoxon_cdf(8.0, cdf_table, 5) == 0.59375); + REQUIRE(xsf::wilcoxon_cdf(9.0, cdf_table, 5) == 0.6875); + } +} + +TEST_CASE("wilcoxon_cdf edge cases", "[wilcoxon_cdf]") { + const std::vector pmf = xsf::wilcoxon_pmf(2); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + + REQUIRE(xsf::wilcoxon_cdf(4.0, cdf_table, 2) == 1.0); + + REQUIRE(xsf::wilcoxon_cdf(1.9, cdf_table, 2) == xsf::wilcoxon_cdf(1.0, cdf_table, 2)); + REQUIRE(xsf::wilcoxon_cdf(-0.5, cdf_table, 2) == xsf::wilcoxon_cdf(0.0, cdf_table, 2)); + + REQUIRE(std::isnan(xsf::wilcoxon_cdf(0.0, cdf_table, -1))); + REQUIRE(std::isnan(xsf::wilcoxon_cdf(std::numeric_limits::quiet_NaN(), cdf_table, 2))); +} + +TEST_CASE("wilcoxon_sf exact small cases", "[wilcoxon_sf]") { + { + // n = 0 + const std::vector pmf = xsf::wilcoxon_pmf(0); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_sf(0.0, cdf_table, 0) == 1.0); + REQUIRE(xsf::wilcoxon_sf(1.0, cdf_table, 0) == 0.0); + } + + { // n = 1 + const std::vector pmf = xsf::wilcoxon_pmf(1); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_sf(0.0, cdf_table, 1) == 1.0); + REQUIRE(xsf::wilcoxon_sf(1.0, cdf_table, 1) == 0.5); + REQUIRE(xsf::wilcoxon_sf(2.0, cdf_table, 1) == 0.0); + } + + { // n = 2 + const std::vector pmf = xsf::wilcoxon_pmf(2); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_sf(0.0, cdf_table, 2) == 1.0); + REQUIRE(xsf::wilcoxon_sf(1.0, cdf_table, 2) == 0.75); + REQUIRE(xsf::wilcoxon_sf(2.0, cdf_table, 2) == 0.5); + REQUIRE(xsf::wilcoxon_sf(3.0, cdf_table, 2) == 0.25); + REQUIRE(xsf::wilcoxon_sf(4.0, cdf_table, 2) == 0.0); + } + + { // n = 3 + const std::vector pmf = xsf::wilcoxon_pmf(3); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_sf(3.0, cdf_table, 3) == 0.625); + REQUIRE(xsf::wilcoxon_sf(4.0, cdf_table, 3) == 0.375); + REQUIRE(xsf::wilcoxon_sf(5.0, cdf_table, 3) == 0.25); + REQUIRE(xsf::wilcoxon_sf(6.0, cdf_table, 3) == 0.125); + REQUIRE(xsf::wilcoxon_sf(7.0, cdf_table, 3) == 0.0); + } + + { // n = 5 + const std::vector pmf = xsf::wilcoxon_pmf(5); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + REQUIRE(xsf::wilcoxon_sf(5.0, cdf_table, 5) == 0.78125); + REQUIRE(xsf::wilcoxon_sf(6.0, cdf_table, 5) == 0.6875); + REQUIRE(xsf::wilcoxon_sf(7.0, cdf_table, 5) == 0.59375); + REQUIRE(xsf::wilcoxon_sf(8.0, cdf_table, 5) == 0.5); + REQUIRE(xsf::wilcoxon_sf(9.0, cdf_table, 5) == 0.40625); + } +} + +TEST_CASE("wilcoxon_sf edge cases", "[wilcoxon_sf]") { + const std::vector pmf = xsf::wilcoxon_pmf(2); + const std::vector cdf_table = xsf::wilcoxon_cdf_table(pmf); + + REQUIRE(xsf::wilcoxon_sf(-1.0, cdf_table, 2) == 1.0); + REQUIRE(xsf::wilcoxon_sf(5.0, cdf_table, 2) == 0.0); + + REQUIRE(xsf::wilcoxon_sf(2.9, cdf_table, 2) == xsf::wilcoxon_sf(2.0, cdf_table, 2)); + REQUIRE(xsf::wilcoxon_sf(-0.5, cdf_table, 2) == xsf::wilcoxon_sf(0.0, cdf_table, 2)); + + REQUIRE(std::isnan(xsf::wilcoxon_sf(0.0, cdf_table, -1))); + REQUIRE(std::isnan(xsf::wilcoxon_sf(std::numeric_limits::quiet_NaN(), cdf_table, 2))); +}