Skip to content

Commit

Permalink
Rework MannWhitneyTest
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreyAkinshin committed Mar 7, 2024
1 parent acbfbe1 commit 4d217b1
Show file tree
Hide file tree
Showing 18 changed files with 482 additions and 399 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public void BrunnerMunzelTest05()
private void CheckGreater(double[] x, double[] y, double w, double df, double pValue)
{
var threshold = AbsoluteThreshold.Zero;
var result = BrunnerMunzelTest.Instance.Run(x.ToSample(), y.ToSample(), AlternativeHypothesis.Greater, threshold);
var result = BrunnerMunzelTest.Instance.Perform(x.ToSample(), y.ToSample(), AlternativeHypothesis.Greater, threshold);
if (result == null)
throw new NullReferenceException($"{nameof(BrunnerMunzelTest)} returned null");
output.WriteLine("W = " + result.W + " (Expected: " + w + ")");
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Perfolizer.Tests.Mathematics.SignificanceTesting;
public class WelchTests
{
private static readonly AbsoluteEqualityComparer EqualityComparer = new(1e-5);

private readonly ITestOutputHelper output;

public WelchTests(ITestOutputHelper output) => this.output = output;
Expand Down Expand Up @@ -244,7 +244,7 @@ private void CheckGreater(double[] x, double[] y, Threshold threshold, double t,
private void Check(double[] x, double[] y, Threshold threshold, double t, double df, double pValue,
AlternativeHypothesis alternativeHypothesis)
{
var welch = WelchTest.Instance.Run(x.ToSample(), y.ToSample(), alternativeHypothesis, threshold);
var welch = WelchTest.Instance.Perform(x.ToSample(), y.ToSample(), alternativeHypothesis, threshold);
output.WriteLine("Alternative = " + alternativeHypothesis);
output.WriteLine("T = " + welch.T);
output.WriteLine("Df = " + welch.Df);
Expand Down
5 changes: 4 additions & 1 deletion src/Perfolizer/Perfolizer.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
<s:Int64 x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/BLANK_LINES_AFTER_BLOCK_STATEMENTS/@EntryValue">0</s:Int64>
<s:Int64 x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/INDENT_SIZE/@EntryValue">4</s:Int64>
<s:Boolean x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/KEEP_EXISTING_ATTRIBUTE_ARRANGEMENT/@EntryValue">True</s:Boolean>
<s:Boolean x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/WRAP_BEFORE_COMMA/@EntryValue">True</s:Boolean>
<s:Boolean x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/WRAP_BEFORE_COMMA/@EntryValue">False</s:Boolean>
<s:Int64 x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/WRAP_LIMIT/@EntryValue">120</s:Int64>
<s:String x:Key="/Default/CodeStyle/CSharpVarKeywordUsage/ForBuiltInTypes/@EntryValue">UseExplicitType</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/PredefinedNamingRules/=PrivateInstanceFields/@EntryIndexedValue">&lt;Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /&gt;</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/PredefinedNamingRules/=PrivateStaticFields/@EntryIndexedValue">&lt;Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /&gt;</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/UserRules/=4a98fdf6_002D7d98_002D4f5a_002Dafeb_002Dea44ad98c70c/@EntryIndexedValue">&lt;Policy&gt;&lt;Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"&gt;&lt;ElementKinds&gt;&lt;Kind Name="FIELD" /&gt;&lt;Kind Name="READONLY_FIELD" /&gt;&lt;/ElementKinds&gt;&lt;/Descriptor&gt;&lt;Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /&gt;&lt;/Policy&gt;</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/UserRules/=f9fce829_002De6f4_002D4cb2_002D80f1_002D5497c44f51df/@EntryIndexedValue">&lt;Policy&gt;&lt;Descriptor Staticness="Static" AccessRightKinds="Private" Description="Static fields (private)"&gt;&lt;ElementKinds&gt;&lt;Kind Name="FIELD" /&gt;&lt;/ElementKinds&gt;&lt;/Descriptor&gt;&lt;Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /&gt;&lt;/Policy&gt;</s:String>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EFeature_002EServices_002EDaemon_002ESettings_002EMigration_002ESwaWarningsModeSettingsMigrate/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ECSharpKeepExistingMigration/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ECSharpPlaceEmbeddedOnSameLineMigration/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ECSharpUseContinuousIndentInsideBracesMigration/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ESettingsUpgrade_002EMigrateBlankLinesAroundFieldToBlankLinesAroundProperty/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ESettingsUpgrade_002EPredefinedNamingRulesToUserRulesUpgrade/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Abramowitz/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Akinshin/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Algo/@EntryIndexedValue">True</s:Boolean>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ internal static class MathExtensions
{
/// <summary>Returns <paramref name="x"/> multiplied by itself.</summary>
public static double Sqr(this double x) => x * x;
/// <summary>Extension-method form of <c>Math.Sqrt</c>.</summary>
public static double Sqrt(this double x) => Math.Sqrt(x);
/// <summary>Raises an integer base to the power <paramref name="k"/> via <c>Math.Pow</c>; the result is a double.</summary>
public static double Pow(this int x, double k) => Math.Pow(x, k);
/// <summary>Extension-method form of <c>Math.Pow</c> for a double base.</summary>
public static double Pow(this double x, double k) => Math.Pow(x, k);
/// <summary>Limits <paramref name="x"/> from below by <paramref name="min"/> and then from above by <paramref name="max"/>.</summary>
public static double Clamp(this double x, double min, double max) => Math.Min(Math.Max(x, min), max);
/// <summary>Integer counterpart of the double <c>Clamp</c>: lower bound is applied first, then the upper bound.</summary>
public static int Clamp(this int x, int min, int max) => Math.Min(Math.Max(x, min), max);
/// <summary>
/// Rounds <paramref name="x"/> and casts the result to <c>int</c>.
/// NOTE(review): <c>Round</c> is presumably <c>Math.Round</c> brought in by a static using — confirm.
/// </summary>
public static int RoundToInt(this double x) => (int)Round(x);

/// <summary>
/// Projects every element of <paramref name="values"/> through the scalar <c>Clamp</c>.
/// The projection is a LINQ <c>Select</c>, so it is evaluated lazily on enumeration.
/// </summary>
public static IEnumerable<double> Clamp(this IEnumerable<double> values, double min, double max)
=> values.Select(x => Clamp(x, min, max));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ namespace Perfolizer.Mathematics.SignificanceTesting.Base;

public interface ISignificanceTwoSampleTest<out T> where T : SignificanceTwoSampleResult
{
T Run(Sample x, Sample y, AlternativeHypothesis alternativeHypothesis = AlternativeHypothesis.Greater, Threshold? threshold = null);
T Perform(Sample x, Sample y, AlternativeHypothesis alternative, Threshold threshold);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ private BrunnerMunzelTest()
{
}

public BrunnerMunzelResult Run(Sample x, Sample y, AlternativeHypothesis alternativeHypothesis = AlternativeHypothesis.Greater,
public BrunnerMunzelResult Perform(Sample x, Sample y, AlternativeHypothesis alternative = AlternativeHypothesis.Greater,
Threshold? threshold = null)
{
Assertion.NotNullOrEmpty(nameof(x), x);
Expand Down Expand Up @@ -61,25 +61,25 @@ public BrunnerMunzelResult Run(Sample x, Sample y, AlternativeHypothesis alterna
return Result(0.5, 0, double.NaN);
double w = diff > 0 ? double.PositiveInfinity : double.NegativeInfinity;

return alternativeHypothesis switch
return alternative switch
{
AlternativeHypothesis.TwoSides => Result(0, w, double.NaN),
AlternativeHypothesis.Less => Result(rxMean > ryMean ? 1 : 0, w, double.NaN),
AlternativeHypothesis.Greater => Result(rxMean < ryMean ? 1 : 0, w, double.NaN),
_ => throw new ArgumentOutOfRangeException(nameof(alternativeHypothesis), alternativeHypothesis, null)
_ => throw new ArgumentOutOfRangeException(nameof(alternative), alternative, null)
};
}
else
{
double w = (rxMean - ryMean) / Sqrt(sigma2 * (n + m));
double df = (sx2 / m + sy2 / n).Sqr() / ((sx2 / m).Sqr() / (n - 1) + (sy2 / n).Sqr() / (m - 1));
double cdf = new StudentDistribution(df).Cdf(w);
double pValue = SignificanceTestHelper.CdfToPValue(cdf, alternativeHypothesis);
double pValue = SignificanceTestHelper.CdfToPValue(cdf, alternative);

return Result(pValue, w, df);
}

BrunnerMunzelResult Result(double pValueResult, double wResult, double dfResult) =>
new(x, y, threshold, alternativeHypothesis, pValueResult, wResult, dfResult);
new(x, y, threshold, alternative, pValueResult, wResult, dfResult);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

/// <summary>
/// Abstraction over a way of evaluating the cumulative distribution function
/// used by the Mann-Whitney test.
/// </summary>
public interface IMannWhitneyCdf
{
/// <summary>
/// Evaluates the CDF for the given arguments.
/// </summary>
/// <param name="n">First size argument (presumably the size of the first sample — confirm against callers).</param>
/// <param name="m">Second size argument (presumably the size of the second sample — confirm against callers).</param>
/// <param name="u">Value of the statistic at which the CDF is evaluated.</param>
/// <returns>The CDF value as a double.</returns>
double Cdf(int n, int m, int u);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using Perfolizer.Mathematics.Common;

namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

/// <summary>
/// Exact CDF for the Mann-Whitney test computed from a full table of counts
/// filled by a recurrence over both sample sizes and the statistic value.
/// </summary>
public class MannWhitneyClassicExactCdf : IMannWhitneyCdf
{
    public static readonly MannWhitneyClassicExactCdf Instance = new();

    /// <summary>
    /// Returns the sum of the counts for statistic values <c>0 .. u - 1</c>
    /// divided by the binomial coefficient <c>C(n + m, m)</c>.
    /// </summary>
    /// <param name="n">First sample size.</param>
    /// <param name="m">Second sample size.</param>
    /// <param name="u">Statistic value; values <c>&lt;= 0</c> yield 0.</param>
    /// <returns>A value in <c>[0, 1]</c>.</returns>
    public double Cdf(int n, int m, int u)
    {
        // The table is indexed by statistic values 0..q where q = u - 1.
        int q = u - 1;

        // Nothing to accumulate below zero. Without this guard a negative u would
        // request a negative array dimension and throw OverflowException.
        if (q < 0)
            return 0;

        int nm = Max(n, m);

        // w[i, j, k]: count for sample sizes i and j and statistic value k.
        // The table is square in (i, j) so that the symmetry w[i, j, k] = w[j, i, k] can be used.
        long[,,] w = new long[nm + 1, nm + 1, q + 1];
        for (int i = 0; i <= nm; i++)
        {
            for (int j = 0; j <= nm; j++)
            {
                for (int k = 0; k <= q; k++)
                {
                    if (i == 0 || j == 0 || k == 0)
                        w[i, j, k] = k == 0 ? 1 : 0;
                    else if (k > i * j)
                        w[i, j, k] = 0; // the statistic cannot exceed i * j
                    else if (i > j)
                        w[i, j, k] = w[j, i, k]; // already filled: the row j < i was processed earlier
                    else if (k < j) // j > 0 is guaranteed by the first branch
                        w[i, j, k] = w[i, k, k];
                    else
                        w[i, j, k] = w[i - 1, j, k - j] + w[i, j - 1, k];
                }
            }
        }

        long denominator = BinomialCoefficientHelper.BinomialCoefficient(n + m, m);
        long p = 0;
        if (q <= n * m / 2)
        {
            // Lower half: accumulate the counts directly.
            for (int i = 0; i <= q; i++)
                p += w[n, m, i];
        }
        else
        {
            // Upper half: accumulate the mirrored (shorter) tail and take the complement.
            q = n * m - q;
            for (int i = 0; i < q; i++)
                p += w[n, m, i];
            p = denominator - p;
        }

        return p * 1.0 / denominator;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
using System.Diagnostics.CodeAnalysis;
using Perfolizer.Mathematics.Common;
using Perfolizer.Mathematics.Distributions.ContinuousDistributions;
using Perfolizer.Mathematics.Functions;

namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

/// <summary>
/// Approximates the Mann-Whitney CDF by a normal CDF corrected with three
/// Edgeworth-style terms built from the 2nd, 4th and 6th moments.
/// See https://aakinshin.net/posts/mw-edgeworth2/
/// </summary>
public class MannWhitneyEdgeworthApproxCdf : IMannWhitneyCdf
{
    public static readonly MannWhitneyEdgeworthApproxCdf Instance = new();

    /// <summary>
    /// Evaluates the corrected normal approximation at <paramref name="u"/>.
    /// </summary>
    /// <param name="n">First sample size.</param>
    /// <param name="m">Second sample size.</param>
    /// <param name="u">Statistic value.</param>
    /// <returns>The approximation, clamped to <c>[0, 1]</c>.</returns>
    [SuppressMessage("ReSharper", "InconsistentNaming")]
    public double Cdf(int n, int m, int u)
    {
        // Promote the sizes to double once. Products such as n * m * (n + m + 1) or
        // 5 * m * n * (m + n) wrap around in 32-bit int arithmetic already for sample
        // sizes of a few hundred, which silently corrupts the moments.
        double dn = n;
        double dm = m;

        double mu = dn * dm / 2.0;
        double mu2 = dn * dm * (dn + dm + 1) / 12.0;
        double su = Sqrt(mu2);

        // Standardized statistic with a 0.5 shift.
        double z = (u - mu - 0.5) / su;
        double phi = NormalDistribution.Standard.Pdf(z);
        double Phi = NormalDistribution.Standard.Cdf(z);

        double mu4 =
            dn * dm * (dn + dm + 1) *
            (0
             + 5 * dm * dn * (dm + dn)
             - 2 * (dm.Pow(2) + dn.Pow(2))
             + 3 * dm * dn
             - 2 * (dn + dm)
            ) / 240.0;

        double mu6 =
            dn * dm * (dn + dm + 1) *
            (0
             + 35 * dm.Pow(2) * dn.Pow(2) * (dm.Pow(2) + dn.Pow(2))
             + 70 * dm.Pow(3) * dn.Pow(3)
             - 42 * dm * dn * (dm.Pow(3) + dn.Pow(3))
             - 14 * dm.Pow(2) * dn.Pow(2) * (dn + dm)
             + 16 * (dn.Pow(4) + dm.Pow(4))
             - 52 * dn * dm * (dn.Pow(2) + dm.Pow(2))
             - 43 * dn.Pow(2) * dm.Pow(2)
             + 32 * (dm.Pow(3) + dn.Pow(3))
             + 14 * dm * dn * (dn + dm)
             + 8 * (dn.Pow(2) + dm.Pow(2))
             + 16 * dn * dm
             - 8 * (dn + dm)
            ) / 4032.0;

        // Coefficients of the correction terms.
        double e3 = (mu4 / Pow(mu2, 2) - 3) / Factorial(4);
        double e5 = (mu6 / Pow(mu2, 3) - 15 * mu4 / Pow(mu2, 2) + 30) / Factorial(6);
        double e7 = 35 * Pow((mu4 / Pow(mu2, 2) - 3), 2) / Factorial(8);

        // Each term is the normal density times a Hermite polynomial, negated.
        double f3 = -phi * H3(z);
        double f5 = -phi * H5(z);
        double f7 = -phi * H7(z);

        double edgeworth = Phi + e3 * f3 + e5 * f5 + e7 * f7;

        // The corrected value may leave [0, 1]; clamp it back.
        return Min(Max(edgeworth, 0), 1);

        double H3(double x) => x.Pow(3) - 3 * x;
        double H5(double x) => x.Pow(5) - 10 * x.Pow(3) + 15 * x;
        double H7(double x) => x.Pow(7) - 21 * x.Pow(5) + 105 * x.Pow(3) - 105 * x;
        double Factorial(int x) => FactorialFunction.Value(x);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using Perfolizer.Mathematics.Common;

namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

/// <summary>
/// Exact Mann-Whitney CDF obtained from a one-dimensional recurrence over the statistic value.
/// See https://aakinshin.net/posts/mw-loeffler/
/// </summary>
public class MannWhitneyLoefflerExactCdf : IMannWhitneyCdf
{
    public static readonly MannWhitneyLoefflerExactCdf Instance = new();

    /// <summary>
    /// Divides the accumulated counts for statistic values below <paramref name="u"/>
    /// by the binomial coefficient <c>C(n + m, m)</c>.
    /// </summary>
    public double Cdf(int n, int m, int u)
    {
        long favorable = SumCdf(n, m, u);
        long total = BinomialCoefficientHelper.BinomialCoefficient(n + m, m);
        return favorable * 1.0 / total;
    }

    /// <summary>Total of all counts for statistic values <c>0 .. u - 1</c>; zero when <c>u &lt;= 0</c>.</summary>
    private long SumCdf(int n, int m, int u)
    {
        if (u <= 0)
            return 0;

        return FullCdf(n, m, u).Sum();
    }

    // TODO: support big numbers
    // TODO: research the maximum values of n and m that we can handle
    /// <summary>Builds the array of counts for statistic values <c>0 .. u - 1</c>.</summary>
    private long[] FullCdf(int n, int m, int u)
    {
        int top = u - 1;

        // sigma[i]: sum of the divisors of i taken from 1..n minus the sum of those taken from m+1..m+n.
        int[] sigma = new int[top + 1];
        for (int divisor = 1; divisor <= n; divisor++)
        {
            for (int multiple = divisor; multiple <= top; multiple += divisor)
                sigma[multiple] += divisor;
        }

        for (int divisor = m + 1; divisor <= m + n; divisor++)
        {
            for (int multiple = divisor; multiple <= top; multiple += divisor)
                sigma[multiple] -= divisor;
        }

        // counts[a] is derived from all previous entries weighted by sigma, then divided by a.
        long[] counts = new long[top + 1];
        counts[0] = 1;
        for (int a = 1; a <= top; a++)
        {
            long weighted = 0;
            for (int i = 0; i < a; i++)
                weighted += counts[i] * sigma[a - i];

            counts[a] = weighted / a;
        }

        return counts;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Perfolizer.Mathematics.Distributions.ContinuousDistributions;

namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

/// <summary>
/// Approximates the Mann-Whitney CDF with a normal distribution whose mean is
/// <c>n * m / 2</c> and whose variance is <c>n * m * (n + m + 1) / 12</c>.
/// </summary>
public class MannWhitneyNormalApproxCdf : IMannWhitneyCdf
{
    public static readonly MannWhitneyNormalApproxCdf Instance = new();

    /// <summary>
    /// Standardizes <paramref name="u"/> and evaluates <c>NormalDistribution.Gauss</c> at the result.
    /// </summary>
    /// <param name="n">First sample size.</param>
    /// <param name="m">Second sample size.</param>
    /// <param name="u">Statistic value.</param>
    public double Cdf(int n, int m, int u)
    {
        // Work in double from the start: n * m * (n + m + 1) exceeds int.MaxValue
        // once both sizes are around a thousand, and int arithmetic would wrap silently.
        double dn = n;
        double dm = m;

        double mu = dn * dm / 2.0;
        double su = Sqrt(dn * dm * (dn + dm + 1) / 12.0);
        double z = (u - mu) / su;
        return NormalDistribution.Gauss(z);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
using Perfolizer.Mathematics.SignificanceTesting.Base;
using Perfolizer.Mathematics.Thresholds;

namespace Perfolizer.Mathematics.SignificanceTesting;
namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

public class MannWhitneyResult : SignificanceTwoSampleResult
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
namespace Perfolizer.Mathematics.SignificanceTesting.MannWhitney;

/// <summary>
/// Names the available ways of obtaining the CDF for the Mann-Whitney test.
/// NOTE(review): the mapping from each member to an implementation is not visible here — confirm in the test class.
/// </summary>
public enum MannWhitneyStrategy
{
/// <summary>No explicit choice; presumably the test picks a strategy itself — TODO confirm the selection rule.</summary>
Auto,
/// <summary>Presumably the table-based exact computation — TODO confirm.</summary>
ClassicExact,
/// <summary>Presumably the Loeffler exact computation — TODO confirm.</summary>
LoefflerExact,
/// <summary>Presumably the plain normal approximation — TODO confirm.</summary>
NormalApprox,
/// <summary>Presumably the Edgeworth-corrected normal approximation — TODO confirm.</summary>
EdgeworthApprox
}
Loading

0 comments on commit 4d217b1

Please sign in to comment.