fslaborg · github-actions · Apr 15, 2026 · Apr 15, 2026
diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj
@@ -128,6 +128,7 @@
     <Compile Include="Testing\Bartlett.fs" />
     <Compile Include="Testing\PostHoc.fs" />
     <Compile Include="Testing\Wilcoxon.fs" />
+    <Compile Include="Testing\MannWhitney.fs" />
     <Compile Include="Testing\MultipleTesting.fs" />
     <Compile Include="Testing\Outliers.fs" />
     <Compile Include="Testing\SAM.fs" />

diff --git a/src/FSharp.Stats/Testing/MannWhitney.fs b/src/FSharp.Stats/Testing/MannWhitney.fs
@@ -0,0 +1,101 @@
+namespace FSharp.Stats.Testing
+
+/// <summary>
+/// Mann-Whitney U test (also known as the Wilcoxon rank-sum test) for two independent samples.
+/// Uses a normal approximation, which is reliable for combined sample sizes ≥ 20.
+/// </summary>
+module MannWhitneyTest =
+
+    open FSharp.Stats
+
+    /// Result of a Mann-Whitney U test for two independent samples.
+    type MannWhitneyTestStatistics =
+        {
+            /// U statistic for sample 1: number of pairs (x₁, x₂) where x₁ > x₂ (counting ties as 0.5).
+            U1: float
+            /// U statistic for sample 2: U1 + U2 = n₁ × n₂.
+            U2: float
+            /// Z-score from the normal approximation.
+            Statistic: float
+            /// One-sided p-value: P(sample 1 is stochastically less than sample 2).
+            PValueLeft: float
+            /// One-sided p-value: P(sample 1 is stochastically greater than sample 2).
+            PValueRight: float
+            /// Two-sided p-value.
+            PValue: float
+        }
+
+    /// <summary>
+    /// Creates a Mann-Whitney U test for two independent samples using a normal approximation.
+    /// </summary>
+    /// <param name="sample1">First independent sample.</param>
+    /// <param name="sample2">Second independent sample.</param>
+    /// <param name="continuityCorrection">
+    /// When true, applies a continuity correction (±0.5) to the z-score. Recommended for small samples.
+    /// Default: true.
+    /// </param>
+    /// <remarks>
+    /// The test is reliable for combined sample sizes ≥ 20. For smaller samples, exact p-values
+    /// (not yet implemented) should be used instead of the normal approximation.
+    ///
+    /// Ties are handled via the standard variance correction.
+    ///
+    /// A large U1 (close to n₁×n₂) indicates that sample 1 tends to have larger values than sample 2,
+    /// yielding a small PValueRight. Conversely, a small U1 yields a small PValueLeft.
+    /// </remarks>
+    let create (sample1: seq<float>) (sample2: seq<float>) (continuityCorrection: bool) : MannWhitneyTestStatistics =
+        let arr1 = Seq.toArray sample1
+        let arr2 = Seq.toArray sample2
+        let n1   = float arr1.Length
+        let n2   = float arr2.Length
+        let bigN = n1 + n2
+
+        // Rank combined array; first n1 entries correspond to sample1
+        let combined = Array.append arr1 arr2
+        let ranks    = Rank.RankAverage() combined
+
+        // Sum of ranks for sample 1
+        let r1 = Array.sumBy (fun i -> ranks.[i]) [| 0 .. arr1.Length - 1 |]
+
+        // U statistics
+        let u1 = r1 - n1 * (n1 + 1.) / 2.
+        let u2 = n1 * n2 - u1
+
+        // Variance with tie correction
+        let tieCorrection =
+            ranks
+            |> Array.countBy id
+            |> Array.sumBy (fun (_, cnt) ->
+                let t = float cnt
+                t * t * t - t)
+
+        let mu  = n1 * n2 / 2.
+        let var =
+            if bigN <= 1. then 0.
+            else (n1 * n2 / (bigN * (bigN - 1.))) * ((bigN * bigN * bigN - bigN - tieCorrection) / 12.)
+
+        let sigma = sqrt var
+
+        // Z-score using U1 (positive z → sample1 tends to be larger)
+        let z =
+            if sigma = 0. then 0.
+            else
+                let cc =
+                    if not continuityCorrection then 0.
+                    elif u1 > mu then -0.5
+                    elif u1 < mu then  0.5
+                    else 0.
+                (u1 - mu + cc) / sigma
+
+        let pLeft  = Distributions.Continuous.Normal.CDF 0. 1. z
+        let pRight = 1. - pLeft
+        let pTwo   = 2. * (min pLeft pRight)
+
+        {
+            U1          = u1
+            U2          = u2
+            Statistic   = z
+            PValueLeft  = pLeft
+            PValueRight = pRight
+            PValue      = pTwo
+        }
diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs
@@ -1455,4 +1455,62 @@ let anovaTests =
             // Expect.floatClose Accuracy.low      (Math.Round (twoWayANOVARandom.Total.SumOfSquares,8))           0.7815                 "Total.SumOfSquares deviated from expected value"               
             // Expect.floatClose Accuracy.low      (Math.Round (twoWayANOVARandom.Total.MeanSquares,8))            0.04597                "Total.MeanSquares deviated from expected value"               
 
-        ]
+        ]
+
+[<Tests>]
+let testMannWhitneyTest =
+    // Reference values cross-checked against R: wilcox.test(..., exact=FALSE, correct=FALSE)
+    // and R: wilcox.test(..., exact=FALSE, correct=TRUE)
+
+    // Test case 1: clear separation (sample1 >> sample2) -- no ties
+    // R: wilcox.test(c(6,7,8,9,10), c(1,2,3,4,5), exact=FALSE, correct=FALSE)  → W=25, p=0.004516
+    // R: wilcox.test(c(6,7,8,9,10), c(1,2,3,4,5), exact=FALSE, correct=TRUE)   → W=25, p=0.01220
+    let s1a = [| 6.; 7.; 8.; 9.; 10. |]
+    let s2a = [| 1.; 2.; 3.; 4.; 5.  |]
+    let resultNoCC  = MannWhitneyTest.create s1a s2a false
+    let resultCC    = MannWhitneyTest.create s1a s2a true
+
+    // Test case 2: moderate overlap -- no ties
+    // sample1=[2;4;6;8;10], sample2=[1;3;5;7;9]
+    // U1=15, U2=10; z(no cc)=(15-12.5)/4.787=0.5222; pRight=0.3008; pTwo=0.6016
+    let s1b = [| 2.; 4.; 6.; 8.; 10. |]
+    let s2b = [| 1.; 3.; 5.; 7.; 9.  |]
+    let resultB = MannWhitneyTest.create s1b s2b false
+
+    // Test case 3: data with ties
+    // sample1=[1;2;3;4], sample2=[2;3;5;6]
+    // U1=4, U2=12, mu=8, var=11.714, sigma=3.4225
+    // z=-1.1693, pLeft=0.1212, pRight=0.8788, pTwo=0.2423
+    let s1c = [| 1.; 2.; 3.; 4. |]
+    let s2c = [| 2.; 3.; 5.; 6. |]
+    let resultC = MannWhitneyTest.create s1c s2c false
+
+    testList "Testing.MannWhitneyTest" [
+        testCase "U statistics - clear separation" <| fun () ->
+            Expect.floatClose Accuracy.medium resultNoCC.U1 25. "U1 should be 25 (sample1 dominates)"
+            Expect.floatClose Accuracy.medium resultNoCC.U2  0. "U2 should be 0"
+        testCase "U1 + U2 = n1*n2" <| fun () ->
+            Expect.floatClose Accuracy.medium (resultNoCC.U1 + resultNoCC.U2) 25. "U1+U2 must equal n1*n2=25"
+            Expect.floatClose Accuracy.medium (resultB.U1    + resultB.U2)    25. "U1+U2 must equal n1*n2=25"
+            Expect.floatClose Accuracy.medium (resultC.U1    + resultC.U2)    16. "U1+U2 must equal n1*n2=16"
+        testCase "p-values - clear separation, no continuity correction" <| fun () ->
+            // R: p-value = 0.004516
+            Expect.isTrue (resultNoCC.PValueRight < 0.01) "PValueRight should be < 0.01 (sample1 >> sample2)"
+            Expect.isTrue (resultNoCC.PValueLeft  > 0.99) "PValueLeft should be > 0.99"
+            Expect.floatClose Accuracy.low resultNoCC.PValue (2. * resultNoCC.PValueRight) "Two-sided = 2 × one-sided min"
+        testCase "p-values - clear separation, continuity correction" <| fun () ->
+            // R: p-value ≈ 0.01220
+            Expect.floatClose Accuracy.low (Math.Round(resultCC.PValue, 4)) 0.0122 "PValue with CC should ≈ 0.0122"
+        testCase "U statistics - moderate overlap" <| fun () ->
+            Expect.floatClose Accuracy.medium resultB.U1 15. "U1 should be 15"
+            Expect.floatClose Accuracy.medium resultB.U2 10. "U2 should be 10"
+        testCase "p-value not significant - moderate overlap" <| fun () ->
+            Expect.isTrue (resultB.PValue > 0.05) "p-value should not be significant for nearly identical distributions"
+        testCase "U statistics with ties" <| fun () ->
+            Expect.floatClose Accuracy.medium resultC.U1 4. "U1 should be 4 (with ties)"
+            Expect.floatClose Accuracy.medium resultC.U2 12. "U2 should be 12"
+        testCase "p-values with ties" <| fun () ->
+            // z ≈ -1.169, pLeft ≈ 0.121, pTwo ≈ 0.242
+            Expect.floatClose Accuracy.low (Math.Round(resultC.PValueLeft, 3)) 0.121 "PValueLeft with ties should ≈ 0.121"
+            Expect.floatClose Accuracy.low (Math.Round(resultC.PValue,     3)) 0.243 "PValueTwoTailed with ties should ≈ 0.243"
+    ]