diff --git a/src/FSharp.Stats/Testing/MultipleTesting.fs b/src/FSharp.Stats/Testing/MultipleTesting.fs index 9088ffae..afcdb513 100644 --- a/src/FSharp.Stats/Testing/MultipleTesting.fs +++ b/src/FSharp.Stats/Testing/MultipleTesting.fs @@ -78,6 +78,52 @@ module MultipleTesting = result + /// Šidák single-step FWER adjustment (NaN-safe). + /// Each p-value is adjusted independently as p_adj = 1 - (1 - p)^m, where m is the number of + /// non-NaN tests. This is slightly less conservative than Bonferroni. + /// NaN p-values are preserved at their original indices. + let inline dunnSidakFWER (p : float[]) : float[] = + let total = p.Length + let indexed = p |> Array.mapi (fun i v -> i, v) + let valid = indexed |> Array.filter (fun (_, v) -> not (Double.IsNaN v)) + let m = valid.Length + let result = Array.create total Double.NaN + if m = 0 then result else + valid |> Array.iter (fun (i, pv) -> + result.[i] <- min 1.0 (1.0 - (1.0 - pv) ** float m)) + result + + /// Holm–Šidák (step-down) FWER adjustment (NaN-safe). + /// Uses the Šidák formula at each step of the Holm procedure, giving a slightly more + /// powerful test than Holm–Bonferroni while still controlling the FWER. + /// NaN p-values are ignored in the computation and preserved at their original indices. + let inline holmSidakFWER (p : float[]) : float[] = + let total = p.Length + let indexed = p |> Array.mapi (fun i v -> i, v) + let valid = indexed |> Array.filter (fun (_, v) -> not (Double.IsNaN v)) + let m = valid.Length + let result = Array.create total Double.NaN + if m = 0 then result else + let sorted = valid |> Array.sortBy snd + + // raw[i] = 1 - (1 - p_(i+1))^(m-i) for i = 0..m-1 (rank in 1-indexed sorted order = i+1) + let raw = + sorted + |> Array.mapi (fun i (_, pv) -> 1.0 - (1.0 - pv) ** float (m - i)) + + // running max from the left, capped at 1.0 + let adjAsc = + raw + |> Array.scan (fun runningMax r -> max runningMax r) 0.0 + |> Array.tail + |> Array.map (min 1.0) + + Array.zip (sorted |> Array.map fst) adjAsc + |> Array.iter (fun (i, adj) -> result.[i] <- adj) + + result + + /// Benjamini-Hochberg Correction (BH) /// 'projection' should return a tuple of any identifier and the pValues as float, when applied to 'rawP' /// This function applies the Benjamini-Hochberg multiple testing correcture and returns all False Discovery Rates to which the given p-values are still diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs index 1adfd493..70202a9b 100644 --- a/tests/FSharp.Stats.Tests/Testing.fs +++ b/tests/FSharp.Stats.Tests/Testing.fs @@ -497,6 +497,50 @@ let hochbergTests = ] +[] +let dunnSidakTests = + // p = [0.01; 0.04; 0.03; 0.1; 0.5], m=5 + // expected values verified with Python: 1 - (1-p)^m + let pValues = [| 0.01; 0.04; 0.03; 0.1; 0.5 |] + let pNaN = [| 0.01; nan; 0.03; 0.1; 0.5 |] + + testList "Testing.MultipleTesting.DunnSidak" [ + testCase "singleStepBasic" <| fun () -> + let result = MultipleTesting.dunnSidakFWER pValues + // 1 - (1-p)^5 for each p + let expected = [| 0.049010; 0.184627; 0.141266; 0.40951; 0.96875 |] + Array.iter2 (fun r e -> + Expect.floatClose Accuracy.low r e "Single-step Šidák adjusted p-values should match." + ) result expected + testCase "singleStepNaN" <| fun () -> + let result = MultipleTesting.dunnSidakFWER pNaN + // m=4 valid values; 1 - (1-p)^4 + Expect.floatClose Accuracy.low result.[0] 0.039404 "p[0] with NaN should match." + Expect.isTrue (Double.IsNaN result.[1]) "NaN position should remain NaN." + Expect.floatClose Accuracy.low result.[2] 0.114707 "p[2] with NaN should match." + Expect.floatClose Accuracy.low result.[3] 0.34390 "p[3] with NaN should match." + Expect.floatClose Accuracy.low result.[4] 0.93750 "p[4] with NaN should match." + testCase "holmSidakBasic" <| fun () -> + let result = MultipleTesting.holmSidakFWER pValues + // sorted p: [0.01, 0.03, 0.04, 0.1, 0.5] + // raw: [1-0.99^5, 1-0.97^4, 1-0.96^3, 1-0.9^2, 1-0.5^1] + // running max (already monotone): [0.04901, 0.11471, 0.11526, 0.19, 0.5] + // back to original order: [0.04901, 0.11526, 0.11471, 0.19, 0.5] + let expected = [| 0.049010; 0.115264; 0.114707; 0.19; 0.5 |] + Array.iter2 (fun r e -> + Expect.floatClose Accuracy.low r e "Holm–Šidák adjusted p-values should match." + ) result expected + testCase "holmSidakNaN" <| fun () -> + let result = MultipleTesting.holmSidakFWER pNaN + // m=4 valid values; sorted: [0.01, 0.03, 0.1, 0.5] + // raw: [1-0.99^4, 1-0.97^3, 1-0.9^2, 1-0.5^1] = [0.039404, 0.087327, 0.19, 0.5] + Expect.floatClose Accuracy.low result.[0] 0.039404 "p[0] Holm-Šidák NaN" + Expect.isTrue (Double.IsNaN result.[1]) "NaN position should remain NaN." + Expect.floatClose Accuracy.low result.[2] 0.087327 "p[2] Holm-Šidák NaN" + Expect.floatClose Accuracy.low result.[3] 0.19000 "p[3] Holm-Šidák NaN" + Expect.floatClose Accuracy.low result.[4] 0.50000 "p[4] Holm-Šidák NaN" + ] + [] let benjaminiHochbergTests =