Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/FSharp.Stats/Testing/MultipleTesting.fs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,52 @@ module MultipleTesting =
result


/// Šidák single-step FWER adjustment (NaN-safe).
/// Each p-value is adjusted independently as p_adj = 1 - (1 - p)^m, where m is the number of
/// non-NaN tests. This is slightly less conservative than Bonferroni.
/// NaN p-values are preserved at their original indices.
let inline dunnSidakFWER (p : float[]) : float[] =
let total = p.Length
let indexed = p |> Array.mapi (fun i v -> i, v)
let valid = indexed |> Array.filter (fun (_, v) -> not (Double.IsNaN v))
let m = valid.Length
let result = Array.create total Double.NaN
if m = 0 then result else
valid |> Array.iter (fun (i, pv) ->
result.[i] <- min 1.0 (1.0 - (1.0 - pv) ** float m))
result

/// Holm–Šidák (step-down) FWER adjustment (NaN-safe).
/// Uses the Šidák formula at each step of the Holm procedure, giving a slightly more
/// powerful test than Holm–Bonferroni while still controlling the FWER.
/// NaN p-values are ignored in the computation and preserved at their original indices.
let inline holmSidakFWER (p : float[]) : float[] =
let total = p.Length
let indexed = p |> Array.mapi (fun i v -> i, v)
let valid = indexed |> Array.filter (fun (_, v) -> not (Double.IsNaN v))
let m = valid.Length
let result = Array.create total Double.NaN
if m = 0 then result else
let sorted = valid |> Array.sortBy snd

// raw[i] = 1 - (1 - p_(i+1))^(m-i) for i = 0..m-1 (rank in 1-indexed sorted order = i+1)
let raw =
sorted
|> Array.mapi (fun i (_, pv) -> 1.0 - (1.0 - pv) ** float (m - i))

// running max from the left, capped at 1.0
let adjAsc =
raw
|> Array.scan (fun runningMax r -> max runningMax r) 0.0
|> Array.tail
|> Array.map (min 1.0)

Array.zip (sorted |> Array.map fst) adjAsc
|> Array.iter (fun (i, adj) -> result.[i] <- adj)

result


/// Benjamini-Hochberg Correction (BH)
/// 'projection' should return a tuple of any identifier and the pValues as float, when applied to 'rawP'
/// This function applies the Benjamini-Hochberg multiple testing correcture and returns all False Discovery Rates to which the given p-values are still
Expand Down
44 changes: 44 additions & 0 deletions tests/FSharp.Stats.Tests/Testing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,50 @@ let hochbergTests =

]

[<Tests>]
let dunnSidakTests =
// p = [0.01; 0.04; 0.03; 0.1; 0.5], m=5
// expected values verified with Python: 1 - (1-p)^m
let pValues = [| 0.01; 0.04; 0.03; 0.1; 0.5 |]
let pNaN = [| 0.01; nan; 0.03; 0.1; 0.5 |]

testList "Testing.MultipleTesting.DunnSidak" [
testCase "singleStepBasic" <| fun () ->
let result = MultipleTesting.dunnSidakFWER pValues
// 1 - (1-p)^5 for each p
let expected = [| 0.049010; 0.184627; 0.141266; 0.40951; 0.96875 |]
Array.iter2 (fun r e ->
Expect.floatClose Accuracy.low r e "Single-step Šidák adjusted p-values should match."
) result expected
testCase "singleStepNaN" <| fun () ->
let result = MultipleTesting.dunnSidakFWER pNaN
// m=4 valid values; 1 - (1-p)^4
Expect.floatClose Accuracy.low result.[0] 0.039404 "p[0] with NaN should match."
Expect.isTrue (Double.IsNaN result.[1]) "NaN position should remain NaN."
Expect.floatClose Accuracy.low result.[2] 0.114707 "p[2] with NaN should match."
Expect.floatClose Accuracy.low result.[3] 0.34390 "p[3] with NaN should match."
Expect.floatClose Accuracy.low result.[4] 0.93750 "p[4] with NaN should match."
testCase "holmSidakBasic" <| fun () ->
let result = MultipleTesting.holmSidakFWER pValues
// sorted p: [0.01, 0.03, 0.04, 0.1, 0.5]
// raw: [1-0.99^5, 1-0.97^4, 1-0.96^3, 1-0.9^2, 1-0.5^1]
// running max (already monotone): [0.04901, 0.11471, 0.11526, 0.19, 0.5]
// back to original order: [0.04901, 0.11526, 0.11471, 0.19, 0.5]
let expected = [| 0.049010; 0.115264; 0.114707; 0.19; 0.5 |]
Array.iter2 (fun r e ->
Expect.floatClose Accuracy.low r e "Holm–Šidák adjusted p-values should match."
) result expected
testCase "holmSidakNaN" <| fun () ->
let result = MultipleTesting.holmSidakFWER pNaN
// m=4 valid values; sorted: [0.01, 0.03, 0.1, 0.5]
// raw: [1-0.99^4, 1-0.97^3, 1-0.9^2, 1-0.5^1] = [0.039404, 0.087327, 0.19, 0.5]
Expect.floatClose Accuracy.low result.[0] 0.039404 "p[0] Holm-Šidák NaN"
Expect.isTrue (Double.IsNaN result.[1]) "NaN position should remain NaN."
Expect.floatClose Accuracy.low result.[2] 0.087327 "p[2] Holm-Šidák NaN"
Expect.floatClose Accuracy.low result.[3] 0.19000 "p[3] Holm-Šidák NaN"
Expect.floatClose Accuracy.low result.[4] 0.50000 "p[4] Holm-Šidák NaN"
]

[<Tests>]
let benjaminiHochbergTests =

Expand Down
Loading