|
#' Melt deep learning annotations into long format
#'
#' Aggregates deep learning annotation values by SNP group within each
#' locus, then melts the result into long format and parses annotation
#' column names into component fields (\code{Model}, \code{Tissue},
#' \code{Assay}, \code{Type}, \code{Metric}, \code{SNP_group}).
#'
#' Annotation columns are selected by matching \code{model} (collapsed
#' into a single \code{"|"}-separated regular expression) against the
#' column names of \code{ANNOT}. Each selected column is summarised once
#' per SNP group, and the resulting column names are split on \code{"_"}
#' into the six component fields.
#'
#' @param ANNOT A \code{data.table} of deep learning annotations as
#' returned by \code{\link{DEEPLEARNING_query}}. The SNP-group filters
#' reference the columns \code{Locus}, \code{P}, \code{leadSNP},
#' \code{ABF.CS}, \code{SUSIE.CS}, \code{POLYFUN_SUSIE.CS},
#' \code{FINEMAP.CS}, \code{Support}, \code{Support_noPF},
#' \code{Consensus_SNP} and \code{Consensus_SNP_noPF}.
#' @param model Character vector of model names used to identify
#' annotation columns.
#' @param aggregate_func Name of the aggregation function
#' (e.g. \code{"mean"}, \code{"median"}), or the function itself.
#' It is called with \code{na.rm = TRUE}.
#' @param replace_NA Value to substitute for \code{NA} before aggregation.
#' @param replace_negInf Value to substitute for \code{-Inf} (currently
#' unused but reserved for future use).
#' @param save_path File path to save the melted result, or \code{FALSE}
#' (or \code{NULL}) to skip saving.
#' @param verbose Print messages.
#'
#' @returns A \code{data.table} in long format with columns \code{Locus},
#' \code{Annotation}, \code{value}, \code{Model}, \code{Tissue},
#' \code{Assay}, \code{Type}, \code{Metric}, and \code{SNP_group}.
#'
#' @export
#' @family DEEPLEARNING
#' @source
#' \url{https://alkesgroup.broadinstitute.org/LDSCORE/DeepLearning/Dey_DeepLearning.tgz}
#' @importFrom data.table data.table melt.data.table
#' @importFrom dplyr group_by summarise_at vars mutate
#' @importFrom tidyr replace_na separate
#' @importFrom echodata snp_group_filters
#' @examples
#' \dontrun{
#' annot_melt <- DEEPLEARNING_melt(
#'     ANNOT = ANNOT,
#'     aggregate_func = "mean",
#'     save_path = "results/deeplearning_snp_groups_mean.csv.gz"
#' )
#' }
DEEPLEARNING_melt <- function(
    ANNOT,
    model = c("Basenji", "BiClassCNN", "DeepSEA",
              "ChromHMM", "Roadmap", "Others"),
    aggregate_func = "mean",
    replace_NA = NA,
    replace_negInf = NA,
    save_path = FALSE,
    verbose = TRUE) {

    # Placeholder bindings for the column names used via non-standard
    # evaluation below (keeps R CMD check quiet about undefined globals).
    Locus <- P <- leadSNP <- ABF.CS <- SUSIE.CS <- NULL
    POLYFUN_SUSIE.CS <- FINEMAP.CS <- NULL
    Support <- Support_noPF <- NULL
    Consensus_SNP <- Consensus_SNP_noPF <- NULL
    Annotation <- SNP_group <- NULL

    snp_groups_list <- echodata::snp_group_filters()

    # Resolve the aggregation function. `mode = "function"` prevents a
    # non-function object that happens to share the name from being
    # picked up; a function object is accepted as-is.
    if (is.function(aggregate_func)) {
        agg_func <- aggregate_func
        agg_label <- "<function>"
    } else {
        agg_func <- get(aggregate_func, mode = "function")
        agg_label <- aggregate_func
    }

    # Shared wrapper applied to every SNP-group subset: substitute NAs,
    # then aggregate with NAs removed.
    aggregate_subset <- function(x) {
        agg_func(tidyr::replace_na(x, replace_NA), na.rm = TRUE)
    }

    # Columns whose names match any of the requested model names.
    annot_cols <- grep(paste(model, collapse = "|"),
                       colnames(ANNOT), value = TRUE)

    annot_melt <- ANNOT |>
        dplyr::group_by(Locus) |>
        dplyr::summarise_at(
            .vars = dplyr::vars(annot_cols),
            .funs = list(
                # Index via sample.int() rather than sample(.x, ...):
                # sample() on a single numeric value >= 1 draws from
                # 1:.x instead of returning that value. For longer
                # vectors the two forms are equivalent.
                "Random" = ~ aggregate_subset(
                    .x[sample.int(length(.x), size = 3, replace = TRUE)]
                ),
                "All" = ~ aggregate_subset(.x),
                "GWAS nom. sig." = ~ aggregate_subset(.x[P < .05]),
                "GWAS sig." = ~ aggregate_subset(.x[P < 5e-8]),
                "GWAS lead" = ~ aggregate_subset(.x[leadSNP]),
                "ABF CS" = ~ aggregate_subset(.x[ABF.CS > 0]),
                "SUSIE CS" = ~ aggregate_subset(.x[SUSIE.CS > 0]),
                "POLYFUN-SUSIE CS" = ~ aggregate_subset(
                    .x[POLYFUN_SUSIE.CS > 0]
                ),
                "FINEMAP CS" = ~ aggregate_subset(.x[FINEMAP.CS > 0]),
                "UCS (-PolyFun)" = ~ aggregate_subset(
                    .x[Support_noPF > 0]
                ),
                "UCS" = ~ aggregate_subset(.x[Support > 0]),
                "Support==0" = ~ aggregate_subset(.x[Support == 0]),
                "Support==1" = ~ aggregate_subset(.x[Support == 1]),
                "Support==2" = ~ aggregate_subset(.x[Support == 2]),
                "Support==3" = ~ aggregate_subset(.x[Support == 3]),
                "Support==4" = ~ aggregate_subset(.x[Support == 4]),
                "Consensus (-PolyFun)" = ~ aggregate_subset(
                    .x[Consensus_SNP_noPF]
                ),
                "Consensus" = ~ aggregate_subset(.x[Consensus_SNP])
            )
        ) |>
        data.table::data.table() |>
        data.table::melt.data.table(
            id.vars = "Locus",
            variable.name = "Annotation"
        ) |>
        # Split the summarised column names on "_" into their parts;
        # the last piece is the SNP-group label from the list above.
        tidyr::separate(
            col = "Annotation",
            sep = "_",
            into = c("Model", "Tissue", "Assay", "Type",
                     "Metric", "SNP_group"),
            remove = FALSE
        ) |>
        dplyr::mutate(
            # Strip surrounding whitespace and leading/trailing
            # underscores from the full annotation name.
            Annotation = gsub("^_+|_+$", "",
                              trimws(as.character(Annotation))),
            SNP_group = factor(
                SNP_group,
                levels = names(snp_groups_list),
                ordered = TRUE
            )
        )

    # NULL is treated like FALSE: dirname(NULL) would otherwise error.
    if (!is.null(save_path) && !isFALSE(save_path)) {
        messager("DEEPLEARNING:: Saving aggregated SNP_group values",
                 agg_label, "==>", save_path, v = verbose)
        dir.create(dirname(save_path),
                   showWarnings = FALSE, recursive = TRUE)
        data.table::fwrite(annot_melt, save_path)
    }
    return(annot_melt)
}
0 commit comments