Skip to content

Commit d9464cf

Browse files
committed
Add epi data; Add merging functions; Add haploR annot functions; Add summary plot functions
1 parent c35ecc7 commit d9464cf

143 files changed

Lines changed: 7245 additions & 14 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.DS_Store

6 KB
Binary file not shown.

.Rbuildignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
.*\.Rproj$
2+
^\.Rproj\.user$
3+
^README.Rmd
4+
^\.github$
5+
^doc$
6+
^Meta$
7+
^codecov\.yml$
8+
^_pkgdown\.yml$
9+
^docs$
10+
^pkgdown$
11+
12+
Dockerfile
13+
^LICENSE\.md$

.gitignore

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,42 @@
1+
# R project files
2+
*.Rproj
3+
.Rproj.user
4+
.Ruserdata
15
# History files
26
.Rhistory
37
.Rapp.history
4-
58
# Session Data files
69
.RData
7-
810
# User-specific files
911
.Ruserdata
10-
12+
# .DS_Store
13+
# find . -name .DS_Store -print0 | xargs -0 git rm -f --ignore-unmatch
14+
.DS_Store
15+
./**/.DS_Store
16+
./**/**/.DS_Store
17+
./**/**/**/.DS_Store
18+
./**/**/**/**/.DS_Store
19+
./**/**/**/**/**/.DS_Store
20+
./**/**/**/**/**/**/.DS_Store
1121
# Example code in package build process
1222
*-Ex.R
13-
1423
# Output files from R CMD build
1524
/*.tar.gz
16-
1725
# Output files from R CMD check
1826
/*.Rcheck/
19-
2027
# RStudio files
2128
.Rproj.user/
22-
2329
# produced vignettes
2430
vignettes/*.html
2531
vignettes/*.pdf
26-
32+
vignettes/*.R
2733
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
2834
.httr-oauth
29-
3035
# knitr and R markdown default cache directories
3136
*_cache/
3237
/cache/
33-
3438
# Temporary files created by R markdown
3539
*.utf8.md
3640
*.knit.md
37-
3841
# R Environment Variables
39-
.Renviron
42+
.Renviron

DESCRIPTION

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
Package: echoannot
2+
Type: Package
3+
Title: echoverse module: Annotate fine-mapping results
4+
Version: 0.99.1
5+
Authors@R:
6+
c(person(given = "Brian",
7+
family = "Schilder",
8+
role = c("cre"),
9+
email = "brian_schilder@alumni.brown.edu",
10+
comment = c(ORCID = "0000-0001-5949-2191")),
11+
person(given = "Jack",
12+
family = "Humphrey",
13+
role = c("aut"),
14+
email = "Jack.Humphrey@mssm.edu",
15+
comment = c(ORCID = "0000-0002-6274-6620")),
16+
person(given = "Towfique",
17+
family = "Raj",
18+
role = c("aut"),
19+
email = "towfique.raj@mssm.edu",
20+
comment = c(ORCID = "0000-0002-9355-5704"))
21+
)
22+
Description: echoverse module: Annotate fine-mapping results.
23+
URL: https://github.com/RajLabMSSM/echoannot
24+
BugReports: https://github.com/RajLabMSSM/echoannot/issues
25+
Encoding: UTF-8
26+
LazyData: true
27+
Depends: R (>= 4.1)
28+
SystemRequirements: Python (>= 3.7.0)
29+
biocViews:
30+
Imports:
31+
echodata,
32+
echotabix,
33+
dplyr,
34+
data.table,
35+
utils,
36+
stats,
37+
tidyr,
38+
parallel,
39+
haploR,
40+
ggplot2,
41+
patchwork,
42+
ggbio,
43+
RColorBrewer,
44+
scales,
45+
GenomicRanges,
46+
DescTools,
47+
pheatmap,
48+
grDevices,
49+
rtracklayer,
50+
S4Vectors,
51+
GenomeInfoDb,
52+
biomaRt,
IRanges
53+
Suggests:
54+
markdown,
55+
rmarkdown,
56+
remotes,
57+
knitr,
58+
BiocStyle,
59+
covr,
60+
testthat (>= 3.0.0),
61+
corrplot
62+
Remotes:
63+
github::RajLabMSSM/echodata,
64+
github::RajLabMSSM/echotabix
65+
RoxygenNote: 7.1.2
66+
VignetteBuilder: knitr
67+
License: GPL-3
68+
Config/testthat/edition: 3

NAMESPACE

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(CORCES_2020.get_HiChIP_FitHiChIP_overlap)
4+
export(CS_bin_plot)
5+
export(CS_counts_plot)
6+
export(merge_finemapping_results)
7+
export(peak_overlap_plot)
8+
export(plot_dataset_overlap)
9+
export(super_summary_plot)
10+
import(ggplot2)
11+
importFrom(DescTools,Divisors)
12+
importFrom(GenomeInfoDb,seqlevelsStyle)
13+
importFrom(GenomicRanges,GRanges)
14+
importFrom(GenomicRanges,elementMetadata)
15+
importFrom(GenomicRanges,end)
16+
importFrom(GenomicRanges,findOverlaps)
17+
importFrom(GenomicRanges,makeGRangesFromDataFrame)
18+
importFrom(GenomicRanges,mcols)
19+
importFrom(GenomicRanges,start)
20+
importFrom(IRanges,IRanges)
21+
importFrom(RColorBrewer,brewer.pal)
22+
importFrom(S4Vectors,queryHits)
23+
importFrom(S4Vectors,subjectHits)
24+
importFrom(biomaRt,getBM)
25+
importFrom(biomaRt,useMart)
26+
importFrom(data.table,as.data.table)
27+
importFrom(data.table,data.table)
28+
importFrom(data.table,fread)
29+
importFrom(data.table,fwrite)
30+
importFrom(data.table,melt.data.table)
31+
importFrom(data.table,merge.data.table)
32+
importFrom(data.table,rbindlist)
33+
importFrom(dplyr,"%>%")
34+
importFrom(dplyr,arrange)
35+
importFrom(dplyr,case_when)
36+
importFrom(dplyr,desc)
37+
importFrom(dplyr,funs)
38+
importFrom(dplyr,group_by)
39+
importFrom(dplyr,mutate)
40+
importFrom(dplyr,n_distinct)
41+
importFrom(dplyr,rename)
42+
importFrom(dplyr,select)
43+
importFrom(dplyr,slice)
44+
importFrom(dplyr,summarise)
45+
importFrom(dplyr,summarise_at)
46+
importFrom(dplyr,top_n)
47+
importFrom(dplyr,vars)
48+
importFrom(ggbio,autoplot)
49+
importFrom(ggbio,geom_arch)
50+
importFrom(ggbio,geom_rect)
51+
importFrom(ggbio,ggbio)
52+
importFrom(ggbio,ggsave)
53+
importFrom(ggbio,plotGrandLinear)
54+
importFrom(ggbio,scale_x_sequnit)
55+
importFrom(ggbio,theme_genome)
56+
importFrom(grDevices,dev.off)
57+
importFrom(grDevices,png)
58+
importFrom(haploR,queryRegulome)
59+
importFrom(parallel,mclapply)
60+
importFrom(patchwork,plot_layout)
61+
importFrom(patchwork,plot_spacer)
62+
importFrom(pheatmap,pheatmap)
63+
importFrom(rtracklayer,import.bw)
64+
importFrom(scales,alpha)
65+
importFrom(stats,as.formula)
66+
importFrom(stats,formula)
67+
importFrom(stats,median)
68+
importFrom(stats,setNames)
69+
importFrom(tidyr,separate)
70+
importFrom(utils,head)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#' Get overlap between datatable of SNPs and scATAC peaks
#'
#' Can optionally add \code{Cicero} coaccessibility scores,
#' which are also derived from scATAC-seq data.
#'
#' @param finemap_dat Fine-mapping results. The \code{CHR} and \code{POS}
#' columns are used as the SNP coordinates.
#' @param FDR_filter If not \code{NULL}, only keep overlapping peaks
#' whose \code{FDR} is below this threshold.
#' @param add_cicero Also append hits that have a Cicero coaccessibility
#' score (labelled \code{Assay = "Cicero"}).
#' Only applied when \code{cell_type_specific = TRUE}.
#' @param cell_type_specific Use the cell-type-specific scATAC-seq peaks
#' (\code{TRUE}) or the bulk ATAC-seq peaks from brain tissue (\code{FALSE}).
#' @param verbose Print messages.
#'
#' @return The SNP-peak overlaps returned by \code{granges_overlap}
#' (presumably a \code{GRanges} object), with an added \code{Assay} column.
#' Bulk results additionally get a \code{brain} column set to 1.
#'
#' @family CORCES_2020
#' @source \url{https://doi.org/10.1038/s41588-020-00721-x}
#' @keywords internal
CORCES_2020.get_ATAC_peak_overlap <- function(finemap_dat,
                                              FDR_filter = NULL,
                                              add_cicero = TRUE,
                                              cell_type_specific = TRUE,
                                              verbose = TRUE) {
    # Placeholders for variables used via non-standard evaluation in subset().
    FDR <- Cicero <- NULL

    if (cell_type_specific) {
        messager("CORCES_2020:: Extracting overlapping",
            "cell-type-specific scATAC-seq peaks",
            v = verbose
        )
        dat <- echoannot::CORCES_2020.scATACseq_celltype_peaks
        Assay <- "scATAC"
    } else {
        messager("CORCES_2020:: Extracting overlapping",
            "bulkATAC-seq peaks from brain tissue",
            v = verbose
        )
        dat <- echoannot::CORCES_2020.bulkATACseq_peaks
        Assay <- "bulkATAC"
    }
    # Peak coordinates are stored in hg38 columns; lift them over to hg19
    # (presumably the build of finemap_dat -- TODO confirm).
    gr.peaks_lifted <- echotabix::liftover(
        sumstats_dt = dat,
        convert_ref_genome = "hg19",
        ref_genome = "hg38",
        chrom_col = "hg38_Chromosome",
        start_col = "hg38_Start",
        end_col = "hg38_Stop",
        as_granges = TRUE,
        style = "NCBI",
        verbose = FALSE
    )
    # Get overlap with PEAKS
    gr.hits <- granges_overlap(
        dat1 = finemap_dat,
        chrom_col.1 = "CHR",
        start_col.1 = "POS",
        end_col.1 = "POS",
        dat2 = gr.peaks_lifted
    )
    gr.hits$Assay <- Assay
    if (!is.null(FDR_filter)) {
        gr.hits <- subset(gr.hits, FDR < FDR_filter)
    }

    # Scalar condition: use the short-circuiting `&&`, not elementwise `&`.
    if (add_cicero && cell_type_specific) {
        # Best-effort step: if anything fails, keep the plain peak hits
        # untouched and report the problem instead of leaving gr.hits
        # half-modified.
        gr.hits <- tryCatch(
            {
                # Pretty sure the Peak_IDs are shared between the
                # sc-ATACseq data and cicero,
                # because Cicero derives coaccess from sc-ATAC-seq data:
                # http://www.cell.com/molecular-cell/retrieve/pii/S1097276518305471?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS1097276518305471%3Fshowall%3Dtrue
                ## Also pretty sure that checking for cicero overlap
                # only in the scATACseq gr.hits object is ok
                # bc you can only test for coaccessibility if there's a
                # peak to begin with.
                cicero <- echoannot::CORCES_2020.cicero_coaccessibility
                # Lookup table: peak ID (either end of a pair) -> score.
                cicero_dict <- c(
                    stats::setNames(
                        cicero$Coaccessibility,
                        cicero$Peak_ID_Peak1
                    ),
                    stats::setNames(
                        cicero$Coaccessibility,
                        cicero$Peak_ID_Peak2
                    )
                )
                cicero_scores <- cicero_dict[gr.hits$Peak_ID]
                if (any(!is.na(cicero_scores))) {
                    gr.scored <- gr.hits
                    gr.scored$Cicero <- cicero_scores
                    gr.cicero <- subset(gr.scored, !is.na(Cicero))
                    gr.cicero$Assay <- "Cicero"
                    messager(
                        "+ CORCES_2020:: Cicero coaccessibility scores",
                        "identified for",
                        length(gr.cicero), "/", length(gr.scored),
                        "peak hits.",
                        v = verbose
                    )
                    # Scored hits appear twice: once under their original
                    # Assay and once as Assay = "Cicero".
                    rbind_granges(
                        gr1 = gr.scored,
                        gr2 = gr.cicero
                    )
                } else {
                    messager("+ CORCES_2020:: No Cicero hits found.",
                        v = verbose
                    )
                    gr.hits
                }
            },
            error = function(e) {
                warning(
                    "CORCES_2020:: Could not add Cicero coaccessibility ",
                    "scores: ", conditionMessage(e),
                    call. = FALSE
                )
                gr.hits
            }
        )
    }
    if (!cell_type_specific) {
        # Bulk data has no cell-type columns; add a pseudo cell-type column.
        gr.hits$brain <- 1
    }
    return(gr.hits)
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#' Get overlap between data table of SNPs and HiChIP_FitHiChIP
#' coaccessibility anchors
#'
#' Anchors are the genomic regions that have evidence of being
#' functionally connected to one another (coaccessible),
#' e.g. enhancer-promoter interactions.
#'
#' @param finemap_dat Fine-mapping results. The \code{CHR} and \code{POS}
#' columns are used as the SNP coordinates.
#' @param verbose Print messages.
#'
#' @return The combined SNP-anchor overlaps for both loop anchors, as
#' returned by \code{rbind_granges}, with added columns \code{Anchor}
#' (1 or 2), \code{Assay} ("HiChIP_FitHiChIP") and \code{brain} (1).
#'
#' @family CORCES_2020
#' @source \url{https://doi.org/10.1038/s41588-020-00721-x}
#' @export
#'
CORCES_2020.get_HiChIP_FitHiChIP_overlap <- function(finemap_dat,
                                                     verbose = TRUE) {
    loops <- echoannot::CORCES_2020.HiChIP_FitHiChIP_loop_calls

    # Both anchors are processed identically; only the column suffix differs.
    anchor_hits <- lapply(c(1, 2), function(anchor) {
        # Anchor coordinates are stored in hg38 columns; lift them to hg19.
        # `ref_genome` is stated explicitly, consistent with
        # CORCES_2020.get_ATAC_peak_overlap.
        gr.anchor <- echotabix::liftover(
            sumstats_dt = loops,
            convert_ref_genome = "hg19",
            ref_genome = "hg38",
            chrom_col = paste0("hg38_Chromosome_Anchor", anchor),
            start_col = paste0("hg38_Start_Anchor", anchor),
            end_col = paste0("hg38_Stop_Anchor", anchor),
            verbose = FALSE,
            as_granges = TRUE,
            style = "NCBI"
        )
        gr.hits <- granges_overlap(
            dat1 = finemap_dat,
            chrom_col.1 = "CHR",
            start_col.1 = "POS",
            end_col.1 = "POS",
            dat2 = gr.anchor
        )
        gr.hits$Anchor <- anchor
        gr.hits
    })

    # Merge and report
    gr.anchor <- rbind_granges(anchor_hits[[1]], anchor_hits[[2]])
    gr.anchor$Assay <- "HiChIP_FitHiChIP"
    # Have to make a pseudo cell-type col bc (i think)
    # this analysis was done on bulk data
    gr.anchor$brain <- 1
    messager("+ CORCES_2020:: Found", length(gr.anchor),
        "hits with HiChIP_FitHiChIP coaccessibility loop anchors.",
        v = verbose
    )
    return(gr.anchor)
}

0 commit comments

Comments
 (0)