Skip to content

Commit dbabb64

Browse files
authored
Merge pull request #13 from Core-Bioinformatics/annotation_parameters
Annotation parameters
2 parents f235d20 + a021974 commit dbabb64

31 files changed

Lines changed: 596 additions & 562 deletions

DESCRIPTION

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: bulkAnalyseR
2-
Title: Shiny app for bulk sequencing data
3-
Version: 0.1.0
2+
Title: An Accessible, Interactive Pipeline for Analysing and Sharing Bulk Sequencing Results
3+
Version: 0.1.1
44
Authors@R: c(
55
person("Ilias", "Moutsopoulos", , "im383@cam.ac.uk", c("aut", "cre")),
66
person("Eleanor", "Williams", , "ecw63@cam.ac.uk", role = c("aut", "ctb")),
@@ -13,7 +13,7 @@ Description: Given an expression matrix from a bulk RNA-Seq experiment,
1313
differential expression analysis, volcano and cross plots,
1414
enrichment analysis and gene regulatory network inference,
1515
and can be customised to contain more panels by the user.
16-
License: GPL2
16+
License: GPL-2
1717
Encoding: UTF-8
1818
URL: https://github.com/Core-Bioinformatics/bulkAnalyseR
1919
BugReports: https://github.com/Core-Bioinformatics/bulkAnalyseR/issues
@@ -44,7 +44,6 @@ Imports:
4444
rlang,
4545
glue,
4646
preprocessCore,
47-
AnnotationDbi,
4847
matrixStats,
4948
noisyr,
5049
tibble,
@@ -54,10 +53,13 @@ Imports:
5453
visNetwork,
5554
DT,
5655
scales,
57-
shinyjs
56+
shinyjs,
57+
tidyr
5858
Suggests:
5959
rmarkdown,
6060
knitr,
61+
BiocManager,
62+
AnnotationDbi,
6163
org.Hs.eg.db,
6264
org.Mm.eg.db
6365
VignetteBuilder: knitr

R/DEfuns.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#' system.file("extdata", "expression_matrix.csv", package = "bulkAnalyseR"),
2222
#' row.names = 1
2323
#' ))
24-
#' expression.matrix.preproc <- preprocessExpressionMatrix(expression.matrix)[, 1:4]
24+
#' expression.matrix.preproc <- preprocessExpressionMatrix(expression.matrix)[1:500, 1:4]
2525
#'
2626
#' anno <- AnnotationDbi::select(
2727
#' getExportedValue('org.Mm.eg.db', 'org.Mm.eg.db'),
@@ -46,10 +46,10 @@
4646
#' var2 = "12h",
4747
#' anno = anno
4848
#' )
49-
#' # DE genes with log2(fold-change) > 5 in both pipelines
49+
#' # DE genes with |log2(fold-change)| > 2 and adjusted p-value < 0.05 in both pipelines
5050
#' intersect(
51-
#' dplyr::filter(edger, abs(log2FC) > 5, pvalAdj < 0.05)$gene_name,
52-
#' dplyr::filter(deseq, abs(log2FC) > 5, pvalAdj < 0.05)$gene_name
51+
#' dplyr::filter(edger, abs(log2FC) > 2, pvalAdj < 0.05)$gene_name,
52+
#' dplyr::filter(deseq, abs(log2FC) > 2, pvalAdj < 0.05)$gene_name
5353
#' )
5454
#' @name DEanalysis
5555
NULL
@@ -85,6 +85,7 @@ DEanalysis_edger <- function(
8585
edger.lrt <- edgeR::glmLRT(edger.fit, contrast = contrast)
8686
edger.table <- edger.lrt$table
8787

88+
gene_id <- NULL; pval <- NULL
8889
output = tibble::tibble(
8990
gene_id = rownames(expression.matrix),
9091
gene_name = anno$NAME[match(gene_id, anno$ENSEMBL)],
@@ -118,6 +119,7 @@ DEanalysis_deseq2 <- function(
118119
if(var1 < var2) contrast <- c(-1, 1) else contrast <- c(1, -1)
119120
deseq.res <- DESeq2::results(deseq, contrast = contrast)
120121

122+
gene_id <- NULL; pval <- NULL
121123
output <- tibble::tibble(
122124
gene_id = rownames(expression.matrix),
123125
gene_name = anno$NAME[match(gene_id, anno$ENSEMBL)],

R/DEpanel.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,9 @@ DEpanelServer <- function(id, expression.matrix, metadata, anno){
115115
}
116116

117117
DEtableSubset <- DEtable %>%
118-
dplyr::filter(abs(log2FC) > input[["lfcThreshold"]] & pvalAdj < input[["pvalThreshold"]]) %>%
119-
dplyr::arrange(desc(abs(log2FC)))
118+
dplyr::filter(abs(.data$log2FC) > input[["lfcThreshold"]] &
119+
.data$pvalAdj < input[["pvalThreshold"]]) %>%
120+
dplyr::arrange(dplyr::desc(abs(.data$log2FC)))
120121

121122
# the thresholds are returned here so that the MA/volcano plots and the table display
122123
# do not pick up new thresholds until the button is pressed

R/DEplotFuns.R

Lines changed: 52 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#' system.file("extdata", "expression_matrix.csv", package = "bulkAnalyseR"),
3030
#' row.names = 1
3131
#' ))
32-
#' expression.matrix.preproc <- preprocessExpressionMatrix(expression.matrix)[, 1:4]
32+
#' expression.matrix.preproc <- preprocessExpressionMatrix(expression.matrix)[1:500, 1:4]
3333
#'
3434
#' anno <- AnnotationDbi::select(
3535
#' getExportedValue('org.Mm.eg.db', 'org.Mm.eg.db'),
@@ -64,14 +64,14 @@ volcano_plot <- function(
6464
...
6565
){
6666
df = genes.de.results %>%
67-
dplyr::mutate(gene = gene_name, log10pval = log10(pvalAdj)) %>%
68-
dplyr::filter(!is.na(log10pval))
67+
dplyr::mutate(gene = .data$gene_name, log10pval = log10(.data$pvalAdj)) %>%
68+
dplyr::filter(!is.na(.data$log10pval))
6969

7070
if(all(df$log10pval >= -10)) log10pval.cap <- FALSE
7171
if(log10pval.cap) df$log10pval[df$log10pval < -10] <- -10
7272

7373
vp <- ggplot(data = df,
74-
mapping = aes(x = log2FC, y = -log10pval)) +
74+
mapping = aes(x = .data$log2FC, y = -.data$log10pval)) +
7575
ggplot2::theme_minimal() +
7676
xlab("log2(FC)") +
7777
ylab("-log10(pval)")
@@ -196,29 +196,29 @@ volcano_enhance <- function(
196196

197197
if(add.expression.colour.gradient){
198198
df.colour.gradient <- df %>%
199-
dplyr::filter(abs(log2FC) > lfc.threshold & log10pval < logp.threshold) %>%
200-
dplyr::arrange(log2exp)
199+
dplyr::filter(abs(.data$log2FC) > lfc.threshold & .data$log10pval < logp.threshold) %>%
200+
dplyr::arrange(.data$log2exp)
201201
if(identical(colour.gradient.scale$left, colour.gradient.scale$right)){
202202
vp <- vp +
203203
geom_point(data = df.colour.gradient,
204-
mapping = aes(x = log2FC, y = -log10pval, colour = log2exp)) +
204+
mapping = aes(x = .data$log2FC, y = -.data$log10pval, colour = .data$log2exp)) +
205205
scale_color_gradient(low = colour.gradient.scale$left[1],
206206
high = colour.gradient.scale$left[2],
207207
breaks = colour.gradient.breaks,
208208
limits = colour.gradient.limits) +
209209
labs(colour = "log2(exp)")
210210
}else{
211211
vp <- vp +
212-
geom_point(data = dplyr::filter(df.colour.gradient, log2FC < 0),
213-
mapping = aes(x = log2FC, y = -log10pval, colour = log2exp)) +
212+
geom_point(data = dplyr::filter(df.colour.gradient, .data$log2FC < 0),
213+
mapping = aes(x = .data$log2FC, y = -.data$log10pval, colour = .data$log2exp)) +
214214
scale_color_gradient(low = colour.gradient.scale$left[1],
215215
high = colour.gradient.scale$left[2],
216216
breaks = colour.gradient.breaks,
217217
limits = colour.gradient.limits) +
218218
labs(colour = "log2(exp)") +
219219
ggnewscale::new_scale_colour() +
220-
geom_point(data = dplyr::filter(df.colour.gradient, log2FC > 0),
221-
mapping = aes(x = log2FC, y = -log10pval, colour = log2exp)) +
220+
geom_point(data = dplyr::filter(df.colour.gradient, .data$log2FC > 0),
221+
mapping = aes(x = .data$log2FC, y = -.data$log10pval, colour = .data$log2exp)) +
222222
scale_colour_gradient(low = colour.gradient.scale$right[1],
223223
high = colour.gradient.scale$right[2],
224224
breaks = colour.gradient.breaks,
@@ -241,12 +241,12 @@ volcano_enhance <- function(
241241
if(!is.null(annotation)){
242242
df <- df %>%
243243
dplyr::mutate(
244-
symbol = annotation$SYMBOL[match(gene, annotation$ENSEMBL)],
245-
name = ifelse(is.na(symbol), gene, symbol)
244+
symbol = annotation$SYMBOL[match(.data$gene, annotation$ENSEMBL)],
245+
name = ifelse(is.na(.data$symbol), .data$gene, .data$symbol)
246246
) %>%
247-
dplyr::select(-symbol)
247+
dplyr::select(-.data$symbol)
248248
}else{
249-
df <- df %>% dplyr::mutate(name = gene)
249+
df <- df %>% dplyr::mutate(name = .data$gene)
250250
}
251251

252252
df.label <- tibble::tibble()
@@ -255,7 +255,7 @@ volcano_enhance <- function(
255255
genes.to.label <- df$name[(match(genes.to.label, c(df$name, df$gene)) - 1) %% nrow(df) + 1]
256256
genes.to.label <- unique(genes.to.label[!is.na(genes.to.label)])
257257
genes.to.rename <- genes.to.rename[genes.to.rename %in% genes.to.label]
258-
df.label <- dplyr::filter(df, name %in% genes.to.label)
258+
df.label <- dplyr::filter(df, .data$name %in% genes.to.label)
259259
df.label$name[match(genes.to.rename, df.label$name)] <- names(genes.to.rename)
260260
if(nrow(df.label) == 0){
261261
message(paste0("add.labels.custom was TRUE but no genes specified; ",
@@ -265,12 +265,12 @@ volcano_enhance <- function(
265265

266266
if(add.labels.auto){
267267
if(length(n.labels.auto) == 1) n.labels.auto <- rep(n.labels.auto, 3)
268-
df.significant <- dplyr::filter(df, !(name %in% genes.to.label))
268+
df.significant <- dplyr::filter(df, !(.data$name %in% genes.to.label))
269269

270270
df.significant <- df.significant[order(abs(df.significant$log2FC), decreasing=TRUE), ]
271271
df.highest.lfc <- utils::head(df.significant, n.labels.auto[1])
272272
df.rest <- utils::tail(df.significant, nrow(df.significant) - n.labels.auto[1]) %>%
273-
dplyr::filter(abs(log2FC) > lfc.threshold, log10pval < logp.threshold)
273+
dplyr::filter(abs(.data$log2FC) > lfc.threshold, .data$log10pval < logp.threshold)
274274

275275
df.rest <- df.rest[order(abs(df.rest$log10pval), decreasing=TRUE), ]
276276
df.lowest.p.vals <- utils::head(df.rest, n.labels.auto[2])
@@ -280,16 +280,18 @@ volcano_enhance <- function(
280280
df.highest.abn <- utils::head(df.rest, n.labels.auto[3])
281281

282282
df.label <- rbind(df.lowest.p.vals, df.highest.lfc, df.highest.abn, df.label) %>%
283-
dplyr::distinct(name, .keep_all = TRUE)
283+
dplyr::distinct(.data$name, .keep_all = TRUE)
284284
}
285285

286286
set.seed(seed = seed)
287287
vp <- vp +
288-
ggrepel::geom_label_repel(data = df.label,
289-
mapping = aes(x = log2FC, y = -log10pval, label = name),
290-
max.overlaps = Inf,
291-
force = label.force,
292-
point.size = NA)
288+
ggrepel::geom_label_repel(
289+
data = df.label,
290+
mapping = aes(x = .data$log2FC, y = -.data$log10pval, label = .data$name),
291+
max.overlaps = Inf,
292+
force = label.force,
293+
point.size = NA
294+
)
293295
}
294296

295297
return(vp)
@@ -310,7 +312,7 @@ volcano_enhance <- function(
310312
#' system.file("extdata", "expression_matrix.csv", package = "bulkAnalyseR"),
311313
#' row.names = 1
312314
#' ))
313-
#' expression.matrix.preproc <- preprocessExpressionMatrix(expression.matrix)[, 1:4]
315+
#' expression.matrix.preproc <- preprocessExpressionMatrix(expression.matrix)[1:500, 1:4]
314316
#'
315317
#' anno <- AnnotationDbi::select(
316318
#' getExportedValue('org.Mm.eg.db', 'org.Mm.eg.db'),
@@ -344,11 +346,11 @@ ma_plot <- function(
344346
...
345347
){
346348
df = genes.de.results %>%
347-
dplyr::mutate(gene = gene_name, log10pval = log10(pvalAdj)) %>%
348-
dplyr::filter(!is.na(log10pval))
349+
dplyr::mutate(gene = .data$gene_name, log10pval = log10(.data$pvalAdj)) %>%
350+
dplyr::filter(!is.na(.data$log10pval))
349351

350352
p <- ggplot(data = df,
351-
mapping = aes(x = log2exp, y = log2FC)) +
353+
mapping = aes(x = .data$log2exp, y = .data$log2FC)) +
352354
ggplot2::theme_minimal() +
353355
xlab("Average log2(exp)") +
354356
ylab("log2(FC)")
@@ -437,29 +439,29 @@ ma_enhance <- function(
437439

438440
if(add.expression.colour.gradient){
439441
df.colour.gradient <- df %>%
440-
dplyr::filter(abs(log2FC) > lfc.threshold & log10pval < logp.threshold) %>%
441-
dplyr::arrange(log2exp)
442+
dplyr::filter(abs(.data$log2FC) > lfc.threshold & .data$log10pval < logp.threshold) %>%
443+
dplyr::arrange(.data$log2exp)
442444
if(identical(colour.gradient.scale$left, colour.gradient.scale$right)){
443445
p <- p +
444446
geom_point(data = df.colour.gradient,
445-
mapping = aes(x = log2exp, y = log2FC, colour = log2exp)) +
447+
mapping = aes(x = .data$log2exp, y = .data$log2FC, colour = .data$log2exp)) +
446448
scale_color_gradient(low = colour.gradient.scale$left[1],
447449
high = colour.gradient.scale$left[2],
448450
breaks = colour.gradient.breaks,
449451
limits = colour.gradient.limits) +
450452
labs(colour = "log2(exp)")
451453
}else{
452454
p <- p +
453-
geom_point(data = dplyr::filter(df.colour.gradient, log2FC < 0),
454-
mapping = aes(x = log2exp, y = log2FC, colour = log2exp)) +
455+
geom_point(data = dplyr::filter(df.colour.gradient, .data$log2FC < 0),
456+
mapping = aes(x = .data$log2exp, y = .data$log2FC, colour = .data$log2exp)) +
455457
scale_color_gradient(low = colour.gradient.scale$left[1],
456458
high = colour.gradient.scale$left[2],
457459
breaks = colour.gradient.breaks,
458460
limits = colour.gradient.limits) +
459461
labs(colour = "log2(exp)") +
460462
ggnewscale::new_scale_colour() +
461-
geom_point(data = dplyr::filter(df.colour.gradient, log2FC > 0),
462-
mapping = aes(x = log2exp, y = log2FC, colour = log2exp)) +
463+
geom_point(data = dplyr::filter(df.colour.gradient, .data$log2FC > 0),
464+
mapping = aes(x = .data$log2exp, y = .data$log2FC, colour = .data$log2exp)) +
463465
scale_colour_gradient(low = colour.gradient.scale$right[1],
464466
high = colour.gradient.scale$right[2],
465467
breaks = colour.gradient.breaks,
@@ -480,12 +482,12 @@ ma_enhance <- function(
480482
if(!is.null(annotation)){
481483
df <- df %>%
482484
dplyr::mutate(
483-
symbol = annotation$SYMBOL[match(gene, annotation$ENSEMBL)],
484-
name = ifelse(is.na(symbol), gene, symbol)
485+
symbol = annotation$SYMBOL[match(.data$gene, annotation$ENSEMBL)],
486+
name = ifelse(is.na(.data$symbol), .data$gene, .data$symbol)
485487
) %>%
486-
dplyr::select(-symbol)
488+
dplyr::select(-.data$symbol)
487489
}else{
488-
df <- df %>% dplyr::mutate(name = gene)
490+
df <- df %>% dplyr::mutate(name = .data$gene)
489491
}
490492

491493
df.label <- tibble::tibble()
@@ -494,7 +496,7 @@ ma_enhance <- function(
494496
genes.to.label <- df$name[(match(genes.to.label, c(df$name, df$gene)) - 1) %% nrow(df) + 1]
495497
genes.to.label <- unique(genes.to.label[!is.na(genes.to.label)])
496498
genes.to.rename <- genes.to.rename[genes.to.rename %in% genes.to.label]
497-
df.label <- dplyr::filter(df, name %in% genes.to.label)
499+
df.label <- dplyr::filter(df, .data$name %in% genes.to.label)
498500
df.label$name[match(genes.to.rename, df.label$name)] <- names(genes.to.rename)
499501
if(nrow(df.label) == 0){
500502
message(paste0("add.labels.custom was TRUE but no genes specified; ",
@@ -504,11 +506,11 @@ ma_enhance <- function(
504506

505507
if(add.labels.auto){
506508
if(length(n.labels.auto) == 1) n.labels.auto <- rep(n.labels.auto, 3)
507-
df.significant <- dplyr::filter(df, !(name %in% genes.to.label))
509+
df.significant <- dplyr::filter(df, !(.data$name %in% genes.to.label))
508510
df.significant <- df.significant[order(abs(df.significant$log2FC), decreasing=TRUE), ]
509511
df.highest.lfc <- utils::head(df.significant, n.labels.auto[1])
510512
df.rest <- utils::tail(df.significant, nrow(df.significant) - n.labels.auto[1]) %>%
511-
dplyr::filter(abs(log2FC) > lfc.threshold, log10pval < logp.threshold)
513+
dplyr::filter(abs(.data$log2FC) > lfc.threshold, .data$log10pval < logp.threshold)
512514

513515
df.rest <- df.rest[order(abs(df.rest$log10pval), decreasing=TRUE), ]
514516
df.lowest.p.vals <- utils::head(df.rest, n.labels.auto[2])
@@ -518,16 +520,18 @@ ma_enhance <- function(
518520
df.highest.abn <- utils::head(df.rest, n.labels.auto[3])
519521

520522
df.label <- rbind(df.lowest.p.vals, df.highest.lfc, df.highest.abn, df.label) %>%
521-
dplyr::distinct(name, .keep_all = TRUE)
523+
dplyr::distinct(.data$name, .keep_all = TRUE)
522524
}
523525

524526
set.seed(seed = seed)
525527
p <- p +
526-
ggrepel::geom_label_repel(data = df.label,
527-
mapping = aes(x = log2exp, y = log2FC, label = name),
528-
max.overlaps = Inf,
529-
force = label.force,
530-
point.size = NA)
528+
ggrepel::geom_label_repel(
529+
data = df.label,
530+
mapping = aes(x = .data$log2exp, y = .data$log2FC, label = .data$name),
531+
max.overlaps = Inf,
532+
force = label.force,
533+
point.size = NA
534+
)
531535
}
532536
return(p)
533537
}

R/DEplotPanel.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ DEplotPanelServer <- function(id, DEresults, anno){
139139
}else{
140140
data <- results$DEtableSubset
141141
}
142-
data <- data %>% dplyr::mutate(`-log10pval` = -log10(pvalAdj))
142+
data <- data %>% dplyr::mutate(`-log10pval` = -log10(.data$pvalAdj))
143143
nearPoints(df = data, coordinfo = input[['plot_click']], threshold = 20, maxpoints = 10)
144144
}, digits = 4)
145145

R/DEsummaryFuns.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
#' Create heatmap of an expression matrix
22
#' @description This function creates a heatmap to visualise an expression matrix
33
#' @inheritParams generateShinyApp
4+
#' @inheritParams rescale_matrix
5+
#' @param expression.matrix.subset a subset of rows from the expression matrix;
6+
#' rows correspond to genes and columns correspond to samples
47
#' @param top.annotation.ids a vector of column indices denoting which columns
58
#' of the metadata should become heatmap annotations
6-
#' @param show.columns.names whether to show the column names below the heatmap;
9+
#' @param show.column.names whether to show the column names below the heatmap;
710
#' default is TRUE
811
#' @return The heatmap as detailed in the ComplexHeatmap package.
912
#' @export

0 commit comments

Comments
 (0)