Skip to content

Commit ecfe12a

Browse files
jgranja24rcorces
andauthored
Release 1.0.1 (#542)
* bugfix remove sys.time for weird error in R 4.0 * bug fix new uwot save and loading features * deprecated * version number * handle error where spurious fragments seem to be 1 tile above max * update bugfixes * let quantCut in plotTrajectory be null * grepExclude in plotTrajectoryHeatmap * bugfix integrative analysis coaccessibility and peak2genelinks * change file locking location groupcoverages * throw error if no peakset for adding annotations * make it so predictionScore is not needed for peak2gene links * update addPeakSet * updated description * bugfix named list * update error handling plotEnrichHeatmap * update error messages tilematrix * handle error with bsgenomes * width is end - start + 1 * bugfix checkCairo in iterativeLSI * update archr verbosity and logging for markerfeatures * bugfix + documentation * update subsetting to metadata * handling case n = 1 for deviations matrix * bugfix ordering of scTrack * handle error where no barcodes passing on a small chromosome/scaffold * bugfix validBSgenome * fix missing paren * fix missing paren * add feature for supplying custom gene list for rna integration * add sanity check to partial matrix * update cellsInArrow in case user overrides sample in ArchRProj * Add null option for genesUse to validInput * bugfix NA in combined vars * add checks for genes symbol to be not a list Co-authored-by: Ryan Corces <ryancorces@gmail.com>
1 parent 46b519f commit ecfe12a

33 files changed

Lines changed: 379 additions & 85 deletions

.DS_Store

0 Bytes
Binary file not shown.

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: ArchR
22
Type: Package
3-
Date: 2020-10-01
3+
Date: 2020-11-23
44
Title: Analyzing single-cell regulatory chromatin in R.
5-
Version: 1.0.0
5+
Version: 1.0.1
66
Authors@R: c(
77
person("Jeffrey", "Granja", email = "jgranja.stanford@gmail.com", role = c("aut","cre")),
88
person("Ryan", "Corces", role = "aut"))
@@ -11,7 +11,7 @@ Roxygen: list(markdown = TRUE)
1111
License: GPL (>= 2)
1212
LinkingTo: Rcpp
1313
LazyData: TRUE
14-
RoxygenNote: 7.0.2
14+
RoxygenNote: 7.1.1
1515
Encoding: UTF-8
1616
Imports:
1717
Rcpp (>= 0.12.16),

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export(addArchRDebugging)
1717
export(addArchRGenome)
1818
export(addArchRLogging)
1919
export(addArchRThreads)
20+
export(addArchRVerbose)
2021
export(addBgdPeaks)
2122
export(addCellColData)
2223
export(addClusters)
@@ -68,6 +69,7 @@ export(getArchRDebugging)
6869
export(getArchRGenome)
6970
export(getArchRLogging)
7071
export(getArchRThreads)
72+
export(getArchRVerbose)
7173
export(getArrowFiles)
7274
export(getAvailableMatrices)
7375
export(getBgdPeaks)

R/AllClasses.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ ArchRProject <- function(
130130

131131
message("Getting SampleNames...")
132132
sampleNames <- unlist(.safelapply(seq_along(ArrowFiles), function(x){
133-
message(x, " ", appendLF = FALSE)
133+
if(getArchRVerbose()) message(x, " ", appendLF = FALSE)
134134
.sampleName(ArrowFiles[x])
135135
}, threads = threads))
136136
message("")
@@ -159,7 +159,7 @@ ArchRProject <- function(
159159
#Cell Information
160160
message("Getting Cell Metadata...")
161161
metadataList <- .safelapply(seq_along(ArrowFiles), function(x){
162-
message(x, " ", appendLF = FALSE)
162+
if(getArchRVerbose()) message(x, " ", appendLF = FALSE)
163163
.getMetadata(ArrowFiles[x])
164164
}, threads = threads)
165165
message("")

R/AnnotationGenome.R

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,14 @@ createGenomeAnnotation <- function(
4141

4242
##################
4343
message("Getting blacklist..")
44-
blacklist <- .getBlacklist(genome = bsg@provider_version)
44+
45+
genomeName <- tryCatch({
46+
bsg@provider_version
47+
}, error = function(e){
48+
strsplit(bsg@pkgname,"\\.")[[1]][4]
49+
})
50+
51+
blacklist <- .getBlacklist(genome = genomeName)
4552

4653
}else{
4754

R/AnnotationPeaks.R

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ getMatches <- function(ArchRProj = NULL, name = NULL, annoName = NULL){
9898
#' binary value is stored indicating whether each region is observed within the peak region.
9999
#'
100100
#' @param ArchRProj An `ArchRProject` object.
101-
#' @param regions A `list` of `GRanges` that are to be overlapped with the `peakSet` in the `ArchRProject`.
101+
#' @param regions A named `list` of `GRanges` that are to be overlapped with the `peakSet` in the `ArchRProject`.
102102
#' @param name The name of `peakAnnotation` object to be stored as in `ArchRProject`.
103103
#' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be overwritten
104104
#' if it already exists in the given `ArchRProject`.
@@ -136,6 +136,10 @@ addPeakAnnotations <- function(
136136

137137
}else{
138138

139+
if(is.null(names(regions))){
140+
names(regions) <- paste0("Region_", seq_along(regions))
141+
}
142+
139143
regionPositions <- lapply(seq_along(regions), function(x){
140144

141145
.logThis(regions[[x]], paste0("regions[[x]]-", x), logFile = logFile)
@@ -184,6 +188,9 @@ addPeakAnnotations <- function(
184188
# Peak Overlap Matrix
185189
#############################################################
186190
peakSet <- getPeakSet(ArchRProj)
191+
if(is.null(peakSet)){
192+
.logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile)
193+
}
187194
allPositions <- unlist(regionPositions)
188195

189196
.logDiffTime("Creating Peak Overlap Matrix", t1 = tstart, verbose = TRUE, logFile = logFile)
@@ -431,6 +438,9 @@ addMotifAnnotations <- function(
431438
#############################################################
432439
.logDiffTime("Finding Motif Positions with motifmatchr!", t1 = tstart, verbose = TRUE, logFile = logFile)
433440
peakSet <- ArchRProj@peakSet
441+
if(is.null(peakSet)){
442+
.logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile)
443+
}
434444
motifPositions <- motifmatchr::matchMotifs(
435445
pwms = motifs,
436446
subject = peakSet,
@@ -606,7 +616,11 @@ addArchRAnnotations <- function(
606616
}
607617
}
608618

609-
genome <- tolower(validBSgenome(getGenome(ArchRProj))@provider_version)
619+
genome <- tolower(tryCatch({
620+
validBSgenome(getGenome(ArchRProj))$provider_version
621+
}, error = function(e){
622+
strsplit(validBSgenome(getGenome(ArchRProj))@pkgname,"\\.")[[1]][4]
623+
}))
610624

611625
annoPath <- file.path(find.package("ArchR", NULL, quiet = TRUE), "data", "Annotations")
612626
dir.create(annoPath, showWarnings = FALSE)
@@ -690,6 +704,9 @@ addArchRAnnotations <- function(
690704
# Peak Overlap Matrix
691705
#############################################################
692706
peakSet <- getPeakSet(ArchRProj)
707+
if(is.null(peakSet)){
708+
.logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile)
709+
}
693710
chr <- paste0(unique(seqnames(peakSet)))
694711

695712
.logMessage("Annotating Chromosomes", verbose = TRUE, logFile = logFile)
@@ -1061,6 +1078,10 @@ plotEnrichHeatmap <- function(
10611078
mat <- mat[keep, ,drop = FALSE]
10621079
.logThis(mat, "mat-mlog10Padj-Filter", logFile = logFile)
10631080

1081+
if(nrow(mat)==0){
1082+
stop("No enrichments found for your cutoff!")
1083+
}
1084+
10641085
passMat <- lapply(seq_len(nrow(mat)), function(x){
10651086
(mat[x, ] >= 0.9*max(mat[x, ])) * 1
10661087
}) %>% Reduce("rbind", .) %>% data.frame
@@ -1070,7 +1091,7 @@ plotEnrichHeatmap <- function(
10701091
mat[mat > pMax] <- pMax
10711092

10721093
if(nrow(mat)==0){
1073-
stop("No enrichments found!")
1094+
stop("No enrichments found for your cutoff!")
10741095
}
10751096

10761097
mat <- .rowScale(as.matrix(mat), min = 0)

R/ArchRBrowser.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,12 @@ plotBrowserTrack <- function(
17491749

17501750
title <- paste0(as.character(seqnames(region)),":", start(region)-1, "-", end(region), " ", title)
17511751

1752+
#Re-Order
1753+
groupDF$group2 <- factor(
1754+
paste0(groupDF$group2),
1755+
levels = gtools::mixedsort(unique(paste0(groupDF$group2)))
1756+
)
1757+
17521758
p <- ggplot(groupDF, aes(x=bp, y=y, width = tileSize, fill = group2, color = group2)) +
17531759
geom_tile(size = scTileSize) +
17541760
facet_grid(group2 ~ ., scales="free_y") +

R/ArrowRead.R

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,11 @@ getMatrixFromArrow <- function(
783783

784784
matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .)
785785
mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .)
786+
if(!all(cellNames %in% colnames(mat))){
787+
.logThis(sampledCellNames, "cellNames supplied", logFile = logFile)
788+
.logThis(colnames(mat), "cellNames from matrix", logFile = logFile)
789+
stop("Error not all cellNames found in partialMatrix")
790+
}
786791
mat <- mat[,sampledCellNames, drop = FALSE]
787792
mat <- .checkSparseMatrix(mat, length(sampledCellNames))
788793

@@ -793,6 +798,11 @@ getMatrixFromArrow <- function(
793798
}else{
794799

795800
mat <- Reduce("cbind", mat)
801+
if(!all(cellNames %in% colnames(mat))){
802+
.logThis(cellNames, "cellNames supplied", logFile = logFile)
803+
.logThis(colnames(mat), "cellNames from matrix", logFile = logFile)
804+
stop("Error not all cellNames found in partialMatrix")
805+
}
796806
mat <- mat[,cellNames, drop = FALSE]
797807
mat <- .checkSparseMatrix(mat, length(cellNames))
798808

@@ -902,6 +912,16 @@ getMatrixFromArrow <- function(
902912
stop("Means Variances and Ns lengths not identical")
903913
}
904914

915+
#Check if samples have NAs due to N = 1 sample or some other weird thing.
916+
#Set it to min non NA variance
917+
dfVars <- lapply(seq_len(nrow(dfVars)), function(x){
918+
vx <- dfVars[x, ]
919+
if(any(is.na(vx))){
920+
vx[is.na(vx)] <- min(vx[!is.na(vx)])
921+
}
922+
vx
923+
}) %>% Reduce("rbind", .)
924+
905925
combinedMeans <- rowSums(t(t(dfMeans) * ns)) / sum(ns)
906926
summedVars <- rowSums(t(t(dfVars) * (ns - 1)) + t(t(dfMeans^2) * ns))
907927
combinedVars <- (summedVars - sum(ns)*combinedMeans^2)/(sum(ns)-1)
@@ -925,8 +945,6 @@ getMatrixFromArrow <- function(
925945
length(.availableCells(ArrowFiles[y], useMatrix))
926946
}) %>% unlist
927947

928-
929-
930948
#Compute RowVars
931949
summaryDF <- .safelapply(seq_along(featureDF), function(x){
932950

R/ArrowUtils.R

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,10 +379,20 @@
379379
o <- h5createGroup(outArrow, groupName)
380380

381381
mData <- ArrowInfo[[groupName]]
382+
cellNames <- .h5read(inArrow, "Metadata/CellNames")
383+
idx <- which(cellNames %in% stringr::str_split(cellsKeep, pattern="#", simplify=TRUE)[,2])
382384

385+
if(length(idx)==0){
386+
stop("No cells matching in arrow file!")
387+
}
388+
383389
for(i in seq_len(nrow(mData))){
384390
h5name <- paste0(groupName, "/", mData$name[i])
385-
h5write(.h5read(inArrow, h5name), file = outArrow, name = h5name)
391+
mDatai <- .h5read(inArrow, h5name)
392+
if(length(mDatai)==length(cellNames)){
393+
mDatai <- mDatai[idx]
394+
}
395+
h5write(mDatai, file = outArrow, name = h5name)
386396
}
387397

388398
#2. scATAC-Fragments

R/CreateArrow.R

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ createArrowFiles <- function(
473473

474474
.logDiffTime("Continuing through after error ggplot for Fragment Size Distribution", t1 = tstart, logFile = logFile)
475475
#print(x)
476-
message("\n")
476+
if(getArchRVerbose()) message("\n")
477477

478478
})
479479
gc()
@@ -537,7 +537,7 @@ createArrowFiles <- function(
537537

538538
.logDiffTime("Continuing through after error ggplot for TSS by Frags", t1 = tstart, logFile = logFile)
539539
#message(x)
540-
message("\n")
540+
if(getArchRVerbose()) message("\n")
541541

542542
})
543543

@@ -1119,7 +1119,7 @@ createArrowFiles <- function(
11191119
TRUE
11201120
}, error = function(x){
11211121
tryCatch({
1122-
message("Attempting to index ", file," as tabix..")
1122+
if(getArchRVerbose()) message("Attempting to index ", file," as tabix..")
11231123
indexTabix(file, format = "bed")
11241124
TRUE
11251125
}, error = function(y){
@@ -1138,7 +1138,7 @@ createArrowFiles <- function(
11381138
}
11391139
}, error = function(x){
11401140
tryCatch({
1141-
message("Attempting to index ", file," as bam...")
1141+
if(getArchRVerbose()) message("Attempting to index ", file," as bam...")
11421142
indexBam(file)
11431143
TRUE
11441144
}, error = function(y){
@@ -1278,7 +1278,7 @@ createArrowFiles <- function(
12781278
o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
12791279
o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character",
12801280
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))
1281-
o <- h5write(obj = cbind(dt$V2,dt$V3-dt$V2), file = tmpFile, name = chrPos)
1281+
o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpFile, name = chrPos)
12821282
o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths)
12831283
o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues)
12841284

@@ -1314,7 +1314,7 @@ createArrowFiles <- function(
13141314
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character",
13151315
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))
13161316

1317-
o <- h5write(obj = cbind(dt$V2,dt$V3-dt$V2), file = tmpChrFile, name = chrPos)
1317+
o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpChrFile, name = chrPos)
13181318
o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths)
13191319
o <- h5write(obj = RG@values, file = tmpChrFile, name = chrRGValues)
13201320

@@ -1650,7 +1650,7 @@ createArrowFiles <- function(
16501650
o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character",
16511651
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))
16521652

1653-
o <- h5write(obj = cbind(dt$start,dt$end-dt$start), file = tmpFile, name = chrPos)
1653+
o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpFile, name = chrPos)
16541654
o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths)
16551655
o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues)
16561656

@@ -1686,7 +1686,7 @@ createArrowFiles <- function(
16861686
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character",
16871687
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))
16881688

1689-
o <- h5write(obj = cbind(dt$start,dt$end-dt$start), file = tmpChrFile, name = chrPos)
1689+
o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpChrFile, name = chrPos)
16901690
o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths)
16911691
o <- h5write(obj = RG@values, file = tmpChrFile, name = chrRGValues)
16921692

@@ -1930,14 +1930,29 @@ createArrowFiles <- function(
19301930
chrPos <- paste0("Fragments/",chr,"/Ranges")
19311931
chrRGLengths <- paste0("Fragments/",chr,"/RGLengths")
19321932
chrRGValues <- paste0("Fragments/",chr,"/RGValues")
1933-
o <- h5createGroup(outArrow, paste0("Fragments/",chr))
1934-
o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
1935-
o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
1936-
o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
1937-
size = max(nchar(mcols(fragments)$RG@values)) + 1))
1938-
o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos)
1939-
o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths)
1940-
o <- h5write(obj = mcols(fragments)$RG@values, file = outArrow, name = chrRGValues)
1933+
1934+
if(lengthRG == 0){
1935+
1936+
.logMessage(msg = paste0(prefix, " detected 0 Fragments in cells passing filtering threshold for ", chr), logFile = logFile)
1937+
1938+
o <- h5createGroup(outArrow, paste0("Fragments/",chr))
1939+
o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0))
1940+
o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0))
1941+
o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0,
1942+
size = 10))
1943+
1944+
}else{
1945+
1946+
o <- h5createGroup(outArrow, paste0("Fragments/",chr))
1947+
o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
1948+
o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
1949+
o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
1950+
size = max(nchar(mcols(fragments)$RG@values)) + 1))
1951+
1952+
o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos)
1953+
o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths)
1954+
o <- h5write(obj = mcols(fragments)$RG@values, file = outArrow, name = chrRGValues)
1955+
}
19411956

19421957
#Free Some Memory!
19431958
rm(fragments)
@@ -1995,15 +2010,26 @@ createArrowFiles <- function(
19952010
chrRGLengths <- paste0(chr, "._.RGLengths")
19962011
chrRGValues <- paste0(chr, "._.RGValues")
19972012

1998-
#HDF5 Write
1999-
o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
2000-
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
2001-
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
2002-
size = max(nchar(mcols(fragments)$RG@values)) + 1))
2003-
2004-
o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos)
2005-
o <- h5write(obj = mcols(fragments)$RG@lengths, file = tmpChrFile, name = chrRGLengths)
2006-
o <- h5write(obj = mcols(fragments)$RG@values, file = tmpChrFile, name = chrRGValues)
2013+
if(lengthRG == 0){
2014+
2015+
#HDF5 Write
2016+
o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0))
2017+
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0))
2018+
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0,
2019+
size = 10))
2020+
2021+
}else{
2022+
2023+
#HDF5 Write
2024+
o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
2025+
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
2026+
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
2027+
size = max(nchar(mcols(fragments)$RG@values)) + 1))
2028+
2029+
o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos)
2030+
o <- h5write(obj = mcols(fragments)$RG@lengths, file = tmpChrFile, name = chrRGLengths)
2031+
o <- h5write(obj = mcols(fragments)$RG@values, file = tmpChrFile, name = chrRGValues)
2032+
}
20072033

20082034
#Free Some Memory!
20092035
rm(fragments)

0 commit comments

Comments
 (0)