1-
21EssNonEss_plot <- function (DataMatrix ,essgenes ,nonessgenes ,filename ,fdr = 0.05 ){
32 if (is.list(DataMatrix )){
43 DataVectors <- lapply(DataMatrix ,function (x ) as.vector(unlist(x )))
@@ -54,7 +53,7 @@ combatadj<-function(Data1,Data2,fdr=0.05,distmetric="Cor",subsetByQC=NULL,genese
5453 ingenes <- intersect(rownames(Data1 ),rownames(Data2 ))
5554 combinedData <- cbind(Data1 [ingenes ,],Data2 [ingenes ,])
5655 if (! is.null(subsetByQC )){
57- clnames <- strsplit (colnames(combinedData ),' ---' )
56+ clnames <- str_split (colnames(combinedData ),' ---' )
5857 clnames <- unlist(lapply(clnames ,function (x ){x [1 ]}))
5958
6059 uc <- unique(clnames )
@@ -99,7 +98,7 @@ combatadj<-function(Data1,Data2,fdr=0.05,distmetric="Cor",subsetByQC=NULL,genese
9998
10099 stdData <- combinedData
101100 }
102- site <- strsplit (colnames(combinedData ),' ---' )
101+ site <- str_split (colnames(combinedData ),' ---' )
103102 site <- unlist(site )
104103 site <- site [seq(2 ,length(site ),2 )]
105104 names(site )<- colnames(combinedData )
@@ -174,7 +173,7 @@ classPerfCP<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NULL,di
174173 }
175174
176175 if (length(QC )> 0 ){
177- clnames <- strsplit (colnames(dataset ),' ---' )
176+ clnames <- str_split (colnames(dataset ),' ---' )
178177 clnames <- unlist(lapply(clnames ,function (x ){x [1 ]}))
179178
180179 uc <- unique(clnames )
@@ -185,10 +184,10 @@ classPerfCP<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NULL,di
185184 dataset <- dataset [,QC > = qualityTH ]
186185 }
187186
188- clnames <- strsplit (colnames(dataset ),' ---' )
187+ clnames <- str_split (colnames(dataset ),' ---' )
189188 clnames <- unlist(lapply(clnames ,function (x ){x [1 ]}))
190189
191- site <- strsplit (colnames(dataset ),' ---' )
190+ site <- str_split (colnames(dataset ),' ---' )
192191 site <- unlist(lapply(site ,function (x ){x [[2 ]]}))
193192 if (distmetric == " Cor" ){
194193 cdist <- as.matrix(1 - cor(dataset ))
@@ -1154,7 +1153,31 @@ RemovePC<-function(data,droppcanumber=1,perfCheck=TRUE){
11541153 return (correctedData )
11551154 }
11561155}
1156+ PCextract <- function (data ,pcanumber = 1 ){
1157+ if (sum(is.na(data ))!= 0 ){
1158+ # Have NAs and need to impute missing values
1159+ # data is genes x cell lines
1160+ meanVals <- rowMeans(data ,na.rm = TRUE )
1161+ genesToimpute <- which(rowSums(is.na(data ))!= 0 )
1162+ for (i in 1 : length(genesToimpute )){
1163+ selcl <- which(is.na(data [genesToimpute [i ],]))
1164+ data [genesToimpute [i ],selcl ]<- meanVals [genesToimpute [i ]]
1165+ }
1166+ }
1167+ estpca <- prcomp(t(data ),scale. = TRUE )
1168+ npcas <- 1 : ncol(data )
1169+ pcause <- npcas [npcas %in% pcanumber ]
11571170
1171+ return (estpca $ rotation [,pcause ])
1172+
1173+ }
1174+ PCvals <- function (inputdata ,PCs ){
1175+ PCout <- list ()
1176+ for (i in 1 : ncol(PCs )){
1177+ PCout [[i ]]<- t(PCs [,i ])%*% inputdata
1178+ }
1179+ return (PCout )
1180+ }
11581181silhouetteScores <- function (clusterlabels ,distmat ){
11591182 if (! is.numeric(clusterlabels )){
11601183 clustercodes <- as.integer(1 : length(unique(clusterlabels )))
@@ -1293,10 +1316,10 @@ classPerfFeature<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NU
12931316}
12941317
12951318distPlotCP <- function (dataset ,title ,XLIM ,YLIMS ,extraDist = NULL ,weights = TRUE ){
1296- clnames <- strsplit (colnames(dataset ),' ---' )
1319+ clnames <- str_split (colnames(dataset ),' ---' )
12971320 clnames <- unlist(lapply(clnames ,function (x ){x [1 ]}))
12981321
1299- site <- strsplit (colnames(dataset ),' ---' )
1322+ site <- str_split (colnames(dataset ),' ---' )
13001323 site <- unlist(lapply(site ,function (x ){x [[2 ]]}))
13011324
13021325
@@ -1316,10 +1339,10 @@ distPlotCP<-function(dataset,title,XLIM,YLIMS,extraDist=NULL,weights=TRUE){
13161339 }
13171340 if (! is.null(extraDist )){
13181341 if (weights ){
1319- clnamese <- strsplit (colnames(extraDist ),' ---' )
1342+ clnamese <- str_split (colnames(extraDist ),' ---' )
13201343 clnamese <- unlist(lapply(clnamese ,function (x ){x [1 ]}))
13211344
1322- sitee <- strsplit (colnames(extraDist ),' ---' )
1345+ sitee <- str_split (colnames(extraDist ),' ---' )
13231346 sitee <- unlist(lapply(sitee ,function (x ){x [2 ]}))
13241347
13251348
@@ -1419,7 +1442,7 @@ if(is.null(labels)){labels=names(Reslist)}
14191442 # lines(1:npoints,100*1:npoints*1/npoints,col=makeTransparent('black',150))
14201443 labels [1 ]<- paste(labels [1 ],percent(Res1 [1 ]))
14211444 for (i in 2 : length(Reslist )){
1422- lines(1 : npoints ,100 * Reslist [[i ]],col = collist [i ],lwd = 5 ,type = ' l' ,lty = ltylist [i ])
1445+ lines(1 : npoints ,100 * Reslist [[i ]],col = collist [i ],lwd = 4 ,type = ' l' ,lty = ltylist [i ])
14231446 labels [i ]<- paste(labels [i ],percent(Reslist [[i ]][1 ]))
14241447 }
14251448
@@ -1440,12 +1463,11 @@ nAUClist<-lapply(Reslist,function(x) trapz(1:npoints,100*x)/(100*npoints))
14401463 pdf(filename ,width = 3 ,height = 3 )
14411464 par(mar = c(2.0 ,2.0 ,0.1 ,0.2 )+ 0.1 ,mgp = c(1 ,0.25 ,0 ))
14421465 if (is.null(xlim )){
1443- plot(100 * Reslist ,frame.plot = FALSE ,col = collist [1 ],lwd = 5 ,type = ' l' ,
1444- xlab = ' neighbourhood' ,ylab = ' % cell lines matching counterpart' ,cex = 0.8 ,cex.axis = 0.8 ,cex.lab = 0.8 )
1445- }else {
1466+ plot(100 * Reslist ,frame.plot = FALSE ,col = collist [1 ],lwd = 5 ,type = ' l' ,
1467+ xlab = ' neighbourhood' ,ylab = ' % cell lines matching counterpart' ,cex = 0.8 ,cex.axis = 0.8 ,cex.lab = 0.8 )}else {
14461468 plot(100 * Reslist ,frame.plot = FALSE ,col = collist [1 ],lwd = 5 ,type = ' l' ,xlim = xlim ,
14471469 xlab = ' neighbourhood' ,ylab = ' % cell lines matching counterpart' ,cex = 0.8 ,cex.axis = 0.8 ,cex.lab = 0.8 )
1448- }
1470+ }
14491471 lines(1 : npoints ,100 * 1 : npoints * 1 / npoints ,col = makeTransparent(' black' ,150 ))
14501472 nauc <- trapz(1 : npoints ,100 * Reslist )/ (100 * npoints )
14511473 legend(' bottomright' ,legend = paste(labels ,
@@ -2011,11 +2033,11 @@ decodeCNA_cp<-function(MoBEM){
20112033 rn <- rownames(MoBEM )
20122034 ii <- grep(" cna" , rownames(MoBEM ))
20132035 cnaId <- rownames(MoBEM )[ii ]
2014- containedGenes <- unlist(lapply(strsplit (cnaId , " " ), function (x ) {
2036+ containedGenes <- unlist(lapply(str_split (cnaId , " " ), function (x ) {
20152037 x [2 ]
20162038 }))
20172039 containedGenes [is.na(containedGenes )] <- " "
2018- segments <- unlist(lapply(strsplit (unlist(lapply(strsplit (cnaId ,
2040+ segments <- unlist(lapply(str_split (unlist(lapply(str_split (cnaId ,
20192041 " :" ), function (x ) {
20202042 x [2 ]
20212043 })), " " ), function (x ) {
@@ -2251,13 +2273,12 @@ avgOverlap<-function(data,overlapAnnot,divergentCL=NULL){
22512273
22522274 for (i in 1 : nrow(overlapAnnot )){
22532275 subdata <- data [,colnames(data )%in% unlist(overlapAnnot [i ,c(" model_id" ," BROAD_ID" )])]
2254- print( i )
2255- if (length (subdata )> nrow( data ) ){
2276+
2277+ if (ncol (subdata )> 1 ){
22562278 newdata <- matrix (rowMeans(as.matrix(subdata )),ncol = 1 )
22572279 colnames(newdata )<- overlapAnnot [i ," model_id" ]
22582280 }else {
2259- newdata <- matrix (subdata ,ncol = 1 )
2260- colnames(newdata )<- overlapAnnot [i ," model_id" ]
2281+ newdata <- subdata
22612282 }
22622283
22632284 if (i == 1 ){
@@ -2313,7 +2334,10 @@ updateRownames<-function(inputdata,Map){
23132334 newnames [is.na(newnames )]<- names(newnames )[is.na(newnames )]
23142335 newnames <- newnames [newnames %in% names(table(newnames ))[table(newnames )== 1 ]]
23152336 inputdata <- inputdata [names(newnames ),]
2316- rownames(inputdata )<- newnames
2337+ if (is.matrix(inputdata )){
2338+ rownames(inputdata )<- newnames }else {
2339+ names(inputdata )<- newnames
2340+ }
23172341 return (inputdata )
23182342}
23192343
@@ -2408,13 +2432,14 @@ FindSignif<-function(inputset,integratedset,fdr=0.05){
24082432 for (i in 1 : length(tissues )){
24092433 i1 <- inputset [[tissues [i ]]]
24102434 i2 <- integratedset [[tissues [i ]]]
2435+ i1 $ fdr <- p.adjust(i1 $ p ,method = " fdr" )
2436+ i2 $ fdr <- p.adjust(i2 $ p ,method = " fdr" )
24112437 bothtest <- intersect(rownames(i1 ),rownames(i2 ))
24122438 # print(head(bothtest))
24132439 corTest [[i ]]<- cor.test(i1 [bothtest ," delta" ],i2 [bothtest ," delta" ])
24142440 i1 <- i1 [bothtest ,]
24152441 i2 <- i2 [bothtest ,]
2416- i1 $ fdr <- p.adjust(i1 $ p ,method = " fdr" )
2417- i2 $ fdr <- p.adjust(i2 $ p ,method = " fdr" )
2442+
24182443
24192444 signInt <- i2 [i2 $ fdr < fdr & i1 $ fdr > fdr ,]
24202445 signInt $ nposInd <- i1 [i2 $ fdr < fdr & i1 $ fdr > fdr ," npos" ]
@@ -2895,11 +2920,11 @@ fr<-function(testgenes,controlgenes){
28952920 sum(controlgenes %in% testgenes )/ length(controlgenes )
28962921}
28972922# https://github.com/cancerdatasci/ceres/blob/master/R/scale_to_essentials.R
2898- scale_to_essentials <- function (ge_fit ){
2923+ scale_to_essentials <- function (ge_fit , essgenes , nonessgenes ){
28992924
29002925
2901- essential_indices <- which(row.names(ge_fit ) %in% ceres :: hart_essentials [[ " Gene " ]] )
2902- nonessential_indices <- which(row.names(ge_fit ) %in% ceres :: hart_nonessentials [[ " Gene " ]] )
2926+ essential_indices <- which(row.names(ge_fit ) %in% essgenes )
2927+ nonessential_indices <- which(row.names(ge_fit ) %in% nonessgenes )
29032928
29042929 scaled_ge_fit <- ge_fit %> %
29052930 apply(2 , function (x ){
@@ -3136,16 +3161,3 @@ AovFactors<-function(InputDF){
31363161 AovRes <- aov(data ~ Batch + PreProc )
31373162
31383163}
3139- cohens_d <- function (x , y ) {
3140- lx <- length(x )- 1
3141- ly <- length(y )- 1
3142-
3143- md <- abs(mean(x ) - mean(y )) # # mean difference (numerator)
3144- csd <- lx * var(x ) + ly * var(y )
3145- csd <- csd / (lx + ly )
3146- csd <- sqrt(csd ) # # common sd computation
3147-
3148- cd <- md / csd # # cohen's d
3149-
3150- return (cd )
3151- }
0 commit comments