Skip to content

Commit d1179b5

Browse files
authored
update functions
1 parent 6a66911 commit d1179b5

2 files changed

Lines changed: 71 additions & 43 deletions

File tree

BiomarkerFunctions.R

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,14 +161,30 @@ AnovaFDRcurve<-function(AnovaResList,ResultNames=c("CCR","CERES","CCRJ"),batchle
161161

162162
}
163163

164-
plotBMres<-function(AnovaResList,plotcolours,fdr=0.05,preProc=c("CCR","CERES","CCRJ"),plotName=""){
164+
plotBMres<-function(AnovaResList,plotcolours,fdr=0.05,preProc=c("CCR","CERES","CCRJ"),plotName="",CFEtype="All"){
165165
batchnames<-names(AnovaResList)
166166

167167
BMData<-NULL
168168
for(i in 1:length(AnovaResList)){
169169
for(j in 1:3){
170-
temp<-data.frame(nmarkers=sum(AnovaResList[[i]][[j]]$fdr<fdr),preProc=preProc[j],batch=batchnames[i])
171-
BMData<-rbind(BMData,temp)
170+
if(CFEtype=="All"){
171+
temp<-data.frame(nmarkers=sum(AnovaResList[[i]][[j]]$fdr<fdr),preProc=preProc[j],batch=batchnames[i])
172+
BMData<-rbind(BMData,temp)
173+
}
174+
if(CFEtype%in%c("_mut","_HypMET")){
175+
Ares<-AnovaResList[[i]][[j]]
176+
Ares<-Ares[grep(CFEtype,Ares$CFE),]
177+
temp<-data.frame(nmarkers=sum(Ares$fdr<fdr),preProc=preProc[j],batch=batchnames[i])
178+
BMData<-rbind(BMData,temp)
179+
}
180+
if(CFEtype=="CNA"){
181+
Ares<-AnovaResList[[i]][[j]]
182+
Ares<-Ares[grep("_mut",Ares$CFE,invert=TRUE),]
183+
Ares<-Ares[grep("_HypMET",Ares$CFE,invert=TRUE),]
184+
Ares<-Ares[grep("MSI",Ares$CFE,invert=TRUE),]
185+
temp<-data.frame(nmarkers=sum(Ares$fdr<fdr),preProc=preProc[j],batch=batchnames[i])
186+
BMData<-rbind(BMData,temp)
187+
}
172188
}
173189

174190
}

Combat_HKfunctions.R

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
EssNonEss_plot<-function(DataMatrix,essgenes,nonessgenes,filename,fdr=0.05){
32
if(is.list(DataMatrix)){
43
DataVectors<-lapply(DataMatrix,function(x) as.vector(unlist(x)))
@@ -54,7 +53,7 @@ combatadj<-function(Data1,Data2,fdr=0.05,distmetric="Cor",subsetByQC=NULL,genese
5453
ingenes<-intersect(rownames(Data1),rownames(Data2))
5554
combinedData<-cbind(Data1[ingenes,],Data2[ingenes,])
5655
if(!is.null(subsetByQC)){
57-
clnames<-strsplit(colnames(combinedData),'---')
56+
clnames<-str_split(colnames(combinedData),'---')
5857
clnames<-unlist(lapply(clnames,function(x){x[1]}))
5958

6059
uc<-unique(clnames)
@@ -99,7 +98,7 @@ combatadj<-function(Data1,Data2,fdr=0.05,distmetric="Cor",subsetByQC=NULL,genese
9998

10099
stdData<-combinedData
101100
}
102-
site<-strsplit(colnames(combinedData),'---')
101+
site<-str_split(colnames(combinedData),'---')
103102
site<-unlist(site)
104103
site<-site[seq(2,length(site),2)]
105104
names(site)<-colnames(combinedData)
@@ -174,7 +173,7 @@ classPerfCP<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NULL,di
174173
}
175174

176175
if(length(QC)>0){
177-
clnames<-strsplit(colnames(dataset),'---')
176+
clnames<-str_split(colnames(dataset),'---')
178177
clnames<-unlist(lapply(clnames,function(x){x[1]}))
179178

180179
uc<-unique(clnames)
@@ -185,10 +184,10 @@ classPerfCP<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NULL,di
185184
dataset<-dataset[,QC>=qualityTH]
186185
}
187186

188-
clnames<-strsplit(colnames(dataset),'---')
187+
clnames<-str_split(colnames(dataset),'---')
189188
clnames<-unlist(lapply(clnames,function(x){x[1]}))
190189

191-
site<-strsplit(colnames(dataset),'---')
190+
site<-str_split(colnames(dataset),'---')
192191
site<-unlist(lapply(site,function(x){x[[2]]}))
193192
if(distmetric=="Cor"){
194193
cdist<-as.matrix(1-cor(dataset))
@@ -1154,7 +1153,31 @@ RemovePC<-function(data,droppcanumber=1,perfCheck=TRUE){
11541153
return(correctedData)
11551154
}
11561155
}
1156+
PCextract<-function(data,pcanumber=1){
1157+
if(sum(is.na(data))!=0){
1158+
#Have NAs and need to impute missing values
1159+
#data is genes x cell lines
1160+
meanVals<-rowMeans(data,na.rm=TRUE)
1161+
genesToimpute<-which(rowSums(is.na(data))!=0)
1162+
for(i in 1:length(genesToimpute)){
1163+
selcl<-which(is.na(data[genesToimpute[i],]))
1164+
data[genesToimpute[i],selcl]<-meanVals[genesToimpute[i]]
1165+
}
1166+
}
1167+
estpca<-prcomp(t(data),scale.=TRUE)
1168+
npcas<-1:ncol(data)
1169+
pcause<-npcas[npcas%in%pcanumber]
11571170

1171+
return(estpca$rotation[,pcause])
1172+
1173+
}
1174+
PCvals<-function(inputdata,PCs){
1175+
PCout<-list()
1176+
for(i in 1:ncol(PCs)){
1177+
PCout[[i]]<-t(PCs[,i])%*%inputdata
1178+
}
1179+
return(PCout)
1180+
}
11581181
silhouetteScores<-function(clusterlabels,distmat){
11591182
if(!is.numeric(clusterlabels)){
11601183
clustercodes<-as.integer(1:length(unique(clusterlabels)))
@@ -1293,10 +1316,10 @@ classPerfFeature<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NU
12931316
}
12941317

12951318
distPlotCP<-function(dataset,title,XLIM,YLIMS,extraDist=NULL,weights=TRUE){
1296-
clnames<-strsplit(colnames(dataset),'---')
1319+
clnames<-str_split(colnames(dataset),'---')
12971320
clnames<-unlist(lapply(clnames,function(x){x[1]}))
12981321

1299-
site<-strsplit(colnames(dataset),'---')
1322+
site<-str_split(colnames(dataset),'---')
13001323
site<-unlist(lapply(site,function(x){x[[2]]}))
13011324

13021325

@@ -1316,10 +1339,10 @@ distPlotCP<-function(dataset,title,XLIM,YLIMS,extraDist=NULL,weights=TRUE){
13161339
}
13171340
if(!is.null(extraDist)){
13181341
if(weights){
1319-
clnamese<-strsplit(colnames(extraDist),'---')
1342+
clnamese<-str_split(colnames(extraDist),'---')
13201343
clnamese<-unlist(lapply(clnamese,function(x){x[1]}))
13211344

1322-
sitee<-strsplit(colnames(extraDist),'---')
1345+
sitee<-str_split(colnames(extraDist),'---')
13231346
sitee<-unlist(lapply(sitee,function(x){x[2]}))
13241347

13251348

@@ -1419,7 +1442,7 @@ if(is.null(labels)){labels=names(Reslist)}
14191442
#lines(1:npoints,100*1:npoints*1/npoints,col=makeTransparent('black',150))
14201443
labels[1]<-paste(labels[1],percent(Res1[1]))
14211444
for(i in 2:length(Reslist)){
1422-
lines(1:npoints,100*Reslist[[i]],col=collist[i],lwd=5,type='l',lty=ltylist[i])
1445+
lines(1:npoints,100*Reslist[[i]],col=collist[i],lwd=4,type='l',lty=ltylist[i])
14231446
labels[i]<-paste(labels[i],percent(Reslist[[i]][1]))
14241447
}
14251448

@@ -1440,12 +1463,11 @@ nAUClist<-lapply(Reslist,function(x) trapz(1:npoints,100*x)/(100*npoints))
14401463
pdf(filename,width=3,height=3)
14411464
par(mar=c(2.0,2.0,0.1,0.2)+0.1,mgp=c(1,0.25,0))
14421465
if(is.null(xlim)){
1443-
plot(100*Reslist,frame.plot = FALSE,col=collist[1],lwd=5,type='l',
1444-
xlab='neighbourhood',ylab='% cell lines matching counterpart',cex=0.8,cex.axis=0.8,cex.lab=0.8)
1445-
}else{
1466+
plot(100*Reslist,frame.plot = FALSE,col=collist[1],lwd=5,type='l',
1467+
xlab='neighbourhood',ylab='% cell lines matching counterpart',cex=0.8,cex.axis=0.8,cex.lab=0.8)}else{
14461468
plot(100*Reslist,frame.plot = FALSE,col=collist[1],lwd=5,type='l',xlim=xlim,
14471469
xlab='neighbourhood',ylab='% cell lines matching counterpart',cex=0.8,cex.axis=0.8,cex.lab=0.8)
1448-
}
1470+
}
14491471
lines(1:npoints,100*1:npoints*1/npoints,col=makeTransparent('black',150))
14501472
nauc<-trapz(1:npoints,100*Reslist)/(100*npoints)
14511473
legend('bottomright',legend=paste(labels,
@@ -2011,11 +2033,11 @@ decodeCNA_cp<-function(MoBEM){
20112033
rn <- rownames(MoBEM)
20122034
ii <- grep("cna", rownames(MoBEM))
20132035
cnaId <- rownames(MoBEM)[ii]
2014-
containedGenes <- unlist(lapply(strsplit(cnaId, " "), function(x) {
2036+
containedGenes <- unlist(lapply(str_split(cnaId, " "), function(x) {
20152037
x[2]
20162038
}))
20172039
containedGenes[is.na(containedGenes)] <- ""
2018-
segments <- unlist(lapply(strsplit(unlist(lapply(strsplit(cnaId,
2040+
segments <- unlist(lapply(str_split(unlist(lapply(str_split(cnaId,
20192041
":"), function(x) {
20202042
x[2]
20212043
})), " "), function(x) {
@@ -2251,13 +2273,12 @@ avgOverlap<-function(data,overlapAnnot,divergentCL=NULL){
22512273

22522274
for(i in 1:nrow(overlapAnnot)){
22532275
subdata<-data[,colnames(data)%in%unlist(overlapAnnot[i,c("model_id","BROAD_ID")])]
2254-
print(i)
2255-
if(length(subdata)>nrow(data)){
2276+
2277+
if(ncol(subdata)>1){
22562278
newdata<-matrix(rowMeans(as.matrix(subdata)),ncol=1)
22572279
colnames(newdata)<-overlapAnnot[i,"model_id"]
22582280
}else{
2259-
newdata<-matrix(subdata,ncol=1)
2260-
colnames(newdata)<-overlapAnnot[i,"model_id"]
2281+
newdata<-subdata
22612282
}
22622283

22632284
if(i==1){
@@ -2313,7 +2334,10 @@ updateRownames<-function(inputdata,Map){
23132334
newnames[is.na(newnames)]<-names(newnames)[is.na(newnames)]
23142335
newnames<-newnames[newnames%in%names(table(newnames))[table(newnames)==1]]
23152336
inputdata<-inputdata[names(newnames),]
2316-
rownames(inputdata)<-newnames
2337+
if(is.matrix(inputdata)){
2338+
rownames(inputdata)<-newnames}else{
2339+
names(inputdata)<-newnames
2340+
}
23172341
return(inputdata)
23182342
}
23192343

@@ -2408,13 +2432,14 @@ FindSignif<-function(inputset,integratedset,fdr=0.05){
24082432
for(i in 1:length(tissues)){
24092433
i1<-inputset[[tissues[i]]]
24102434
i2<-integratedset[[tissues[i]]]
2435+
i1$fdr<-p.adjust(i1$p,method="fdr")
2436+
i2$fdr<-p.adjust(i2$p,method="fdr")
24112437
bothtest<-intersect(rownames(i1),rownames(i2))
24122438
#print(head(bothtest))
24132439
corTest[[i]]<-cor.test(i1[bothtest,"delta"],i2[bothtest,"delta"])
24142440
i1<-i1[bothtest,]
24152441
i2<-i2[bothtest,]
2416-
i1$fdr<-p.adjust(i1$p,method="fdr")
2417-
i2$fdr<-p.adjust(i2$p,method="fdr")
2442+
24182443

24192444
signInt<-i2[i2$fdr<fdr&i1$fdr>fdr,]
24202445
signInt$nposInd<-i1[i2$fdr<fdr&i1$fdr>fdr,"npos"]
@@ -2895,11 +2920,11 @@ fr<-function(testgenes,controlgenes){
28952920
sum(controlgenes%in%testgenes)/length(controlgenes)
28962921
}
28972922
#https://github.com/cancerdatasci/ceres/blob/master/R/scale_to_essentials.R
2898-
scale_to_essentials <- function(ge_fit){
2923+
scale_to_essentials <- function(ge_fit,essgenes,nonessgenes){
28992924

29002925

2901-
essential_indices <- which(row.names(ge_fit) %in% ceres::hart_essentials[["Gene"]])
2902-
nonessential_indices <- which(row.names(ge_fit) %in% ceres::hart_nonessentials[["Gene"]])
2926+
essential_indices <- which(row.names(ge_fit) %in% essgenes)
2927+
nonessential_indices <- which(row.names(ge_fit) %in% nonessgenes)
29032928

29042929
scaled_ge_fit <- ge_fit %>%
29052930
apply(2, function(x){
@@ -3136,16 +3161,3 @@ AovFactors<-function(InputDF){
31363161
AovRes<-aov(data~Batch+PreProc)
31373162

31383163
}
3139-
cohens_d <- function(x, y) {
3140-
lx <- length(x)- 1
3141-
ly <- length(y)- 1
3142-
3143-
md <- abs(mean(x) - mean(y)) ## mean difference (numerator)
3144-
csd <- lx * var(x) + ly * var(y)
3145-
csd <- csd/(lx + ly)
3146-
csd <- sqrt(csd) ## common sd computation
3147-
3148-
cd <- md/csd ## cohen's d
3149-
3150-
return(cd)
3151-
}

0 commit comments

Comments
 (0)