Skip to content

Commit 8517b35

Browse files
authored
Update integratedperformance
1 parent d1179b5 commit 8517b35

1 file changed

Lines changed: 105 additions & 127 deletions

File tree

IntegratedPerformance.R

Lines changed: 105 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ library(here)
22
library(CRISPRcleanR)
33
library(ggplot2)
44
library(beeswarm)
5-
5+
library(qusage)
66

77
dir.MergeFile<-"./Results"
88
dir.Results<-"./ResultsFilter"
@@ -18,6 +18,9 @@ cmp<-read.csv(paste0(dir.Input,"/model_list_latest.csv"),header=T,stringsAsFacto
1818
cmp2<-cmp
1919
cmp2$model_id<-cmp2$BROAD_ID
2020
cmp<-rbind(cmp,cmp2)
21+
22+
load(file=paste0(dir.MergeFile,"/AllSymbolsHNGCmap.Rdata"))
23+
2124
#start with the SQ+ComBat+PC1 merged all data
2225

2326
CCR_corrected<-readRDS(file=paste0(dir.MergeFile,"/CCR_SQ_Combat_PC1_All_merge_F.Rds"))
@@ -103,37 +106,45 @@ load(file=paste0(dir.Results,"/normLRTCERESmerge",PCnumber,".RData"))
103106
load(file=paste0(dir.Results,"/normLRTCCRJmerge",PCnumber,".Rdata"))
104107

105108

106-
load(paste0(dir.Input,'/EssGenes.DNA_REPLICATION_cons.RData'))
107-
load(paste0(dir.Input,'EssGenes.KEGG_rna_polymerase.RData'))
108-
load(paste0(dir.Input,'EssGenes.PROTEASOME_cons.RData'))
109-
load(paste0(dir.Input,'EssGenes.ribosomalProteins.RData'))
110-
load(paste0(dir.Input,'EssGenes.SPLICEOSOME_cons.RData'))
109+
load(file=paste0(dir.Input,"/Kegg.DNArep.Rdata"))
110+
load(file=paste0(dir.Input,"/Kegg.Ribosome.Rdata"))
111+
load(file=paste0(dir.Input,"/Kegg.Proteasome.Rdata"))
112+
load(file=paste0(dir.Input,"/Kegg.Spliceosome.Rdata"))
113+
load(file=paste0(dir.Input,"/Kegg.RNApoly.Rdata"))
114+
load(file=paste0(dir.Input,"/Histones.Rdata"))
115+
116+
allRefEss<-unique(c(Kegg.DNArep,Kegg.Proteasome,Kegg.Ribosome,Kegg.RNApoly,Kegg.Spliceosome,Histones))
117+
allRefEss<-unique(allSymbol[na.omit(match(allRefEss,allSymbol$Input)),"Approved.symbol"])
118+
getAllMap<-function(allSymbol,allgenes){
119+
symbolS1<-allSymbol[allSymbol$Match.type=="Approved symbol",]
120+
symbolS2<-allSymbol[allSymbol$Match.type=="Previous symbol",]
121+
symbolS3<-allSymbol[allSymbol$Match.type=="Synonyms",]
122+
allMap<-symbolS1[match(allgenes,symbolS1$Input),"Approved.symbol"]
123+
names(allMap)<-allgenes
124+
m1<-names(allMap)[is.na(allMap)]
125+
m1M<-symbolS2[match(m1,symbolS2$Input),"Approved.symbol"]
126+
names(m1M)<-m1
127+
m2<-names(m1M)[is.na(m1M)]
128+
m2M<-symbolS3[match(m2,symbolS3$Input),"Approved.symbol"]
129+
names(m2M)<-m2
130+
131+
allMap[names(m1M)]<-m1M
132+
allMap[names(m2M)]<-m2M
133+
134+
135+
allMap[is.na(allMap)]<-names(allMap)[is.na(allMap)]
136+
return(allMap)
137+
}
111138

139+
112140
#PanCancerCoreFitnessGenes, Behan 2019:
113141
load(paste0(dir.Input,'/10_PANCANCER_coreFitness_genes.RData'))
114142
Behan<-PanCancerCoreFitnessGenes
115143
#load in the latest Traver Hart pan cancer genes:
116144
load(paste0(dir.Input,'/BAGEL_v2_ESSENTIAL_GENES.Rdata'))
117145

118146

119-
Recall_BehanD9<-length(intersect(EssGenes.DNA_REPLICATION_cons,Behan))/length(EssGenes.DNA_REPLICATION_cons)
120-
Recall_HartD9<-length(intersect(EssGenes.DNA_REPLICATION_cons,BAGEL_essential))/length(EssGenes.DNA_REPLICATION_cons)
121-
122-
123-
Recall_BehanK9<-length(intersect(EssGenes.KEGG_rna_polymerase,Behan))/length(EssGenes.KEGG_rna_polymerase)
124-
Recall_HartK9<-length(intersect(EssGenes.KEGG_rna_polymerase,BAGEL_essential))/length(EssGenes.KEGG_rna_polymerase)
125-
126-
127-
Recall_BehanP9<-length(intersect(EssGenes.PROTEASOME_cons,Behan))/length(EssGenes.PROTEASOME_cons)
128-
Recall_HartP9<-length(intersect(EssGenes.PROTEASOME_cons,BAGEL_essential))/length(EssGenes.PROTEASOME_cons)
129-
130-
Recall_BehanR9<-length(intersect(EssGenes.ribosomalProteins,Behan))/length(EssGenes.ribosomalProteins)
131-
Recall_HartR9<-length(intersect(EssGenes.ribosomalProteins,BAGEL_essential))/length(EssGenes.ribosomalProteins)
132-
133-
Recall_BehanS9<-length(intersect(EssGenes.SPLICEOSOME_cons,Behan))/length(EssGenes.SPLICEOSOME_cons)
134-
Recall_HartS9<-length(intersect(EssGenes.SPLICEOSOME_cons,BAGEL_essential))/length(EssGenes.SPLICEOSOME_cons)
135-
136-
#option 2 for common essentials, just use CERES:
147+
#CERES common essentials:
137148
allCEsC<-unique(c(CeresCE,PCcoreCERES))
138149

139150
CEmatrixC<-matrix(0,nrow=length(allCEsC),ncol=2)
@@ -161,118 +172,86 @@ Tier1<-as.character(TierCEC[which(TierCEC$tier=="Tier1"),1])
161172
#Tier1<-as.character(TierCE[,1])
162173
Tier2<-as.character(TierCEC[which(TierCEC$tier=="Tier1"|TierCEC$tier=="Tier2"),1])
163174

164-
Recall_IntCeresD9c<-length(intersect(EssGenes.DNA_REPLICATION_cons,Tier1))/length(EssGenes.DNA_REPLICATION_cons)
165-
Recall_IntCeresD9t2c<-length(intersect(EssGenes.DNA_REPLICATION_cons,Tier2))/length(EssGenes.DNA_REPLICATION_cons)
166-
167-
168-
Recall_IntCeresDc<-length(intersect(EssGenes.DNA_REPLICATION_cons,Tier1))/length(EssGenes.DNA_REPLICATION_cons)
169-
Recall_IntCeresDt2c<-length(intersect(EssGenes.DNA_REPLICATION_cons,Tier2))/length(EssGenes.DNA_REPLICATION_cons)
170-
171-
172-
Recall_IntCeresK9c<-length(intersect(EssGenes.KEGG_rna_polymerase,Tier1))/length(EssGenes.KEGG_rna_polymerase)
173-
Recall_IntCeresK9t2c<-length(intersect(EssGenes.KEGG_rna_polymerase,Tier2))/length(EssGenes.KEGG_rna_polymerase)
174-
175-
Recall_IntCeresP9c<-length(intersect(EssGenes.PROTEASOME_cons,Tier1))/length(EssGenes.PROTEASOME_cons)
176-
Recall_IntCeresP9t2c<-length(intersect(EssGenes.PROTEASOME_cons,Tier2))/length(EssGenes.PROTEASOME_cons)
177-
178-
Recall_IntCeresR9c<-length(intersect(EssGenes.ribosomalProteins,Tier1))/length(EssGenes.ribosomalProteins)
179-
Recall_IntCeresR9t2c<-length(intersect(EssGenes.ribosomalProteins,Tier2))/length(EssGenes.ribosomalProteins)
180-
181-
182-
Recall_IntCeresS9c<-length(intersect(EssGenes.SPLICEOSOME_cons,Tier1))/length(EssGenes.SPLICEOSOME_cons)
183-
Recall_IntCeresS9t2c<-length(intersect(EssGenes.SPLICEOSOME_cons,Tier2))/length(EssGenes.SPLICEOSOME_cons)
184-
185-
RecallDataC<-rbind(data.frame(deps=Recall_BehanS9,set="Spliceosome",data="Behan"),
186-
data.frame(deps=Recall_HartS9,set="Spliceosome",data="Hart"),
187-
data.frame(deps=Recall_IntCeresS9c,set="Spliceosome",data="Integrated Tier 1"),
188-
data.frame(deps=Recall_IntCeresS9t2c,set="Spliceosome",data="Integrated Tier 2"),
189-
190-
data.frame(deps=Recall_BehanR9,set="Ribosomal",data="Behan"),
191-
data.frame(deps=Recall_HartR9,set="Ribosomal",data="Hart"),
192-
data.frame(deps=Recall_IntCeresR9c,set="Ribosomal",data="Integrated Tier 1"),
193-
data.frame(deps=Recall_IntCeresR9t2c,set="Ribosomal",data="Integrated Tier 2"),
194-
195-
data.frame(deps=Recall_BehanP9,set="Proteasome",data="Behan"),
196-
data.frame(deps=Recall_HartP9,set="Proteasome",data="Hart"),
197-
data.frame(deps=Recall_IntCeresP9c,set="Proteasome",data="Integrated Tier 1"),
198-
data.frame(deps=Recall_IntCeresP9t2c,set="Proteasome",data="Integrated Tier 2"),
199-
200-
data.frame(deps=Recall_BehanK9,set="RNA_polymerase",data="Behan"),
201-
data.frame(deps=Recall_HartK9,set="RNA_polymerase",data="Hart"),
202-
data.frame(deps=Recall_IntCeresK9c,set="RNA_polymerase",data="Integrated Tier 1"),
203-
data.frame(deps=Recall_IntCeresK9t2c,set="RNA_polymerase",data="Integrated Tier 2"),
204-
205-
data.frame(deps=Recall_BehanD9,set="DNA_Replication",data="Behan"),
206-
data.frame(deps=Recall_HartD9,set="DNA_Replication",data="Hart"),
207-
data.frame(deps=Recall_IntCeresD9c,set="DNA_Replication",data="Integrated Tier 1"),
208-
data.frame(deps=Recall_IntCeresD9t2c,set="DNA_Replication",data="Integrated Tier 2"))
209-
210-
211-
RecallDataC$data<-factor(RecallDataC$data,levels=c("Integrated Tier 1","Integrated Tier 2","Behan","Hart"))
212-
Pcolours<-c("red","blue","pink","purple")
213-
names(Pcolours)<-c("Integrated Tier 1","Integrated Tier 2","Behan","Hart")
214-
RecallDataC$set<-factor(RecallDataC$set,levels=c("Proteasome","Ribosomal","RNA_polymerase","DNA_Replication","Spliceosome"))
215-
Recallplot<-ggplot(RecallDataC,aes(x=set,y=deps,fill=data))+geom_bar(stat="identity",position="dodge")+scale_fill_manual(values=Pcolours)+theme(axis.text.x=element_text(angle=45))+ylab("Number Genes")+xlab("")+theme_bw()+theme(legend.position = c(0.25,0.925),legend.background = element_blank(),legend.title=element_blank())
216-
217-
pdf(paste0(dir.Results,"/RecallKnownSets_IntegratedFig5_ConsensusVScurrent_CERES_PC1.pdf"))
175+
allMap<-getAllMap(allSymbol = allSymbol,allgenes)
176+
BehanM<-as.matrix(Behan,ncol=1)
177+
rownames(BehanM)<-Behan
178+
Behan<-names(updateRownames(BehanM,allMap))
179+
Bagel<-as.matrix(BAGEL_essential,ncol=1)
180+
rownames(Bagel)<-BAGEL_essential
181+
BAGELessential<-names(updateRownames(Bagel,allMap))
182+
183+
184+
RecallSpliceosome<-lapply(list(Behan=Behan,Hart=BAGELessential,Broad=BroadCE,Sanger=SangerCE,Int=Tier2),function(x) length(intersect(x,Kegg.Spliceosome))/length(Kegg.Spliceosome))
185+
RecallRibosome<-lapply(list(Behan=Behan,Hart=BAGELessential,Broad=BroadCE,Sanger=SangerCE,Int=Tier2),function(x) length(intersect(x,Kegg.Ribosome))/length(Kegg.Ribosome))
186+
RecallProteasome<-lapply(list(Behan=Behan,Hart=BAGELessential,Broad=BroadCE,Sanger=SangerCE,Int=Tier2),function(x) length(intersect(x,Kegg.Proteasome))/length(Kegg.Proteasome))
187+
RecallRNApoly<-lapply(list(Behan=Behan,Hart=BAGELessential,Broad=BroadCE,Sanger=SangerCE,Int=Tier2),function(x) length(intersect(x,Kegg.RNApoly))/length(Kegg.RNApoly))
188+
RecallDNARep<-lapply(list(Behan=Behan,Hart=BAGELessential,Broad=BroadCE,Sanger=SangerCE,Int=Tier2),function(x) length(intersect(x,Kegg.DNArep))/length(Kegg.DNArep))
189+
RecallHistones<-lapply(list(Behan=Behan,Hart=BAGELessential,Broad=BroadCE,Sanger=SangerCE,Int=Tier2),function(x) length(intersect(x,Histones))/length(Histones))
190+
191+
RecallDataC<-rbind(data.frame(deps=unlist(RecallSpliceosome),set="Spliceosome",data=unlist(names(RecallSpliceosome))),
192+
data.frame(deps=unlist(RecallRibosome),set="Ribosome",data=unlist(names(RecallRibosome))),
193+
data.frame(deps=unlist(RecallProteasome),set="Proteasome",data=unlist(names(RecallProteasome))),
194+
data.frame(deps=unlist(RecallRNApoly),set="RNA polymerase",data=unlist(names(RecallRNApoly))),
195+
data.frame(deps=unlist(RecallDNARep),set="DNA Replication",data=unlist(names(RecallDNARep))),
196+
data.frame(deps=unlist(RecallHistones),set="Histones",data=unlist(names(RecallHistones)))
197+
)
198+
199+
RecallDataC$data<-factor(RecallDataC$data,levels=c("Int","Broad","Sanger","Behan","Hart"))
200+
Pcolours<-c("blue","orange","lightblue","pink","purple")
201+
names(Pcolours)<-c("Int","Broad","Sanger","Behan","Hart")
202+
RecallDataC$set<-factor(RecallDataC$set,levels=c("Proteasome","Ribosome","RNA polymerase","DNA Replication","Spliceosome","Histones"))
203+
Recallplot<-ggplot(RecallDataC,aes(x=set,y=deps,fill=data))+geom_bar(stat="identity",position="dodge")+scale_fill_manual(values=Pcolours)+theme(axis.text.x=element_text(angle=45))+ylab("Recall")+xlab("")+theme_bw()+theme(legend.position = c(0.25,0.925),legend.background = element_blank(),legend.title=element_blank(),panel.border = element_blank(), panel.grid.major = element_blank(),
204+
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
205+
206+
pdf(paste0(dir.Results,"/RecallKnownSets_IntegratedFig5_ConsensusVScurrent_CERES_PC1_All.pdf"))
218207
print(Recallplot)
219208
dev.off()
220209

221-
222-
###load and run the normLRT recall of DEMETER genes to assess false positives.
223-
ssc<-read.csv(file=paste0(dir.Input,"/skewed_tdist.csv"),header=T,stringsAsFactors = F)
224-
SSC20<-ssc[ssc[,2]>=20,1]
225-
SSC50<-ssc[ssc[,2]>=50,1]
226-
SSC100<-ssc[ssc[,2]>=100,1]
227-
SSC200<-ssc[ssc[,2]>=200,1]
228-
229-
FDR_SC20<-c(length(intersect(SSC20,Tier1))/length(Tier1),
230-
length(intersect(SSC20,Tier2))/length(Tier2),
231-
length(intersect(SSC20,Behan))/length(Behan),
232-
length(intersect(SSC20,BAGEL_essential))/length(BAGEL_essential))
233-
FDR_SC50<-c(length(intersect(SSC50,Tier1))/length(Tier1),
234-
length(intersect(SSC50,Tier2))/length(Tier2),
235-
length(intersect(SSC50,Behan))/length(Behan),
236-
length(intersect(SSC50,BAGEL_essential))/length(BAGEL_essential))
237-
FDR_SC100<-c(length(intersect(SSC100,Tier1))/length(Tier1),
238-
length(intersect(SSC100,Tier2))/length(Tier2),
239-
length(intersect(SSC100,Behan))/length(Behan),
240-
length(intersect(SSC100,BAGEL_essential))/length(BAGEL_essential))
241-
FDR_SC200<-c(length(intersect(SSC200,Tier1))/length(Tier1),
242-
length(intersect(SSC200,Tier2))/length(Tier2),
243-
length(intersect(SSC200,Behan))/length(Behan),
244-
length(intersect(SSC200,BAGEL_essential))/length(BAGEL_essential))
245-
FDRs<-rbind(FDR_SC20,FDR_SC50,FDR_SC100,FDR_SC200)
246-
pdf(paste0(dir.Results,"/skewLRT_demeter_Tier_CERES+PC1.pdf"))
247-
barplot(t(FDRs),beside = TRUE,col=Pcolours,border=FALSE,ylab='Estimated FDR',xlab = 'Log-likelihood of skew-t distribution',names.arg = c(20,50,100,200))
210+
#load gene expression data to get negative controls
211+
load(paste0(dir.Input,"/CCLEexpression.RData"))
212+
allMap<-getAllMap(allSymbol,rownames(CCLEexpression))
213+
CCLEexpression<-updateRownames(CCLEexpression,allMap)
214+
NotExpr<-ADAM2.PercentileCF(1/CCLEexpression,display=FALSE)$cfgenes
215+
NotExpr<-as.matrix(NotExpr,ncol=1)
216+
rownames(NotExpr)<-NotExpr
217+
allMap<-getAllMap(allSymbol,NotExpr[,1])
218+
NotExpr<-names(updateRownames(NotExpr,allMap))
219+
NotExpr<-setdiff(NotExpr,allRefEss)
220+
221+
FDR_NotExpr<-c(
222+
length(intersect(NotExpr,Tier2))/length(Tier2),
223+
length(intersect(NotExpr,Behan))/length(Behan),
224+
length(intersect(NotExpr,BAGELessential))/length(BAGELessential),
225+
length(intersect(NotExpr,BroadCE))/length(BroadCE),
226+
length(intersect(NotExpr,SangerCE))/length(SangerCE))
227+
FDRdata<-data.frame(FDR=FDR_NotExpr,Set=c("Int","Behan","Hart","Broad","Sanger"))
228+
FDRdata$Set<-factor(FDRdata$Set,levels=c("Hart","Int","Broad","Behan","Sanger"))
229+
Pcolours<-c("red","blue","orange","lightblue","pink","purple")
230+
names(Pcolours)<-c("Tier1","Int","Broad","Sanger","Behan","Hart")
231+
FDRplot<-ggplot(FDRdata,aes(x=Set,y=FDR,fill=Set))+geom_bar(stat="identity",position="dodge")+scale_fill_manual(values=Pcolours)+theme(axis.text.x=element_text(angle=45))+ylab("Estimated FDR")+xlab("")+theme_bw()+theme(legend.position = c(0.25,0.925),legend.background = element_blank(),legend.title=element_blank())
232+
233+
234+
pdf(paste0(dir.Results,"/FDR_IntegratedFig5_CERES_PC1_notExpr.pdf"))
235+
print(FDRplot)
248236
dev.off()
249237

250-
allPriorSets<-c(EssGenes.DNA_REPLICATION_cons,EssGenes.KEGG_rna_polymerase,EssGenes.PROTEASOME_cons,EssGenes.ribosomalProteins,EssGenes.SPLICEOSOME_cons)
251-
allPriorSets<-unique(allPriorSets)
252-
253-
PrecisionT1<-length(intersect(Tier1,allPriorSets))/length(Tier1)
254-
PrecisionT2<-length(intersect(Tier2,allPriorSets))/length(Tier2)
255-
PrecisionBehan<-length(intersect(Behan,allPriorSets))/length(Behan)
256-
PrecisionHart<-length(intersect(BAGEL_essential,allPriorSets))/length(BAGEL_essential)
257-
258-
PrecisionData<-data.frame(c("Tier1"=PrecisionT1,"Tier2"=PrecisionT2,"Behan"=PrecisionBehan,"Hart"=PrecisionHart))
259-
PrecisionData$set<-rownames(PrecisionData)
260-
colnames(PrecisionData)<-c("Precision","Set")
261-
Pcolours<-c("red","blue","pink","purple")
262-
names(Pcolours)<-c("Tier1","Tier2","Behan","Hart")
263-
Precisionplot<-ggplot(PrecisionData,aes(x=Set,y=Precision,fill=Set))+geom_bar(stat="identity",position="dodge")+scale_fill_manual(values=Pcolours)+theme(axis.text.x=element_text(angle=45))+ylab("Precision")+xlab("")+theme_bw()+theme(legend.position = c(0.25,0.925),legend.background = element_blank(),legend.title=element_blank())
264-
265-
pdf(paste0(dir.Results,"/PrecisionKnownSets_IntegratedFig5_CERES_PC1.pdf"))
266-
print(Precisionplot)
267-
dev.off()
238+
239+
240+
###Comparison of biomarker analysis###
268241

269242
load(paste0(dir.Input,'/MoBEM.RData'))
270243

271244

272245

273246
CCR_sanger<-SangerData
274247
CCR_broad<-BroadData
248+
dn<-dimnames(CCR_sanger)
249+
CCR_sanger<-normalize.quantiles(CCR_sanger)
250+
dimnames(CCR_sanger)<-dn
275251

252+
dn<-dimnames(CCR_broad)
253+
CCR_broad<-normalize.quantiles(CCR_broad)
254+
dimnames(CCR_broad)<-dn
276255
#load the relevant Binary matrices and show how many more genes are depleted at least once in integrated versus
277256
#individual data sets
278257

@@ -301,8 +280,7 @@ CCR_allCosmic<-CCR_corrected
301280

302281
colnames(CCR_allCosmic)<-cmp[match(colnames(CCR_allCosmic),cmp$model_id),"COSMIC_ID"]
303282

304-
CCRint_Broad<-CCR_corrected[,grep("ACH",colnames(CCR_corrected))]
305-
CCRint_Sanger<-CCR_corrected[,grep("SIDM",colnames(CCR_corrected))]
283+
306284
#filter by highest normLRT genes
307285

308286
n1<-names(normLRTCCR1[[3]])[normLRTCCR1[[3]]>200]
@@ -471,9 +449,9 @@ length(Newboth)
471449
pdf(paste0(dir.Results,"/ExampleNewBiomarkersPC1.pdf"),width=6.5,height=2.25)
472450
par(mfrow=c(1,4))
473451
par(mar=c(0.3,2,0.5,0.1)+0.1,mgp=c(1.25,0.5,0))
474-
plotAssociationExamplesCP(CCR_sanger,CCR_allCosmic,16,MoBEM,cmp,SangerNew)
452+
plotAssociationExamplesCP(CCR_sanger,CCR_allCosmic,20,MoBEM,cmp,SangerNew)
475453

476-
plotAssociationExamplesCP(CCR_broad,CCR_allCosmic,52,MoBEM,cmp,BroadNew)
454+
plotAssociationExamplesCP(CCR_broad,CCR_allCosmic,60,MoBEM,cmp,BroadNew)
477455
dev.off()
478456

479457
ccrS[["Lung"]]["TP53_mut-MDM2-Lung",]

0 commit comments

Comments
 (0)