-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathpreVIP_new_Z.R
More file actions
139 lines (88 loc) · 4.85 KB
/
preVIP_new_Z.R
File metadata and controls
139 lines (88 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
### The File takes 218 samples ###############################################################################
#### This file does Prefiltering also based on those genes which discriminate PvsZ and ARE releated to survival
##### This script CLUSTERS Periphery Cells ON THE PvsZ signature and PFS survival ###########################
library('affy')
library('xlsx')
library('limma')
library('survival')
rm(list =ls())
## load data ##
load("/home/abidata/Dinis/VIP/Main.Data/ExpressionConsole.normalised.RData")
## The phenoData
tab <- read.xlsx(file = '/home/abidata/Dinis/VIP/Sample.Info/15-07-14 VIP Sample Collection PvsZ DEG.xlsx', sheetIndex =1)
list.patients <- tab[,2]
### Only those patients who have a NON NA is PFS
pheno <- pData(eset.ec.norm)
pheno.ready <- pheno[!is.na(pheno$PFS),]
list.patients.final <- list.patients[(list.patients %in% pheno.ready[,3])]
## Include ONLY those features that have correspoding annotation ~ 27148 out of 70000
## Include ONLY those Samples which HAVE PFS and PvsZ Annotation
exprs <- eset.ec.norm[featureNames(eset.ec.norm) %in% rownames(anno.GENENAME),sampleNames(eset.ec.norm) %in% list.patients.final]
#############################################################################################################################
######## SIGNATURE WAS IDENTIFIED WITH THOSE FEATURES WHICH DISTINGUISHED PvsZ Clustering AND HAD SURvival Information ######
#############################################################################################################################
## LIMMA PREFILTERING BASED ON PvsZ #################################################
pheno <-pData(exprs)
pheno$Case <- as.factor(as.matrix(pheno$Case))
pheno$Topo <- as.factor(as.matrix(pheno$Topo))
pheno$Class <- as.factor(as.matrix(pheno$Class))
mm <- model.matrix( ~ 0 + Topo + Case + Class , pheno)
cc <- colnames(mm)
cc <- gsub("_","", cc)
cc <- gsub("-","", cc)
colnames(mm) <- cc
cont.matrix <- makeContrasts(ZvsP = Topocenter - Topoperiphery, levels= mm)
fit <- lmFit(exprs, design = mm)
fit2 <- contrasts.fit(fit, cont.matrix)
fit3 <- eBayes(fit2)
DEG_table <- topTable(fit3, adjust="BH", coef='ZvsP', number = Inf)
list <- rownames(DEG_table)[DEG_table$adj.P.Val < 0.05]
########### Fitting Univariate Cox's Models ######################################################
Xsig.ready <- as.data.frame(t(exprs(exprs[featureNames(exprs) %in% list,])))
### Check if there is batch effect in the signature without time ######################
v <- Xsig.ready
labs.v <- pData(exprs)$Topo
pc <- prcomp(v)
pc.pred <- predict(pc,newdata = v)
plot(pc.pred[,1], pc.pred[,2], pch = 19,col = c("red","green")[as.factor(labs.v)])
######## Getting survival times (PFS) and status #####################
Time <- (as.numeric(as.matrix(pheno$PFS)))
status <- rep(1, dim(pheno)[1]) # 1 because the relapse always occurs
surv.obj <- Surv(Time, status)
##### FITTING A UNIVARIATE COX REGRESSION MODEL ################################
pvalue.sig <- c(0)
pvalue.adj <- c(0)
for ( i in 1:ncol(Xsig.ready)){
q <- unlist(summary(coxph(surv.obj ~ Xsig.ready[,i], data = Xsig.ready)))
pvalue.sig[i] <- q$logtest.pvalue
}
pvalue.adj <- p.adjust(pvalue.sig, method = "fdr")
list.final <- colnames(Xsig.ready)[(pvalue.adj < 0.2)]
##### Check if there is batch effect now ####
Xsig.ready <- as.data.frame(t(exprs(exprs[featureNames(exprs) %in% list.final,])))
### Check if there is batch effect in the signature without time ######################
v <- Xsig.ready
labs.v <- pData(exprs)$Topo
pc <- prcomp(v)
pc.pred <- predict(pc,newdata = v)
plot(pc.pred[,1], pc.pred[,2], pch = 19,col = c("red","green")[as.factor(labs.v)])
##############################################################################################
###############################################################################################
###### OUR SIGNATURE ###########################################################################
signature <- list.final
############# Let's Compare Our Signature with the Master List to see if we have any hits ##############
master <- read.xlsx(file = '/home/abidata/Dinis/VIP/Main.Data/Master_File_FINAL.xlsx', sheetIndex =1)
signature %in% master[,2]
##########################################
exprs.final <- exprs[featureNames(exprs) %in% signature,]
annot <- anno.GENENAME[anno.GENENAME[,1] %in% signature,]
featureNames(exprs.final) == annot[,1]
###################################################
#### Prepare the data set for clustering ##########
Y <- t(exprs(exprs.final[,pData(exprs.final)$Topo == 'center']))
colnames(Y) <- annot[,3]
time <- as.numeric(as.matrix(pData(exprs.final[,pData(exprs.final)$Topo == 'center'])$PFS))
censoring <- rep(1, nrow(Y))
Y.info <- pData(exprs.final[,pData(exprs.final)$Topo == 'center'])
relev <- list('Y.train' =Y, 'time.train' = time,'censoring.train'= censoring, 'Y.info' = Y.info)
save(relev, file = 'DataVIPOWNsignatureZ.RData')