|
| 1 | +# GSE112792; we're working with log2 transformed values so we skip the weighted regression steps |
| 2 | + |
| 3 | +library(gemma.R) |
| 4 | +library(SummarizedExperiment) |
| 5 | +library(limma) |
| 6 | +library(dplyr) |
| 7 | +dat <- getDataset("GSE112792") |
| 8 | + |
| 9 | +e = assay(dat) |
| 10 | +gt = factor(colData(dat)$genotype) |
| 11 | + |
| 12 | +# some basic data filtering, not exactly the same as in Gemma but close |
| 13 | +e<-e[apply(e, 1, var) > 1e-5,] |
| 14 | +e<-e[apply(e, 1, function(x) { |
| 15 | + a<-length(unique(x))>5; |
| 16 | +}),] |
| 17 | + |
| 18 | + |
| 19 | +# test code for interpolation |
| 20 | +x = c(9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0) |
| 21 | +y = c(1.0, 0.25, 0.1111111111111111, 0.0625, 0.04, 0.027777777777777776, 0.02040816326530612, 0.015625, 0.012345679012345678, 0.01) |
| 22 | +interpolate = c(9.5, 8.5, 7.5, 6.5, 5.5, 4.5, 3.5, 2.5, 1.5, 0.5) |
| 23 | +f<-approxfun(x,y,rule=2) |
| 24 | +f(c(interpolate)) |
| 25 | + |
| 26 | + |
| 27 | + |
| 28 | +f<-lmFit(e, model.matrix(~gt)) |
| 29 | +f<-eBayes(f) |
| 30 | +write.table(f$sigma^2, "/Users/pzoot/test-vars.txt", quote=F, sep="\t") |
| 31 | +limma::fitFDist(f$sigma^2, f$df.residual) |
| 32 | + |
| 33 | +sv<-limma::squeezeVar(f$sigma^2, 6) |
| 34 | +write.table(sv$var.post, "/Users/pzoot/test-var.post.txt", quote=F, sep='\t') |
| 35 | + |
| 36 | +# raw count data from RNA seq pipeline, not via gemma |
| 37 | +counts<-read.delim("./GSE112792_counts.genes", row.names=1, header=T) |
| 38 | +# not quite equivalent to Gemma filters |
| 39 | +counts<-counts[apply(counts,1,var)> 1e-5,] |
| 40 | +counts<-counts[apply(counts, 1, function(x) { |
| 41 | + a<-length(unique(x))>5; |
| 42 | +}),] |
| 43 | + |
| 44 | +# fixme: sample and row order might not be right ...can work out from library sizes |
| 45 | +vv<-voom(as.matrix(counts), model.matrix(~gt), plot=T) |
| 46 | +gmv<-read.delim("mv-1885318782381326010.txt", header=F, sep='\t') |
| 47 | +glo<-read.delim("loess-fit-943568072532947709.txt", header=F, sep='\t') |
| 48 | +plot(gmv, pch='.', ylim=c(0,5)) |
| 49 | +lines(glo, col='red') |
| 50 | + |
| 51 | +td<-read.delim("lmtest11.dat.txt", header=T, row.names=1, sep='\t') |
| 52 | +tdes<-read.delim("lmtest11.des.txt", header=T, row.names=1, sep='\t') |
| 53 | +des<-model.matrix(~tdes$targets.TreatmentDHT); |
| 54 | +lib.size<-colSums(td) |
| 55 | +tdlogcpm<-t(log2(t(td+0.5)/(lib.size+1)*1e6)) |
| 56 | +Amean<-rowMeans(tdlogcpm) |
| 57 | +sx <- Amean+mean(log2(lib.size+1))-log2(1e6) |
| 58 | +fit <- lmFit(tdlogcpm, des) |
| 59 | +sy <- sqrt(fit$sigma) |
| 60 | +l <- lowess(sx,sy,f=0.5) |
| 61 | +write.table(l, "lmtest11.lowess.txt", quote=F, sep='\t') |
| 62 | +write.table(tdlogcpm, "lmtest11.log2cpm.txt", quote=F, sep='\t') |
| 63 | +tv<-voom(as.matrix(td), des, plot=T) |
| 64 | +write.table(tv$weights, "lmtest11.voomweights.txt", sep='\t', quote=F) |
| 65 | +fitted.values <- fit$coef %*% t(fit$design) |
| 66 | +fitted.cpm <- 2^fitted.values |
| 67 | +fitted.count <- 1e-6 * t(t(fitted.cpm)*(lib.size+1)) |
| 68 | +fitted.logcount <- log2(fitted.count) |
| 69 | +f <- approxfun(l, rule=2) |
| 70 | +w <- 1/f(fitted.logcount)^4 |
| 71 | +dim(w) <- dim(fitted.logcount) |
| 72 | + |
0 commit comments