Skip to content

Commit b488df4

Browse files
authored
Add files via upload
1 parent 01119c2 commit b488df4

1 file changed

Lines changed: 275 additions & 0 deletions

File tree

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
# Analysis of generation interval with Neanderthal variants masked
2+
3+
tgp_nea <- read.table("neanderthal_masked/TGP_10kgacounts.txt")
4+
5+
names(tgp_nea) <- count_header
6+
tgp_nea <- cbind(tgp_nea[,mut_classes] + tgp_nea[,complementary_classes],
7+
CpG = tgp_nea$CpG, bin_age = tgp_nea$bin_age)
8+
9+
tgp_nea10kgaplots <- make_plotframes(phasedstrictCT_model, tgp_nea, TGPphased_boot10kga_ests)
10+
plotframe_noNea <- tgp_nea10kgaplots[[1]]
11+
plotmean_noNea <- tgp_nea10kgaplots[[2]]
12+
13+
tgp_10kga <- read.table("TGP_10kgacounts.txt")
14+
names(tgp_10kga) <- count_header
15+
tgp_10kga <- cbind(tgp_10kga[,mut_classes] + tgp_10kga[,complementary_classes],
16+
CpG = tgp_10kga$CpG, bin_age = tgp_10kga$bin_age)
17+
18+
tgp_phased10kgaplots <- make_plotframes(phasedstrictCT_model, tgp_10kga, TGPphased_boot10kga_ests)
19+
plotframe <- tgp_phased10kgaplots[[1]]
20+
plotmean <- tgp_phased10kgaplots[[2]]
21+
22+
ggplot(plotframe_noNea, aes(y = gen_time, x = bin_age, col = sex)) +
23+
geom_point(size = 2) +
24+
geom_smooth(data = plotmean_noNea, aes(y = sexave, x = bin_age, col = 'mean'), lwd = 1.2,
25+
formula = y ~ x, method = "loess", span = 0.65, se = FALSE, color = 'darkgray') +
26+
geom_smooth(data = plotmean, aes(y = sexave, x = bin_age, col = 'mean'), lwd = 1.2, lty = 2,
27+
formula = y ~ x, method = "loess", span = 0.65, se = FALSE, color = 'black') +
28+
geom_ribbon(data = plotmean, aes(x = bin_age, y = sexave, col = 'mean',
29+
ymin = sexave_lower, ymax = sexave_upper),
30+
alpha = 0.15, color = NA) +
31+
coord_cartesian(ylim = c(13.5,38.5)) + scale_color_manual(values = rev(pal_aaas("default")(2))) +
32+
axis_formatting + annotation_logticks(sides = "b") +
33+
scale_x_log10(minor_breaks = c(1:9 * 100, 1:9 * 1000)) +
34+
labs(y = "generation interval (years)", x = "allele age (generations ago)", col = "sex")
35+
36+
##
37+
38+
for(pop in pops) {
39+
load(paste("bootstraps/phased_10kgaboot_ests", pop, ".RData", sep = ""))
40+
}
41+
42+
tgp_nea <- read.table("neanderthal_masked/TGP_10kgacounts.txt")
43+
44+
names(tgp_nea) <- count_header
45+
tgp_nea <- cbind(tgp_nea[,mut_classes] + tgp_nea[,complementary_classes],
46+
CpG = tgp_nea$CpG, bin_age = tgp_nea$bin_age)
47+
48+
49+
for(pop in pops) {
50+
pop_nea <- read.table(paste("neanderthal_masked/", pop, "_10kgacounts.txt", sep = ""))
51+
names(pop_nea) <- count_header
52+
pop_nea <- cbind(pop_nea[,mut_classes] + pop_nea[,complementary_classes],
53+
CpG = pop_nea$CpG, bin_age = pop_nea$bin_age)
54+
55+
assign(paste(pop, "noNea", sep = "_"), pop_nea)
56+
assign(paste(pop, "noNeaphasedplotframes10kga", sep = "_"),
57+
make_plotframes(phasedstrictCT_model, get(paste(pop, "noNea", sep = "_")),
58+
get(paste(pop, "phased_10kgaboot_ests", sep = ""))))
59+
}
60+
61+
popsframe_10kga_noNea <-
62+
data.frame(mean_interval = unlist(lapply(pops,
63+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$sexave })),
64+
bin_age = unlist(lapply(pops,
65+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$bin_age })),
66+
sexdiff = unlist(lapply(pops,
67+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$sexdiff })),
68+
sexave_upper = unlist(lapply(pops,
69+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$sexave_upper })),
70+
sexave_lower = unlist(lapply(pops,
71+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$sexave_lower })),
72+
sexdiff_upper = unlist(lapply(pops,
73+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$sexdiff_upper })),
74+
sexdiff_lower = unlist(lapply(pops,
75+
function(pop) { get(paste(pop, "noNeaphasedplotframes10kga", sep = "_"))[[2]]$sexdiff_lower })),
76+
pop = rep(pops, each = 100))
77+
78+
loEAS10_noNea <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_10kga_noNea, pop == "EAS"), span = 0.65)
79+
loEUR10_noNea <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_10kga_noNea, pop == "EUR"), span = 0.65)
80+
loSAS10_noNea <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_10kga_noNea, pop == "SAS"), span = 0.65)
81+
82+
predx_noNea <- 10**seq(max(loEAS10_noNea$x[1], loEUR10_noNea$x[1], loSAS10_noNea$x[1]),
83+
min(loEAS10_noNea$x[100], loEUR10_noNea$x[100], loSAS10_noNea$x[100]), .01)
84+
85+
predy_noNea <- (predict(loEAS10_noNea, data.frame(bin_age = predx)) +
86+
predict(loEUR10_noNea, data.frame(bin_age = predx)) +
87+
predict(loSAS10_noNea, data.frame(bin_age = predx))) / 3
88+
89+
ggplot(subset(popsframe_10kga_noNea, pop == "AFR"), aes(x = bin_age, y = mean_interval, col = pop)) +
90+
geom_smooth(method = "loess", span = 0.65, se = FALSE, lwd = 1.1) +
91+
geom_line(data = data.frame(mean_interval = predy_noNea, bin_age = predx_noNea), col = 'black', lwd = 1.1) +
92+
coord_cartesian(ylim = c(13.5,38.5)) + axis_formatting + annotation_logticks(sides = "b") +
93+
scale_x_log10(minor_breaks = c(1:9 * 100, 1:9 * 1000)) +
94+
labs(y = "generation interval (years)", x = "allele age (generations ago)")
95+
96+
for(pop in pops) {
97+
pop_10kga <- read.table(paste(pop, "_10kgacounts.txt", sep = ""))
98+
names(pop_10kga) <- count_header
99+
pop_10kga <- cbind(pop_10kga[,mut_classes] + pop_10kga[,complementary_classes], CpG = pop_10kga$CpG, bin_age = pop_10kga$bin_age)
100+
101+
oout <- apply(t(t(pop_10kga[,1:6] / rowSums(pop_10kga[,1:6])) - youngspecdiff), MARGIN = 1,
102+
predict_parentalages, model = strictCT_model)
103+
pop_10kga$mgen_time <- sapply(oout, "[[", 1)[1,]
104+
pop_10kga$fgen_time <- sapply(oout, "[[", 1)[2,]
105+
pop_10kga$ss_mean <- (pop_10kga$mgen_time + pop_10kga$fgen_time) / 2
106+
107+
assign(paste(pop, "10kga", sep = "_"), pop_10kga)
108+
assign(paste(pop, "phasedplotframes10kga", sep = "_"),
109+
make_plotframes(phasedstrictCT_model, get(paste(pop, "10kga", sep = "_")),
110+
get(paste(pop, "phased_10kgaboot_ests", sep = ""))))
111+
112+
}
113+
114+
popsframe_10kga <-
115+
data.frame(mean_interval = unlist(lapply(pops,
116+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$sexave })),
117+
bin_age = unlist(lapply(pops,
118+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$bin_age })),
119+
sexdiff = unlist(lapply(pops,
120+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$sexdiff })),
121+
sexave_upper = unlist(lapply(pops,
122+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$sexave_upper })),
123+
sexave_lower = unlist(lapply(pops,
124+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$sexave_lower })),
125+
sexdiff_upper = unlist(lapply(pops,
126+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$sexdiff_upper })),
127+
sexdiff_lower = unlist(lapply(pops,
128+
function(pop) { get(paste(pop, "phasedplotframes10kga", sep = "_"))[[2]]$sexdiff_lower })),
129+
pop = rep(pops, each = 100))
130+
131+
loEAS10 <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_10kga, pop == "EAS"), span = 0.65)
132+
loEUR10 <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_10kga, pop == "EUR"), span = 0.65)
133+
loSAS10 <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_10kga, pop == "SAS"), span = 0.65)
134+
135+
predx <- 10**seq(max(loEAS10$x[1], loEUR10$x[1], loSAS10$x[1]),
136+
min(loEAS10$x[100], loEUR10$x[100], loSAS10$x[100]), .01)
137+
138+
predy <- (predict(loEAS10, data.frame(bin_age = predx)) +
139+
predict(loEUR10, data.frame(bin_age = predx)) +
140+
predict(loSAS10, data.frame(bin_age = predx))) / 3
141+
142+
loEAS10upperCI <- loess(sexave_upper ~ log10(bin_age), data = subset(popsframe_10kga, pop == "EAS"), span = 0.65)
143+
loEAS10lowerCI <- loess(sexave_lower ~ log10(bin_age), data = subset(popsframe_10kga, pop == "EAS"), span = 0.65)
144+
145+
loEUR10upperCI <- loess(sexave_upper ~ log10(bin_age), data = subset(popsframe_10kga, pop == "EUR"), span = 0.65)
146+
loEUR10lowerCI <- loess(sexave_lower ~ log10(bin_age), data = subset(popsframe_10kga, pop == "EUR"), span = 0.65)
147+
148+
loSAS10upperCI <- loess(sexave_upper ~ log10(bin_age), data = subset(popsframe_10kga, pop == "SAS"), span = 0.65)
149+
loSAS10lowerCI <- loess(sexave_lower ~ log10(bin_age), data = subset(popsframe_10kga, pop == "SAS"), span = 0.65)
150+
151+
loupperCI <- (predict(loEAS10upperCI, data.frame(bin_age = predx)) +
152+
predict(loEUR10upperCI, data.frame(bin_age = predx))+
153+
predict(loSAS10upperCI, data.frame(bin_age = predx))) / 3
154+
155+
lolowerCI <- (predict(loEAS10lowerCI, data.frame(bin_age = predx)) +
156+
predict(loEUR10lowerCI, data.frame(bin_age = predx))+
157+
predict(loSAS10lowerCI, data.frame(bin_age = predx))) / 3
158+
159+
160+
ggplot(subset(popsframe_10kga, pop == "AFR"), aes(x = bin_age, y = mean_interval, col = pop)) +
161+
geom_line(data = data.frame(mean_interval = predy, bin_age = predx), col = 'black', lwd = 1.1,
162+
lty = 2) +
163+
geom_line(data = data.frame(mean_interval = predy_noNea, bin_age = predx_noNea), col = 'black',
164+
lwd = 1.1) +
165+
geom_ribbon(data = subset(popsframe_10kga, pop == "AFR"),
166+
aes(ymin = sexave_lower, ymax = sexave_upper, fill = pop), color = NA, alpha = 0.1) +
167+
geom_ribbon(data = data.frame(mean_interval = 0,
168+
sexave_lower = lolowerCI, sexave_upper = loupperCI, bin_age = predx),
169+
aes(ymin = sexave_lower, ymax = sexave_upper), color = NA, alpha = 0.1) +
170+
geom_smooth(method = "loess", span = 0.65, se = FALSE, lty = 2, lwd = 1.1) +
171+
geom_smooth(method = "loess", span = 0.65,
172+
data = subset(popsframe_10kga_noNea, pop == "AFR"), se = FALSE, lwd = 1.1) +
173+
coord_cartesian(ylim = c(13.5,38.5)) + axis_formatting + annotation_logticks(sides = "b") +
174+
scale_x_log10(minor_breaks = c(1:9 * 100, 1:9 * 1000)) +
175+
labs(y = "generation interval (years)", x = "allele age (generations ago)")
176+
177+
###
178+
179+
for(pop in pops) {
180+
load(paste("bootstraps/phased_allboot_ests", pop, ".RData", sep = ""))
181+
182+
pop_all <- read.table(paste(pop, "_allcounts.txt", sep = ""))
183+
names(pop_all) <- count_header
184+
pop_all <- cbind(pop_all[,mut_classes] + pop_all[,complementary_classes], CpG = pop_all$CpG, bin_age = pop_all$bin_age)
185+
186+
oout <- apply(t(t(pop_all[,1:6] / rowSums(pop_all[,1:6])) - youngspecdiff), MARGIN = 1,
187+
predict_parentalages, model = strictCT_model)
188+
pop_all$mgen_time <- sapply(oout, "[[", 1)[1,]
189+
pop_all$fgen_time <- sapply(oout, "[[", 1)[2,]
190+
pop_all$ss_mean <- (pop_all$mgen_time + pop_all$fgen_time) / 2
191+
192+
assign(paste(pop, "allcounts", sep = "_"), pop_all)
193+
assign(paste(pop, "phasedplotframes", sep = "_"),
194+
make_plotframes(phasedstrictCT_model, get(paste(pop, "allcounts", sep = "_")),
195+
get(paste(pop, "phased_allboot_ests", sep = ""))))
196+
}
197+
198+
popsframe_all <-
199+
data.frame(mean_interval = unlist(lapply(pops,
200+
function(pop) { get(paste(pop, "phasedplotframes", sep = "_"))[[2]]$sexave })),
201+
bin_age = unlist(lapply(pops,
202+
function(pop) { get(paste(pop, "phasedplotframes", sep = "_"))[[2]]$bin_age })),
203+
sexdiff = unlist(lapply(pops,
204+
function(pop) { get(paste(pop, "phasedplotframes", sep = "_"))[[2]]$sexdiff })),
205+
sexave_upper = unlist(lapply(pops,
206+
function(pop) { get(paste(pop, "phasedplotframes", sep = "_"))[[2]]$sexave_upper })),
207+
sexave_lower = unlist(lapply(pops,
208+
function(pop) { get(paste(pop, "phasedplotframes", sep = "_"))[[2]]$sexave_lower })),
209+
pop = rep(pops, each = 100))
210+
211+
ggplot(popsframe_all, aes(x = bin_age, y = mean_interval, col = pop)) +
212+
geom_smooth(method = "loess", span = 0.65, se = FALSE, lwd = 1.1) +
213+
geom_ribbon(aes(ymin = sexave_lower, ymax = sexave_upper, fill = pop), color = NA, alpha = 0.1) +
214+
axis_formatting + coord_cartesian(ylim = c(13.5, 38.5), xlim = c(100,80000)) + annotation_logticks(sides = "b") +
215+
scale_x_log10(minor_breaks = c(1:9 * 100, 1:9 * 1000)) +
216+
labs(y = "generation interval (years)", x = "allele age (generations ago)")
217+
218+
loEASall <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_all, pop == "EAS"), span = 0.65)
219+
loEURall <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_all, pop == "EUR"), span = 0.65)
220+
loSASall <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_all, pop == "SAS"), span = 0.65)
221+
222+
predxall <- 10**seq(max(loEASall$x[1], loEURall$x[1], loSASall$x[1]),
223+
min(loEASall$x[100], loEURall$x[100], loSASall$x[100]), .01)
224+
225+
predyall <- (predict(loEASall, data.frame(bin_age = predxall)) +
226+
predict(loEURall, data.frame(bin_age = predxall)) +
227+
predict(loSASall, data.frame(bin_age = predxall))) / 3
228+
229+
for(pop in pops) {
230+
pop_nea <- read.table(paste("neanderthal_masked/", pop, "_noNea.allcounts.txt", sep = ""))
231+
names(pop_nea) <- count_header
232+
pop_nea <- cbind(pop_nea[,mut_classes] + pop_nea[,complementary_classes],
233+
CpG = pop_nea$CpG, bin_age = pop_nea$bin_age)
234+
235+
assign(paste(pop, "neaAll", sep = "_"), pop_nea)
236+
assign(paste(pop, "noNeaphasedplotframesall", sep = "_"),
237+
make_plotframes(phasedstrictCT_model, get(paste(pop, "neaAll", sep = "_")),
238+
get(paste(pop, "phased_allboot_ests", sep = ""))))
239+
}
240+
241+
popsframe_all_noNea <-
242+
data.frame(mean_interval = unlist(lapply(pops,
243+
function(pop) { get(paste(pop, "noNeaphasedplotframesall", sep = "_"))[[2]]$sexave })),
244+
bin_age = unlist(lapply(pops,
245+
function(pop) { get(paste(pop, "noNeaphasedplotframesall", sep = "_"))[[2]]$bin_age })),
246+
sexdiff = unlist(lapply(pops,
247+
function(pop) { get(paste(pop, "noNeaphasedplotframesall", sep = "_"))[[2]]$sexdiff })),
248+
sexave_upper = unlist(lapply(pops,
249+
function(pop) { get(paste(pop, "noNeaphasedplotframesall", sep = "_"))[[2]]$sexave_upper })),
250+
sexave_lower = unlist(lapply(pops,
251+
function(pop) { get(paste(pop, "noNeaphasedplotframesall", sep = "_"))[[2]]$sexave_lower })),
252+
pop = rep(pops, each = 100))
253+
254+
loEASall_noNea <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_all_noNea, pop == "EAS"), span = 0.65)
255+
loEURall_noNea <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_all_noNea, pop == "EUR"), span = 0.65)
256+
loSASall_noNea <- loess(mean_interval ~ log10(bin_age), data = subset(popsframe_all_noNea, pop == "SAS"), span = 0.65)
257+
258+
predxall_noNea <- 10**seq(max(loEASall_noNea$x[1], loEURall_noNea$x[1], loSASall_noNea$x[1]),
259+
min(loEASall_noNea$x[100], loEURall_noNea$x[100], loSASall_noNea$x[100]), .01)
260+
261+
predyall_noNea <- (predict(loEASall_noNea, data.frame(bin_age = predxall_noNea)) +
262+
predict(loEURall_noNea, data.frame(bin_age = predxall_noNea)) +
263+
predict(loSASall_noNea, data.frame(bin_age = predxall_noNea))) / 3
264+
265+
ggplot(subset(popsframe_all, pop == "AFR"), aes(x = bin_age, y = mean_interval, col = pop)) +
266+
geom_smooth(method = "loess", span = 0.65, se = FALSE, lwd = 1.1) +
267+
geom_smooth(method = "loess", span = 0.65, se = FALSE, lwd = 1.1, lty = 2,
268+
data = subset(popsframe_all_noNea, pop == "AFR")) +
269+
geom_line(data = data.frame(bin_age = predxall_noNea, mean_interval = predyall_noNea),
270+
col = 'black', lwd = 1.1, lty = 2) +
271+
geom_line(data = data.frame(bin_age = predxall, mean_interval = predyall),
272+
col = 'black', lwd = 1.1) +
273+
axis_formatting + coord_cartesian(ylim = c(13.5, 38.5), xlim = c(100,80000)) + annotation_logticks(sides = "b") +
274+
scale_x_log10(minor_breaks = c(1:9 * 100, 1:9 * 1000)) +
275+
labs(y = "generation interval (years)", x = "allele age (generations ago)")

0 commit comments

Comments
 (0)