1+ # Analysis of generation interval with Neanderthal variants masked
2+
3+ tgp_nea <- read.table(" neanderthal_masked/TGP_10kgacounts.txt" )
4+
5+ names(tgp_nea ) <- count_header
6+ tgp_nea <- cbind(tgp_nea [,mut_classes ] + tgp_nea [,complementary_classes ],
7+ CpG = tgp_nea $ CpG , bin_age = tgp_nea $ bin_age )
8+
9+ tgp_nea10kgaplots <- make_plotframes(phasedstrictCT_model , tgp_nea , TGPphased_boot10kga_ests )
10+ plotframe_noNea <- tgp_nea10kgaplots [[1 ]]
11+ plotmean_noNea <- tgp_nea10kgaplots [[2 ]]
12+
13+ tgp_10kga <- read.table(" TGP_10kgacounts.txt" )
14+ names(tgp_10kga ) <- count_header
15+ tgp_10kga <- cbind(tgp_10kga [,mut_classes ] + tgp_10kga [,complementary_classes ],
16+ CpG = tgp_10kga $ CpG , bin_age = tgp_10kga $ bin_age )
17+
18+ tgp_phased10kgaplots <- make_plotframes(phasedstrictCT_model , tgp_10kga , TGPphased_boot10kga_ests )
19+ plotframe <- tgp_phased10kgaplots [[1 ]]
20+ plotmean <- tgp_phased10kgaplots [[2 ]]
21+
22+ ggplot(plotframe_noNea , aes(y = gen_time , x = bin_age , col = sex )) +
23+ geom_point(size = 2 ) +
24+ geom_smooth(data = plotmean_noNea , aes(y = sexave , x = bin_age , col = ' mean' ), lwd = 1.2 ,
25+ formula = y ~ x , method = " loess" , span = 0.65 , se = FALSE , color = ' darkgray' ) +
26+ geom_smooth(data = plotmean , aes(y = sexave , x = bin_age , col = ' mean' ), lwd = 1.2 , lty = 2 ,
27+ formula = y ~ x , method = " loess" , span = 0.65 , se = FALSE , color = ' black' ) +
28+ geom_ribbon(data = plotmean , aes(x = bin_age , y = sexave , col = ' mean' ,
29+ ymin = sexave_lower , ymax = sexave_upper ),
30+ alpha = 0.15 , color = NA ) +
31+ coord_cartesian(ylim = c(13.5 ,38.5 )) + scale_color_manual(values = rev(pal_aaas(" default" )(2 ))) +
32+ axis_formatting + annotation_logticks(sides = " b" ) +
33+ scale_x_log10(minor_breaks = c(1 : 9 * 100 , 1 : 9 * 1000 )) +
34+ labs(y = " generation interval (years)" , x = " allele age (generations ago)" , col = " sex" )
35+
36+ # #
37+
38+ for (pop in pops ) {
39+ load(paste(" bootstraps/phased_10kgaboot_ests" , pop , " .RData" , sep = " " ))
40+ }
41+
42+ tgp_nea <- read.table(" neanderthal_masked/TGP_10kgacounts.txt" )
43+
44+ names(tgp_nea ) <- count_header
45+ tgp_nea <- cbind(tgp_nea [,mut_classes ] + tgp_nea [,complementary_classes ],
46+ CpG = tgp_nea $ CpG , bin_age = tgp_nea $ bin_age )
47+
48+
49+ for (pop in pops ) {
50+ pop_nea <- read.table(paste(" neanderthal_masked/" , pop , " _10kgacounts.txt" , sep = " " ))
51+ names(pop_nea ) <- count_header
52+ pop_nea <- cbind(pop_nea [,mut_classes ] + pop_nea [,complementary_classes ],
53+ CpG = pop_nea $ CpG , bin_age = pop_nea $ bin_age )
54+
55+ assign(paste(pop , " noNea" , sep = " _" ), pop_nea )
56+ assign(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ),
57+ make_plotframes(phasedstrictCT_model , get(paste(pop , " noNea" , sep = " _" )),
58+ get(paste(pop , " phased_10kgaboot_ests" , sep = " " ))))
59+ }
60+
61+ popsframe_10kga_noNea <-
62+ data.frame (mean_interval = unlist(lapply(pops ,
63+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexave })),
64+ bin_age = unlist(lapply(pops ,
65+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ bin_age })),
66+ sexdiff = unlist(lapply(pops ,
67+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexdiff })),
68+ sexave_upper = unlist(lapply(pops ,
69+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexave_upper })),
70+ sexave_lower = unlist(lapply(pops ,
71+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexave_lower })),
72+ sexdiff_upper = unlist(lapply(pops ,
73+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexdiff_upper })),
74+ sexdiff_lower = unlist(lapply(pops ,
75+ function (pop ) { get(paste(pop , " noNeaphasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexdiff_lower })),
76+ pop = rep(pops , each = 100 ))
77+
78+ loEAS10_noNea <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_10kga_noNea , pop == " EAS" ), span = 0.65 )
79+ loEUR10_noNea <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_10kga_noNea , pop == " EUR" ), span = 0.65 )
80+ loSAS10_noNea <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_10kga_noNea , pop == " SAS" ), span = 0.65 )
81+
82+ predx_noNea <- 10 ** seq(max(loEAS10_noNea $ x [1 ], loEUR10_noNea $ x [1 ], loSAS10_noNea $ x [1 ]),
83+ min(loEAS10_noNea $ x [100 ], loEUR10_noNea $ x [100 ], loSAS10_noNea $ x [100 ]), .01 )
84+
85+ predy_noNea <- (predict(loEAS10_noNea , data.frame (bin_age = predx )) +
86+ predict(loEUR10_noNea , data.frame (bin_age = predx )) +
87+ predict(loSAS10_noNea , data.frame (bin_age = predx ))) / 3
88+
89+ ggplot(subset(popsframe_10kga_noNea , pop == " AFR" ), aes(x = bin_age , y = mean_interval , col = pop )) +
90+ geom_smooth(method = " loess" , span = 0.65 , se = FALSE , lwd = 1.1 ) +
91+ geom_line(data = data.frame (mean_interval = predy_noNea , bin_age = predx_noNea ), col = ' black' , lwd = 1.1 ) +
92+ coord_cartesian(ylim = c(13.5 ,38.5 )) + axis_formatting + annotation_logticks(sides = " b" ) +
93+ scale_x_log10(minor_breaks = c(1 : 9 * 100 , 1 : 9 * 1000 )) +
94+ labs(y = " generation interval (years)" , x = " allele age (generations ago)" )
95+
96+ for (pop in pops ) {
97+ pop_10kga <- read.table(paste(pop , " _10kgacounts.txt" , sep = " " ))
98+ names(pop_10kga ) <- count_header
99+ pop_10kga <- cbind(pop_10kga [,mut_classes ] + pop_10kga [,complementary_classes ], CpG = pop_10kga $ CpG , bin_age = pop_10kga $ bin_age )
100+
101+ oout <- apply(t(t(pop_10kga [,1 : 6 ] / rowSums(pop_10kga [,1 : 6 ])) - youngspecdiff ), MARGIN = 1 ,
102+ predict_parentalages , model = strictCT_model )
103+ pop_10kga $ mgen_time <- sapply(oout , " [[" , 1 )[1 ,]
104+ pop_10kga $ fgen_time <- sapply(oout , " [[" , 1 )[2 ,]
105+ pop_10kga $ ss_mean <- (pop_10kga $ mgen_time + pop_10kga $ fgen_time ) / 2
106+
107+ assign(paste(pop , " 10kga" , sep = " _" ), pop_10kga )
108+ assign(paste(pop , " phasedplotframes10kga" , sep = " _" ),
109+ make_plotframes(phasedstrictCT_model , get(paste(pop , " 10kga" , sep = " _" )),
110+ get(paste(pop , " phased_10kgaboot_ests" , sep = " " ))))
111+
112+ }
113+
114+ popsframe_10kga <-
115+ data.frame (mean_interval = unlist(lapply(pops ,
116+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexave })),
117+ bin_age = unlist(lapply(pops ,
118+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ bin_age })),
119+ sexdiff = unlist(lapply(pops ,
120+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexdiff })),
121+ sexave_upper = unlist(lapply(pops ,
122+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexave_upper })),
123+ sexave_lower = unlist(lapply(pops ,
124+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexave_lower })),
125+ sexdiff_upper = unlist(lapply(pops ,
126+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexdiff_upper })),
127+ sexdiff_lower = unlist(lapply(pops ,
128+ function (pop ) { get(paste(pop , " phasedplotframes10kga" , sep = " _" ))[[2 ]]$ sexdiff_lower })),
129+ pop = rep(pops , each = 100 ))
130+
131+ loEAS10 <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " EAS" ), span = 0.65 )
132+ loEUR10 <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " EUR" ), span = 0.65 )
133+ loSAS10 <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " SAS" ), span = 0.65 )
134+
135+ predx <- 10 ** seq(max(loEAS10 $ x [1 ], loEUR10 $ x [1 ], loSAS10 $ x [1 ]),
136+ min(loEAS10 $ x [100 ], loEUR10 $ x [100 ], loSAS10 $ x [100 ]), .01 )
137+
138+ predy <- (predict(loEAS10 , data.frame (bin_age = predx )) +
139+ predict(loEUR10 , data.frame (bin_age = predx )) +
140+ predict(loSAS10 , data.frame (bin_age = predx ))) / 3
141+
142+ loEAS10upperCI <- loess(sexave_upper ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " EAS" ), span = 0.65 )
143+ loEAS10lowerCI <- loess(sexave_lower ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " EAS" ), span = 0.65 )
144+
145+ loEUR10upperCI <- loess(sexave_upper ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " EUR" ), span = 0.65 )
146+ loEUR10lowerCI <- loess(sexave_lower ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " EUR" ), span = 0.65 )
147+
148+ loSAS10upperCI <- loess(sexave_upper ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " SAS" ), span = 0.65 )
149+ loSAS10lowerCI <- loess(sexave_lower ~ log10(bin_age ), data = subset(popsframe_10kga , pop == " SAS" ), span = 0.65 )
150+
151+ loupperCI <- (predict(loEAS10upperCI , data.frame (bin_age = predx )) +
152+ predict(loEUR10upperCI , data.frame (bin_age = predx ))+
153+ predict(loSAS10upperCI , data.frame (bin_age = predx ))) / 3
154+
155+ lolowerCI <- (predict(loEAS10lowerCI , data.frame (bin_age = predx )) +
156+ predict(loEUR10lowerCI , data.frame (bin_age = predx ))+
157+ predict(loSAS10lowerCI , data.frame (bin_age = predx ))) / 3
158+
159+
160+ ggplot(subset(popsframe_10kga , pop == " AFR" ), aes(x = bin_age , y = mean_interval , col = pop )) +
161+ geom_line(data = data.frame (mean_interval = predy , bin_age = predx ), col = ' black' , lwd = 1.1 ,
162+ lty = 2 ) +
163+ geom_line(data = data.frame (mean_interval = predy_noNea , bin_age = predx_noNea ), col = ' black' ,
164+ lwd = 1.1 ) +
165+ geom_ribbon(data = subset(popsframe_10kga , pop == " AFR" ),
166+ aes(ymin = sexave_lower , ymax = sexave_upper , fill = pop ), color = NA , alpha = 0.1 ) +
167+ geom_ribbon(data = data.frame (mean_interval = 0 ,
168+ sexave_lower = lolowerCI , sexave_upper = loupperCI , bin_age = predx ),
169+ aes(ymin = sexave_lower , ymax = sexave_upper ), color = NA , alpha = 0.1 ) +
170+ geom_smooth(method = " loess" , span = 0.65 , se = FALSE , lty = 2 , lwd = 1.1 ) +
171+ geom_smooth(method = " loess" , span = 0.65 ,
172+ data = subset(popsframe_10kga_noNea , pop == " AFR" ), se = FALSE , lwd = 1.1 ) +
173+ coord_cartesian(ylim = c(13.5 ,38.5 )) + axis_formatting + annotation_logticks(sides = " b" ) +
174+ scale_x_log10(minor_breaks = c(1 : 9 * 100 , 1 : 9 * 1000 )) +
175+ labs(y = " generation interval (years)" , x = " allele age (generations ago)" )
176+
177+ # ##
178+
179+ for (pop in pops ) {
180+ load(paste(" bootstraps/phased_allboot_ests" , pop , " .RData" , sep = " " ))
181+
182+ pop_all <- read.table(paste(pop , " _allcounts.txt" , sep = " " ))
183+ names(pop_all ) <- count_header
184+ pop_all <- cbind(pop_all [,mut_classes ] + pop_all [,complementary_classes ], CpG = pop_all $ CpG , bin_age = pop_all $ bin_age )
185+
186+ oout <- apply(t(t(pop_all [,1 : 6 ] / rowSums(pop_all [,1 : 6 ])) - youngspecdiff ), MARGIN = 1 ,
187+ predict_parentalages , model = strictCT_model )
188+ pop_all $ mgen_time <- sapply(oout , " [[" , 1 )[1 ,]
189+ pop_all $ fgen_time <- sapply(oout , " [[" , 1 )[2 ,]
190+ pop_all $ ss_mean <- (pop_all $ mgen_time + pop_all $ fgen_time ) / 2
191+
192+ assign(paste(pop , " allcounts" , sep = " _" ), pop_all )
193+ assign(paste(pop , " phasedplotframes" , sep = " _" ),
194+ make_plotframes(phasedstrictCT_model , get(paste(pop , " allcounts" , sep = " _" )),
195+ get(paste(pop , " phased_allboot_ests" , sep = " " ))))
196+ }
197+
198+ popsframe_all <-
199+ data.frame (mean_interval = unlist(lapply(pops ,
200+ function (pop ) { get(paste(pop , " phasedplotframes" , sep = " _" ))[[2 ]]$ sexave })),
201+ bin_age = unlist(lapply(pops ,
202+ function (pop ) { get(paste(pop , " phasedplotframes" , sep = " _" ))[[2 ]]$ bin_age })),
203+ sexdiff = unlist(lapply(pops ,
204+ function (pop ) { get(paste(pop , " phasedplotframes" , sep = " _" ))[[2 ]]$ sexdiff })),
205+ sexave_upper = unlist(lapply(pops ,
206+ function (pop ) { get(paste(pop , " phasedplotframes" , sep = " _" ))[[2 ]]$ sexave_upper })),
207+ sexave_lower = unlist(lapply(pops ,
208+ function (pop ) { get(paste(pop , " phasedplotframes" , sep = " _" ))[[2 ]]$ sexave_lower })),
209+ pop = rep(pops , each = 100 ))
210+
211+ ggplot(popsframe_all , aes(x = bin_age , y = mean_interval , col = pop )) +
212+ geom_smooth(method = " loess" , span = 0.65 , se = FALSE , lwd = 1.1 ) +
213+ geom_ribbon(aes(ymin = sexave_lower , ymax = sexave_upper , fill = pop ), color = NA , alpha = 0.1 ) +
214+ axis_formatting + coord_cartesian(ylim = c(13.5 , 38.5 ), xlim = c(100 ,80000 )) + annotation_logticks(sides = " b" ) +
215+ scale_x_log10(minor_breaks = c(1 : 9 * 100 , 1 : 9 * 1000 )) +
216+ labs(y = " generation interval (years)" , x = " allele age (generations ago)" )
217+
218+ loEASall <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_all , pop == " EAS" ), span = 0.65 )
219+ loEURall <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_all , pop == " EUR" ), span = 0.65 )
220+ loSASall <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_all , pop == " SAS" ), span = 0.65 )
221+
222+ predxall <- 10 ** seq(max(loEASall $ x [1 ], loEURall $ x [1 ], loSASall $ x [1 ]),
223+ min(loEASall $ x [100 ], loEURall $ x [100 ], loSASall $ x [100 ]), .01 )
224+
225+ predyall <- (predict(loEASall , data.frame (bin_age = predxall )) +
226+ predict(loEURall , data.frame (bin_age = predxall )) +
227+ predict(loSASall , data.frame (bin_age = predxall ))) / 3
228+
229+ for (pop in pops ) {
230+ pop_nea <- read.table(paste(" neanderthal_masked/" , pop , " _noNea.allcounts.txt" , sep = " " ))
231+ names(pop_nea ) <- count_header
232+ pop_nea <- cbind(pop_nea [,mut_classes ] + pop_nea [,complementary_classes ],
233+ CpG = pop_nea $ CpG , bin_age = pop_nea $ bin_age )
234+
235+ assign(paste(pop , " neaAll" , sep = " _" ), pop_nea )
236+ assign(paste(pop , " noNeaphasedplotframesall" , sep = " _" ),
237+ make_plotframes(phasedstrictCT_model , get(paste(pop , " neaAll" , sep = " _" )),
238+ get(paste(pop , " phased_allboot_ests" , sep = " " ))))
239+ }
240+
241+ popsframe_all_noNea <-
242+ data.frame (mean_interval = unlist(lapply(pops ,
243+ function (pop ) { get(paste(pop , " noNeaphasedplotframesall" , sep = " _" ))[[2 ]]$ sexave })),
244+ bin_age = unlist(lapply(pops ,
245+ function (pop ) { get(paste(pop , " noNeaphasedplotframesall" , sep = " _" ))[[2 ]]$ bin_age })),
246+ sexdiff = unlist(lapply(pops ,
247+ function (pop ) { get(paste(pop , " noNeaphasedplotframesall" , sep = " _" ))[[2 ]]$ sexdiff })),
248+ sexave_upper = unlist(lapply(pops ,
249+ function (pop ) { get(paste(pop , " noNeaphasedplotframesall" , sep = " _" ))[[2 ]]$ sexave_upper })),
250+ sexave_lower = unlist(lapply(pops ,
251+ function (pop ) { get(paste(pop , " noNeaphasedplotframesall" , sep = " _" ))[[2 ]]$ sexave_lower })),
252+ pop = rep(pops , each = 100 ))
253+
254+ loEASall_noNea <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_all_noNea , pop == " EAS" ), span = 0.65 )
255+ loEURall_noNea <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_all_noNea , pop == " EUR" ), span = 0.65 )
256+ loSASall_noNea <- loess(mean_interval ~ log10(bin_age ), data = subset(popsframe_all_noNea , pop == " SAS" ), span = 0.65 )
257+
258+ predxall_noNea <- 10 ** seq(max(loEASall_noNea $ x [1 ], loEURall_noNea $ x [1 ], loSASall_noNea $ x [1 ]),
259+ min(loEASall_noNea $ x [100 ], loEURall_noNea $ x [100 ], loSASall_noNea $ x [100 ]), .01 )
260+
261+ predyall_noNea <- (predict(loEASall_noNea , data.frame (bin_age = predxall_noNea )) +
262+ predict(loEURall_noNea , data.frame (bin_age = predxall_noNea )) +
263+ predict(loSASall_noNea , data.frame (bin_age = predxall_noNea ))) / 3
264+
265+ ggplot(subset(popsframe_all , pop == " AFR" ), aes(x = bin_age , y = mean_interval , col = pop )) +
266+ geom_smooth(method = " loess" , span = 0.65 , se = FALSE , lwd = 1.1 ) +
267+ geom_smooth(method = " loess" , span = 0.65 , se = FALSE , lwd = 1.1 , lty = 2 ,
268+ data = subset(popsframe_all_noNea , pop == " AFR" )) +
269+ geom_line(data = data.frame (bin_age = predxall_noNea , mean_interval = predyall_noNea ),
270+ col = ' black' , lwd = 1.1 , lty = 2 ) +
271+ geom_line(data = data.frame (bin_age = predxall , mean_interval = predyall ),
272+ col = ' black' , lwd = 1.1 ) +
273+ axis_formatting + coord_cartesian(ylim = c(13.5 , 38.5 ), xlim = c(100 ,80000 )) + annotation_logticks(sides = " b" ) +
274+ scale_x_log10(minor_breaks = c(1 : 9 * 100 , 1 : 9 * 1000 )) +
275+ labs(y = " generation interval (years)" , x = " allele age (generations ago)" )
0 commit comments