|
| 1 | +# devtools::install_github("R-Computing-Lab/BGmisc") |
| 2 | +library(tidyverse) |
| 3 | +library(usethis) |
| 4 | +library(readr) |
| 5 | + |
# Pedigree data source: https://doi.org/10.6084/m9.figshare.31513204.v2
#
# Read the semicolon-delimited pedigree file, standardise the column names
# with janitor::clean_names(), then rename the id / sire / dam columns to
# ID / dadID / momID.
ped <- janitor::clean_names(
  read_delim(
    file = "data-raw/Pedigree4G.csv",
    delim = ";",
    escape_double = FALSE,
    trim_ws = TRUE
  )
)
ped <- rename(ped, ID = id, dadID = sire, momID = dam)
| 17 | + |
| 18 | + |
# Phenotype records, read from a semicolon-delimited file.
# p0 is forced to character at read time; the script later turns its
# "ABORTO" entries into NA before converting the column to numeric.
pheno <- janitor::clean_names(
  read_delim(
    file = "data-raw/FenotiposPesos90.csv",
    delim = ";",
    col_types = cols(p0 = col_character()),
    escape_double = FALSE,
    trim_ws = TRUE
  )
)
# Use the same ID / dadID / momID key names as the pedigree table so the two
# tables can be joined on them.
pheno <- rename(pheno, ID = id, dadID = sire_id, momID = dam_id)
| 29 | + |
# Tabulate the raw (character) p0 values to spot non-numeric entries.
pheno[["p0"]] %>%
  table()

# Recode the "ABORTO" marker in p0 as missing, then convert p0 to double.
# Any other non-numeric string becomes NA with a coercion warning.
pheno <- pheno %>%
  mutate(p0 = as.double(na_if(p0, "ABORTO")))
| 43 | + |
| 44 | + |
| 45 | + |
# Merge pedigree and phenotypes, keeping rows that appear in either table.
# A row only matches when ID, dadID and momID all agree.
ped_pheno <- full_join(ped, pheno, by = c("ID", "dadID", "momID"))

# IDs that occur on more than one row of the merged table.
dup_id <- ped_pheno %>%
  count(ID, name = "n") %>%
  filter(n > 1) %>%
  pull(ID)

# All rows belonging to those duplicated IDs, ordered by ID for inspection.
# NOTE(review): dup_id_df is not used again in this script, and the duplicated
# IDs remain in ped_pheno when it is exported -- confirm that is intended.
dup_id_df <- arrange(filter(ped_pheno, ID %in% dup_id), ID)
| 59 | + |
| 60 | + |
# Long-format growth table: one row per individual per measurement day.
#
# Keeps only individuals with a recorded p0, then stacks the p0 ... p90
# columns into `day` (numeric, taken from the column name) and `weight`.
#
# Both input tables were passed through janitor::clean_names(), which
# lower-snake-cases every column name, so the sex column has to be selected
# as `sexo`; a capitalised `Sexo` cannot exist at this point and select()
# would stop the script with an error.
# NOTE(review): assumes the raw header is "Sexo" and that the column comes
# from only one of the joined tables (otherwise it would carry a .x/.y
# suffix) -- confirm against the CSV headers.
ped_growth <- ped_pheno %>%
  filter(!is.na(p0)) %>%
  select(
    ID, sexo, dadID, momID,
    p0, p15, p30, p45, p60, p90
  ) %>%
  pivot_longer(
    # List the measurement columns explicitly rather than relying on a
    # case-insensitive starts_with("p") match.
    cols = c(p0, p15, p30, p45, p60, p90),
    names_to = "day",
    names_prefix = "p",
    names_transform = list(day = as.numeric),
    values_to = "weight"
  )
| 72 | + |
# Growth trajectories: weight against measurement day (0, 15, 30, 45, 60, 90),
# one line per individual, with a single loess smoother across everyone.
# The block is disabled (if (FALSE)); switch it on manually to draw the plot.

library(ggplot2)
if (FALSE) {
  all_ids <- unique(ped_growth$ID)
  # Plot at most 1000 individuals. sample() errors when asked for more draws
  # than there are IDs, so cap the request; drawing positions with
  # sample.int() also avoids sample()'s special handling of a single numeric
  # value.
  n_plot <- min(1000L, length(all_ids))
  plot_ids <- all_ids[sample.int(length(all_ids), n_plot)]

  pl <- ggplot(
    ped_growth %>% filter(ID %in% plot_ids),
    aes(x = day, y = weight, group = ID)
  ) +
    geom_line(alpha = 0.1) +
    geom_jitter(alpha = 0.05, width = 1, height = 0) +
    # group = 1 overrides the per-ID grouping so one smoother is fitted.
    geom_smooth(method = "loess", se = TRUE, color = "blue", group = 1) +
    labs(
      title = "Growth Trajectories of Individuals Over Time",
      x = "Day",
      y = "Weight"
    ) +
    theme_minimal()

  pl
}
# Export the merged pedigree/phenotype table:
#   * a CSV copy next to the raw inputs (no row-name column), and
#   * the package dataset via usethis, xz-compressed, replacing any
#     existing copy.
utils::write.csv(
  x = ped_pheno,
  file = "data-raw/ped_pheno.csv",
  row.names = FALSE
)
usethis::use_data(ped_pheno, compress = "xz", overwrite = TRUE)
0 commit comments