draft

smasongarrison · smasongarrison · commit 818aca5c82de · 2026-04-03T14:32:35.000-04:00
diff --git a/vignettes/articles/references.bib b/vignettes/articles/references.bib
@@ -0,0 +1,258 @@
+@article{Wright1922,
+	title = {Coefficients of inbreeding and relationship},
+	volume = {56},
+	issn = {0003-0147},
+	url = {https://www.jstor.org/stable/2456273},
+	doi = {10.1086/279872},
+	number = {645},
+	journal = {The American Naturalist},
+	author = {Wright, Sewall},
+	year = {1922},
+	note = {Publisher: Science Press},
+	pages = {330--338},
+}
+@article{kinship2,
+	author = {Sinnwell, J. P. and Therneau, T. M. and Schaid, D. J.},
+	title = {The kinship2 {R} package for pedigree data},
+	journal = {Human Heredity},
+	year = {2014},
+	volume = {78}, number = {2},
+	pages = {91--93},
+	doi = {10.1159/000363105}
+	}
+@Manual{kinship2R,
+    title = {kinship2: Pedigree Functions},
+    author = {Jason Sinnwell and Terry Therneau},
+    year = {2022},
+    note = {R package version 1.9.6},
+    url = {https://CRAN.R-project.org/package=kinship2},
+  }
+@article{Garrison2018,
+	title = {Popular {Structural} {Equation} {Modeling} {Programs} for {Behavior} {Genetics}},
+	volume = {25},
+	issn = {1070-5511},
+	url = {https://doi.org/10.1080/10705511.2018.1493385},
+	doi = {10.1080/10705511.2018.1493385},
+	language = {en},
+	number = {6},
+	journal = {Structural Equation Modeling: A Multidisciplinary Journal},
+	author = {Garrison, S. Mason},
+	month = sep,
+	year = {2018},
+	note = {Publisher: Routledge},
+	keywords = {\#nosource},
+	pages = {972--977},
+	annote = {doi: 10.1080/10705511.2018.1493385},
+}
+
+@article{Neale2016,
+	title = {{OpenMx} 2.0: {Extended} {Structural} {Equation} and {Statistical} {Modeling}},
+	volume = {81},
+	issn = {0033-3123},
+	shorttitle = {{OpenMx} 2.0},
+	url = {http://link.springer.com/10.1007/s11336-014-9435-8},
+	doi = {10.1007/s11336-014-9435-8},
+	abstract = {Psychometrika, doi:10.1007/s11336-014-9435-8},
+	language = {en},
+	number = {2},
+	journal = {Psychometrika},
+	author = {Neale, Michael C and Hunter, Michael D and Pritikin, Joshua N and Zahery, Mahsa and Brick, Timothy R and Kirkpatrick, Robert M and Estabrook, Ryne and Bates, Timothy C and Maes, Hermine H and Boker, Steven M.},
+	month = jun,
+	year = {2016},
+	pmid = {25622929},
+	pages = {535--549},
+}
+
+@article{hunter_analytic_2021,
+	title = {The {Analytic} {Identification} of {Variance} {Component} {Models} {Common} to {Behavior} {Genetics}},
+	volume = {51},
+	issn = {1573-3297},
+	doi = {10.1007/s10519-021-10055-x},
+	abstract = {Many behavior genetics models follow the same general structure. We describe this general structure and analytically derive simple criteria for its identification. In particular, we find that variance components can be uniquely estimated whenever the relatedness matrices that define the components are linearly independent (i.e., not confounded). Thus, we emphasize determining which variance components can be identified given a set of genetic and environmental relationships, rather than the estimation procedures. We validate the identification criteria with several well-known models, and further apply them to several less common models. The first model distinguishes child-rearing environment from extended family environment. The second model adds a gene-by-common-environment interaction term in sets of twins reared apart and together. The third model separates measured-genomic relatedness from the scanner site variation in a hypothetical functional magnetic resonance imaging study. The computationally easy analytic identification criteria allow researchers to quickly address model identification issues and define novel variance components, facilitating the development of new research questions.},
+	number = {4},
+	journal = {Behavior Genetics},
+	author = {Hunter, Michael D. and Garrison, S. Mason and Burt, S. Alexandra and Rodgers, Joseph L.},
+	month = jul,
+	year = {2021},
+	pmid = {34089112},
+	pmcid = {PMC8394168},
+	keywords = {Behavior genetics, Humans, Model identification, Models, Genetic, Structural equation modeling, Twins, Variance components},
+	pages = {425--437},
+}
+
+@Manual{easy,
+    title = {EasyMx: Easy Model-Builder Functions for 'OpenMx'},
+    author = {Michael D. Hunter},
+    year = {2023},
+    note = {R package version 0.3-2},
+    url = {https://CRAN.R-project.org/package=EasyMx},
+  }
+
+@article{kirkpatrick_combining_2021,
+	title = {Combining {Structural}-{Equation} {Modeling} with {Genomic}-{Relatedness}-{Matrix} {Restricted} {Maximum} {Likelihood} in {OpenMx}},
+	volume = {51},
+	issn = {1573-3297},
+	url = {https://doi.org/10.1007/s10519-020-10037-5},
+	doi = {10.1007/s10519-020-10037-5},
+	language = {en},
+	number = {3},
+	urldate = {2021-01-22},
+	journal = {Behavior Genetics},
+	author = {Kirkpatrick, Robert M. and Pritikin, Joshua N. and Hunter, Michael D. and Neale, Michael C.},
+	month = jan,
+	year = {2021},
+	pages = {331--342},
+}
+@article{hunter_modeling_2023,
+    author = {Hunter, Michael D. and Lyu, Xuanyu and Garrison, S. Mason and Rodgers, Joseph L. and Smith, Ken R. and Coon, Hilary and Burt, S. Alexandra},
+    title = {Modeling mtDNA Effects from Extended Pedigrees in the Utah Population Database},
+    journal = {Behavior Genetics},
+    year = {2023},
+    month = jun,
+    doi = {10.1007/s10519-023-10156-9},
+    note = {Abstract presented at the 53rd Behavior Genetics Association Annual Meeting, Murcia, Spain}
+}
+@article{burt_mom_genes_2023,
+    author = {Burt, S. Alexandra},
+    title = {Mom genes: Leveraging maternal lineage to estimate the contributions of mitochondrial DNA},
+    journal = {Behavior Genetics},
+    year = {2023},
+    month = jun,
+    doi = {10.1007/s10519-023-10156-9},
+    note = {Abstract presented at the 53rd Behavior Genetics Association Annual Meeting, Murcia, Spain}
+}
+@mastersthesis{lyu_masters_thesis_2023,
+    author = {Lyu, Xuanyu},
+    title = {Statistical power analysis on mtDNA effects estimation},
+    school = {Wake Forest University},
+    year = {2023},
+    month = may,
+    note = {Abstract presented at the 53rd Behavior Genetics Association Annual Meeting, Murcia, Spain}
+}
+
+@article{DOnofrio2003,
+	title = {The role of the children of twins design in elucidating causal relations between parent characteristics and child outcomes.},
+	volume = {44},
+	issn = {0021-9630},
+	url = {http://www.ncbi.nlm.nih.gov/pubmed/14626455},
+	doi = {10.1111/1469-7610.00196},
+	language = {eng},
+	number = {8},
+	urldate = {2016-10-03},
+	journal = {Journal of Child Psychology and Psychiatry},
+	author = {D'Onofrio, Brian M and Turkheimer, Eric N. and Eaves, Lindon J and Corey, Linda A. and Berg, Kare and Solaas, Marit H. and Emery, Robert E},
+	month = nov,
+	year = {2003},
+	pmid = {14626455},
+	pages = {1130--1144},
+}
+
+@article{rodgers_mdan,
+	title = {The {Cross}-{Generational} {Mother}–{Daughter}–{Aunt}–{Niece} {Design}: {Establishing} {Validity} of the {MDAN} {Design} with {NLSY} {Fertility} {Variables}},
+	volume = {38},
+	issn = {1573-3297},
+	url = {https://doi.org/10.1007/s10519-008-9225-0},
+	doi = {10.1007/s10519-008-9225-0},
+	number = {6},
+	journal = {Behavior Genetics},
+	author = {Rodgers, Joseph Lee and Bard, David E. and Johnson, Amber and D'Onofrio, Brian and Miller, Warren B.},
+	month = nov,
+	year = {2008},
+	pages = {567--578},
+}
+
+@article{garrison_analyzing_2023,
+    author = {Garrison, S. Mason and Lyu, Xuanyu and Hunter, Michael D. and Rodgers, Joseph L. and Smith, Ken R. and Coon, Hilary and Burt, S. Alexandra},
+    title = {Analyzing extended cousin similarity to unravel the mystery of mtDNA and longevity},
+    journal = {Behavior Genetics},
+    year = {2023},
+    month = jun,
+    doi = {10.1007/s10519-023-10156-9},
+    note = {Abstract presented at the 53rd Behavior Genetics Association Annual Meeting, Murcia, Spain}
+}
+@article{lyu_statistical_power_2023,
+    author = {Lyu, Xuanyu and Hunter, Michael D. and Rodgers, Joseph L. and Smith, Ken R. and Coon, Hilary and Burt, S. Alexandra and Garrison, S. Mason},
+    title = {Statistical power analysis on mtDNA effects estimation},
+    journal = {Behavior Genetics},
+    year = {2023},
+    month = jun,
+    doi = {10.1007/s10519-023-10156-9},
+    note = {Abstract presented at the 53rd Behavior Genetics Association Annual Meeting, Murcia, Spain}
+}
+
+@incollection{lange_genetic_2002,
+	address = {New York, NY},
+	series = {Statistics for {Biology} and {Health}},
+	title = {Genetic {Identity} {Coefficients}},
+	isbn = {978-0-387-21750-5},
+	url = {https://doi.org/10.1007/978-0-387-21750-5_5},
+	abstract = {Genetic identity coefficients are powerful theoretical tools for genetic analysis. Geneticists have devised these indices to measure the degree of inbreeding of a single individual and the degree of relatedness of a pair of relatives. Since the degree of inbreeding of a single individual can be summarized by the relationship between his or her parents, we will focus on identity coefficients for relative pairs. These coefficients pertain to a generic autosomal locus and depend only on the relevant pedigree connecting two relatives and not on any phenotypes observed in the pedigree. In Chapter 6 we will investigate the applications of identity coefficients. Readers desiring motivation for the combinatorial problems attacked here may want to glance at Chapter 6 first.},
+	language = {en},
+	urldate = {2023-09-05},
+	booktitle = {Mathematical and {Statistical} {Methods} for {Genetic} {Analysis}},
+	publisher = {Springer},
+	author = {Lange, Kenneth},
+	editor = {Lange, Kenneth},
+	year = {2002},
+	doi = {10.1007/978-0-387-21750-5_5},
+	keywords = {Autosomal Locus, Binomial Sampling, Cholesky Decomposition, Inbreeding Coefficient, Kinship Coefficient},
+	pages = {81--96},
+}
+
+@Article{mcardleRAM,
+  author        = {J. Jack {McArdle} and Roderick P. {McDonald}},
+  title         = {Some algebraic properties of the reticular action model for moment structures},
+  doi           = {10.1111/j.2044-8317.1984.tb00802.x},
+  pages         = {234--251},
+  volume        = {37},
+  date-modified = {2011-07-08 14:39:48 -0400},
+  file          = {:McArdleMcDonald1984RAM.pdf:PDF},
+  journal       = {British Journal of Mathematical and Statistical Psychology},
+  keywords      = {SEM, Model Estimation Implementation, Notre Dame Reading List, RAM},
+  creationdate     = {2010-01-01T13:36:01},
+  year          = {1984},
+}
+
+@book{neale2004,
+	title = {Methodology for Genetic Studies of Twins and Families},
+	author = {Neale, Michael C. and Maes, Hermine H M},
+	year = {2004},
+	date = {2004},
+	publisher = {Kluwer Academic Publishers B.V.},
+	volume = {48},
+	doi = {10.1136/jmg.30.9.800-a},
+	note = {PMID: 4881448
+Issue: 3},
+	address = {Dordrecht, The Netherlands}
+}
+
+@article{plomin2016,
+	title = {Top 10 Replicated Findings From Behavioral Genetics.},
+	author = {Plomin, Robert and DeFries, John C. and Knopik, Valerie S and Neiderhiser, Jenae M.},
+	year = {2016},
+	month = {01},
+	date = {2016-01},
+	journal = {Perspectives on Psychological Science},
+	pages = {3--23},
+	volume = {11},
+	number = {1},
+	doi = {10.1177/1745691615617439},
+	url = {http://www.ncbi.nlm.nih.gov/pubmed/26817721},
+	note = {ISBN: 1745-6924 (Electronic) 1745-6916 (Linking)}
+}
+
+@article{eaves1978,
+	title = {Model-fitting approaches to the analysis of human behaviour.},
+	author = {Eaves, Lindon J and Last, K A and Young, P A and Martin, N G},
+	year = {1978},
+	month = {12},
+	date = {1978-12-01},
+	journal = {Heredity},
+	pages = {249--320},
+	volume = {41},
+	number = {3},
+	doi = {10.1038/hdy.1978.101},
+	url = {http://www.nature.com/articles/hdy1978101},
+	note = {ISSN: 0018-067X (Print); 0018-067X (Linking)},
+	langid = {en}
+}
diff --git a/vignettes/articles/tutorialmanuscript.Xmd b/vignettes/articles/tutorialmanuscript.Xmd
@@ -0,0 +1,112 @@
+---
+title: "From Twins to Pedigrees: A Tutorial for Extended Family Variance Component Modeling with BGmisc"
+shorttitle: "Extended Family Modeling with BGmisc"
+author:
+  - name: "S. Mason Garrison"
+    affiliation: "1"
+    corresponding: true
+    email: "garrissm@wfu.edu"
+abstract: |
+  Twin studies remain the dominant design in behavior genetics, yet most twin
+  registries contain far more information than researchers typically use: full siblings, half-siblings, cousins, and multi-generational relatives whose distinct kinship coefficients jointly identify a richer set of variance components than any MZ/DZ comparison alone. We demonstrate how to fit extended pedigree models using the BGmisc package and OpenMx.
+  We apply the extended pedigree model to multiple datasets
+  across three empirically distinct settings: the National Longitudinal Survey
+  of Youth (a large human panel study with researcher-linked kinship), the Kluane Red Squirrel Project
+  (a multi-generational animal field study), and a children-of-twins
+  dataset with genomic relatedness data. In each case, fitting the extended
+  pedigree model on data the researcher already possesses -- but typically
+  discards -- changes the substantive conclusions: heritability estimates shift,
+  confounds become testable, and components inaccessible to twin designs emerge.
+  We provide reproducible code for each application and practical guidance on
+  identification, starting values, and the interpretation of results.
+keywords: ["extended pedigree", "variance components", "heritability", "BGmisc", "OpenMx", "behavior genetics", "tutorial"]
+output:
+  rmarkdown::html_vignette:
+    toc: true
+    toc_depth: 3
+  pdf_document:
+    keep_tex: true
+bibliography: references.bib
+vignette: >
+  %\VignetteIndexEntry{Extended Family Modeling with BGmisc}
+  %\VignetteEncoding{UTF-8}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options: 
+  markdown: 
+    wrap: 100
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE, comment = "#>",
+  echo = TRUE, message = FALSE, warning = FALSE,
+  fig.width = 7, fig.height = 4.5
+)
+
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+# Introduction
+
+In their quest to understand the relative contributions of genetic and environmental factors to
+phenotypic variation, behavior geneticists have long relied on the classical twin design.
+By comparing the similarity of monozygotic (MZ) twins -- who share essentially all their DNA -- to
+that of dizygotic (DZ) twins -- who share on average half their segregating alleles -- researchers
+can partition phenotypic variance into additive genetic ($a^2$), shared environmental ($c^2$), and
+nonshared environmental ($e^2$) components [@plomin2016; @neale2004]. The twin design is elegant
+precisely because it requires only two types of pairs to identify three parameters.
+
+Its simplicity, however, is also its limitation. The classical ACE model is just-identified: three
+observed statistics (the phenotypic variance and the MZ and DZ covariances) and three unknown
+parameters, leaving zero degrees of freedom to test model fit or estimate additional components.
+Dominance genetic variance ($d^2$), epistasis, and interactions between nuclear and mitochondrial
+DNA are inestimable from twins alone. More practically, twin data are often collected in the
+context of larger family studies, either intentionally (e.g., twin registries that also include
+siblings and parents) or as a byproduct of large panel studies (e.g., the National Longitudinal
+Survey of Youth, which includes researcher-linked kinship). In most cases, the additional relatives
+are excluded from analysis, and the twin design is applied to a subset of the data, even though
+these relatives carry independent information about the genetic and environmental architecture of
+the phenotype.
+
+The extended pedigree model, which we have introduced elsewhere (see ETC), leverages the full range
+of kinship coefficients in a pedigree to identify a richer set of variance components than the
+classical twin design. By including multiple types of relatives, researchers can estimate not only
+additive genetic variance but also dominance, shared environmental variance, and even more complex
+interactions. This tutorial demonstrates how to fit extended pedigree models using the BGmisc
+package and OpenMx, applying the model to multiple datasets across empirically distinct settings:
+the National Longitudinal Survey of Youth (a large human panel study with researcher-linked
+kinship), the Kluane Red Squirrel Project (a multi-generational animal field study), and a
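+children-of-twins dataset. In each case, fitting the extended pedigree model on data the researcher
+already possesses -- but typically discards -- changes the substantive conclusions: heritability
+estimates shift, confounds become testable, and components inaccessible to twin designs emerge. We
+provide reproducible code for each application and practical guidance on identification, starting
+values, and the interpretation of results.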
+
+The extended pedigree model addresses this gap directly. Rather than relying on a single MZ/DZ
+contrast, it leverages the full spectrum of pairwise kinship coefficients available in a family
+dataset: 1.0 for identical twins, 0.5 for parent-offspring and full siblings, 0.25 for half-siblings
+and grandparent-grandchild pairs, 0.125 for first cousins, and so on. Each distinct relatedness
+value provides independent leverage for disentangling genetic from environmental contributions. As
+the number of distinct kinship types increases, so does the number of identifiable variance
+components.
+
+Extended pedigree designs have been used in behavior genetics since at least the 1970s [@eaves1978; @fulker_multiple_1988], but they have remained a minority practice. This is partly because of concerns about model identification and power (Wilson, 1982, 1989), the complexity of fitting these models, and the relative costs of collecting twin data compared to extended family data.
+
+<!-- https://onlinelibrary.wiley.com/doi/10.1002/bimj.4710310511 -->
+
+It is also because the twin design has been so successful and widely adopted. The twin design is often seen as the "gold standard" in behavior genetics, and many researchers may be hesitant to deviate from this established approach. Additionally, many human datasets simply do not include the necessary family structure to fit extended pedigree models, which may limit their applicability in certain contexts.
+
+The reasons for this are numerous, but a key factor is that many human datasets simply do not include the necessary family structure to fit these models. Moreover, the twin design is often the default analytic approach, even when more complex family data are available.
+
+Deriving  
+
+In contrast, similar
+models are common in plant and animal breeding, where pedigree data are more routinely collected and
+analyzed.
+
+A persistent barrier has been the programming complexity of constructing relatedness matrices for
+arbitrary family structures, checking model identification, and assembling the resulting multi-group
+structural equation models. The `BGmisc` R package [@Garrison2024; @garrison_bgmisc_2025] was
+extended to address these challenges, providing tools for calculating relatedness matrices from
+pedigree data, checking model identification, and fitting extended pedigree models using OpenMx. The
+package is designed to be user-friendly and flexible, allowing researchers to easily incorporate a
+wide range of family structures into their analyses.
+
+# References