phoible
diff --git a/‎data/phoible.csv‎
Lines changed: 23003 additions & 23003 deletions b/‎data/phoible.csv‎
Lines changed: 23003 additions & 23003 deletions
diff --git a/‎raw-data/GM/README.md‎
Lines changed: 0 additions & 12 deletions b/‎raw-data/GM/README.md‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎raw-data/PH/README.md‎
Lines changed: 8 additions & 6 deletions b/‎raw-data/PH/README.md‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎raw-data/UZ/UZ_inventories.tsv‎ ‎raw-data/PH/UZ_inventories.tsv‎raw-data/UZ/UZ_inventories.tsv renamed to raw-data/PH/UZ_inventories.tsv b/‎raw-data/UZ/UZ_inventories.tsv‎ ‎raw-data/PH/UZ_inventories.tsv‎raw-data/UZ/UZ_inventories.tsv renamed to raw-data/PH/UZ_inventories.tsv
diff --git a/‎raw-data/GM/gm-afr-inventories.tsv‎ ‎raw-data/PH/gm-afr-inventories.tsv‎raw-data/GM/gm-afr-inventories.tsv renamed to raw-data/PH/gm-afr-inventories.tsv b/‎raw-data/GM/gm-afr-inventories.tsv‎ ‎raw-data/PH/gm-afr-inventories.tsv‎raw-data/GM/gm-afr-inventories.tsv renamed to raw-data/PH/gm-afr-inventories.tsv
diff --git a/‎raw-data/GM/gm-sea-inventories.tsv‎ ‎raw-data/PH/gm-sea-inventories.tsv‎raw-data/GM/gm-sea-inventories.tsv renamed to raw-data/PH/gm-sea-inventories.tsv b/‎raw-data/GM/gm-sea-inventories.tsv‎ ‎raw-data/PH/gm-sea-inventories.tsv‎raw-data/GM/gm-sea-inventories.tsv renamed to raw-data/PH/gm-sea-inventories.tsv
diff --git a/‎raw-data/UW/uw_inventories.tsv‎ ‎raw-data/PH/wip_inventories.tsv‎raw-data/UW/uw_inventories.tsv renamed to raw-data/PH/wip_inventories.tsv b/‎raw-data/UW/uw_inventories.tsv‎ ‎raw-data/PH/wip_inventories.tsv‎raw-data/UW/uw_inventories.tsv renamed to raw-data/PH/wip_inventories.tsv
diff --git a/‎raw-data/UZ/README.md‎
Lines changed: 0 additions & 15 deletions b/‎raw-data/UZ/README.md‎
Lines changed: 0 additions & 15 deletions
diff --git a/‎scripts/aggregate-raw-data.R‎
Lines changed: 5 additions & 5 deletions b/‎scripts/aggregate-raw-data.R‎
Lines changed: 5 additions & 5 deletions
@@ -1,18 +1,20 @@
 # PH
 
-The `PH` folder contains data drawn from journal articles, theses, and 
-published grammars, added by members of the Linguistic Phonetics 
-Laboratory at the University of Washington. The contents are described in:
+The `PH` folder contains phonological inventory data (phonemes, allophones, and tones) drawn from journal articles, theses, and published grammars. Collectively, these inventories represent a convenience sample of languages, selected to improve worldwide coverage of the aggregated phoible data. Their source is tagged `ph` in the aggregated [phoible.csv](../../data/phoible.csv) file.
+
+The inventory data in [phoible_inventories.tsv](phoible_inventories.tsv) were added by members of the Linguistic Phonetics Laboratory at the University of Washington. The contents are described in:
 
 > Moran, Steven. (2012). Phonetics Information Base and Lexicon. PhD thesis, University of Washington. Online: [https://digital.lib.washington.edu/researchworks/handle/1773/22452](https://digital.lib.washington.edu/researchworks/handle/1773/22452).
 
-The inventory data are available in phoible long format in [phoible_inventories.tsv](phoible_inventories.tsv) and contain phonemes, allophones, and tones.
+The inventories in [gm-afr-inventories.tsv](gm-afr-inventories.tsv) and [gm-sea-inventories.tsv](gm-sea-inventories.tsv) cover African and Southeast Asian languages and were collected and edited by Christopher Green and Steven Moran.
+
+The inventory data in [UZ_inventories.tsv](UZ_inventories.tsv) were added by members of the Department of Comparative Linguistics at the University of Zurich.
 
-The data adhere to the [phoible conventions](http://phoible.github.io/conventions/) and [Unicode IPA](http://langsci-press.org/catalog/book/176). 
+All data in `PH` adhere to the [phoible conventions](https://phoible.org/conventions) and [Unicode IPA](http://langsci-press.org/catalog/book/176). For more information, see the [phoible FAQ](https://phoible.org/faq).
 
 We have also collected for each citation a BibTeX reference, available in the [phoible-references.bib](../../data/phoible-references.bib) file. See the [InventoryID-Bibtex.csv](../../mappings/InventoryID-Bibtex.csv) mapping file for details.
 
 Note that the ISO 639-3 codes in the PH source may be out of date with the current ISO 639-3 standard. For more info, see: [https://iso639-3.sil.org/](https://iso639-3.sil.org/).
 
 For up-to-date language codes for each inventory, we maintain a phoible index here:
-[InventoryID-LanguageCodes.csv](../../mappings/InventoryID-LanguageCodes.csv).
+[InventoryID-LanguageCodes.csv](../../mappings/InventoryID-LanguageCodes.csv).
@@ -32,9 +32,9 @@ er_path <- file.path(data_dir, "ER", "ER_inventories.tsv")
 ea_path <- file.path(data_dir, "EA", "EA_inventories.tsv")
 ea_ipa_path <- file.path(data_dir, "EA", "EA_IPA_correspondences.tsv")
 ph_path <- file.path(data_dir, "PH", "phoible_inventories.tsv")
-uz_path <- file.path(data_dir, "UZ", "UZ_inventories.tsv")
-gm_afr_path <- file.path(data_dir, "GM", "gm-afr-inventories.tsv")
-gm_sea_path <- file.path(data_dir, "GM", "gm-sea-inventories.tsv")
+uz_path <- file.path(data_dir, "PH", "UZ_inventories.tsv")
+gm_afr_path <- file.path(data_dir, "PH", "gm-afr-inventories.tsv")
+gm_sea_path <- file.path(data_dir, "PH", "gm-sea-inventories.tsv")
 aa_path <- file.path(data_dir, "AA", "AA_inventories.tsv")
 spa_path <- file.path(data_dir, "SPA", "SPA_Phones.tsv")
 spa_ipa_path <- file.path(data_dir, "SPA", "SPA_IPA_correspondences.tsv")
@@ -92,7 +92,7 @@ sparse_cols <- c("InventoryID", "LanguageCode", "LanguageName", "Phoneme",
                  "SpecificDialect", "FileNames")
 uz_data <- parse_sparse(uz_raw, id_col="FileNames", fill_cols=sparse_cols)
 ## clean up
-uz_data <- validate_data(uz_data, "uz", debug=debug)
+uz_data <- validate_data(uz_data, "ph", debug=debug)
 if (!debug) rm(uz_raw)
 
 ## GM has dense lx.code, name, and dialect columns, but sparse FileNames column.
@@ -105,7 +105,7 @@ gm_sea_raw <- read.delim(gm_sea_path, na.strings="", quote="",
 gm_raw <- rbind(gm_afr_raw, gm_sea_raw)
 gm_data <- parse_sparse(gm_raw, id_col="InventoryID", fill_cols="FileNames")
 ## clean up
-gm_data <- validate_data(gm_data, "gm", debug=debug)
+gm_data <- validate_data(gm_data, "ph", debug=debug)
 if (!debug) rm(gm_raw, gm_afr_raw, gm_sea_raw)
 
 ## AA has blank lines between languages; InventoryID is sparse and unique; all