Skip to content

Commit 2cbf599

Browse files
committed
Back out some changes not directly related to remote kinship execution
1 parent 335b231 commit 2cbf599

6 files changed

Lines changed: 9 additions & 225 deletions

File tree

ehr/resources/pipeline/kinship/populateKinship.r

Lines changed: 4 additions & 164 deletions
Original file line number | Diff line number | Diff line change
@@ -13,20 +13,10 @@ library(Matrix)
1313
library(dplyr)
1414

1515
spec <- matrix(c(
16-
'inputFile', '-f', 1, 'character',
17-
'mergeSpeciesWithHybrids', '-m', 0, 'logical',
18-
'performKinshipValidation', '-v', 0, 'logical'
16+
'inputFile', '-f', 1, 'character'
1917
), ncol=4, byrow=TRUE)
2018
opts <- getopt(spec, commandArgs(trailingOnly = TRUE))
2119

22-
if (is.null(opts$mergeSpeciesWithHybrids)){
23-
opts$mergeSpeciesWithHybrids <- FALSE
24-
}
25-
26-
if (is.null(opts$performKinshipValidation)){
27-
opts$performKinshipValidation <- FALSE
28-
}
29-
3020
allPed <- read.table(opts$inputFile, quote="\"")
3121
colnames(allPed)<-c('Id', 'Dam', 'Sire', 'Gender', 'Species')
3222

@@ -43,147 +33,10 @@ if (any(allPed$Species == 'Unknown')) {
4333
print(paste0('There are ', sum(allPed$Species == 'Unknown'), ' Ids with species = Unknown'))
4434
}
4535

46-
# The purpose of this function is to handle instances where there was cross-breeding.
47-
# While this is probably rare, it can occur. When this happens, simply merge the entire species together and process as one unit.
48-
# This ensures that all relevant ancestors from both species are present
49-
generateSpeciesToProcess <- function(allPed, mergeSpeciesWithHybrids) {
50-
hybridParents <- dplyr::bind_rows(
51-
merge(allPed, allPed, by.x = 'Dam', by.y = 'Id') %>% filter(Species.x != Species.y) %>% select(Id, Dam, Species.x, Species.y) %>% rename(Parent = Dam) %>% mutate(ParentType = 'Dam'),
52-
merge(allPed, allPed, by.x = 'Sire', by.y = 'Id') %>% filter(Species.x != Species.y) %>% select(Id, Sire, Species.x, Species.y) %>% rename(Parent = Sire) %>% mutate(ParentType = 'Sire')
53-
)
54-
55-
if (nrow(hybridParents) > 0) {
56-
print(paste0('There were ', nrow(hybridParents), ' records with parents of a different species'))
57-
}
58-
59-
if (mergeSpeciesWithHybrids && nrow(hybridParents) > 0) {
60-
speciesGroups <- as.list(as.character(unique(allPed$Species)))
61-
toMerge <- unique(hybridParents[c('Species.x', 'Species.y')])
62-
for (idx in 1:nrow(toMerge)){
63-
speciesToCollapse <- as.character(unlist(toMerge[idx,,drop = TRUE]))
64-
matchingIdx <- sapply(speciesGroups, function(x){
65-
return(length(intersect(speciesToCollapse, x)) > 0)
66-
})
67-
68-
speciesToCollapse <- sort(unique(c(speciesToCollapse, unlist(speciesGroups[matchingIdx]))))
69-
speciesGroups <- speciesGroups[!matchingIdx]
70-
speciesGroups <- append(speciesGroups, list(speciesToCollapse))
71-
}
72-
73-
print('These species will be merged for processing:')
74-
invisible(lapply(speciesGroups, function(x){
75-
if (length(x) > 1) {
76-
print(x)
77-
}
78-
}))
79-
80-
return(speciesGroups)
81-
} else {
82-
return(unique(allPed$Species[allPed$Species != 'Unknown']))
83-
}
84-
}
85-
86-
validateExpectedKinshipSubset <- function(dataToTest, expectedValues, errorRows, testReciprocal = TRUE) {
87-
if (nrow(dataToTest) == 0 || nrow(expectedValues) == 0) {
88-
return(errorRows)
89-
}
90-
91-
# Generate the reciprocal of relationships as well:
92-
if (testReciprocal) {
93-
ret2 <- data.frame(Id = expectedValues$Id2, Id2 = expectedValues$Id, Relationship = expectedValues$Relationship, ExpectedCoefficient = expectedValues$ExpectedCoefficient)
94-
ret2$Relationship <- sapply(expectedValues$Relationship, function(x){
95-
x <- unlist(strsplit(x, split = '/'))
96-
if (length(x) == 1) {
97-
return(x)
98-
}
99-
100-
return(paste0(x[2], '/', x[1]))
101-
})
102-
expectedValues <- dplyr::bind_rows(expectedValues, ret2)
103-
rm(ret2)
104-
}
105-
106-
dat <- merge(dataToTest, expectedValues, by = c('Id', 'Id2'), all.x = T, all.y = T) %>%
107-
filter(!is.na(ExpectedCoefficient)) %>%
108-
filter(is.na(coefficient) | coefficient < ExpectedCoefficient)
109-
110-
if (nrow(dat) == 0) {
111-
return(errorRows)
112-
}
113-
114-
if (all(is.null(errorRows))) {
115-
return(dat)
116-
}
117-
118-
return(dplyr::bind_rows(errorRows, dat))
119-
}
120-
121-
validateExpectedKinship <- function(pedDf, dataToTest) {
122-
errorRows <- NULL
123-
124-
# See reference: https://en.wikipedia.org/wiki/Coefficient_of_relationship#Kinship_coefficient
125-
self <- data.frame(Id = pedDf$Id, Id2 = pedDf$Id, Relationship = 'Self', ExpectedCoefficient = 0.5)
126-
errorRows <- validateExpectedKinshipSubset(dataToTest, self, errorRows, testReciprocal = FALSE)
127-
rm(self)
128-
129-
parentChild <- dplyr::bind_rows(
130-
data.frame(Id = pedDf$Id, Id2 = pedDf$Dam, Relationship = 'Child/Parent'),
131-
data.frame(Id = pedDf$Id, Id2 = pedDf$Sire, Relationship = 'Child/Parent')
132-
) %>% filter(!is.na(Id2)) %>% mutate(ExpectedCoefficient = 0.25)
133-
errorRows <- validateExpectedKinshipSubset(dataToTest, parentChild, errorRows)
134-
rm(parentChild)
135-
136-
grandParentOffspring1 <- merge(pedDf[!is.na(pedDf$Dam),], pedDf, by.x = c('Dam'), by.y = c('Id'), all.x = F, all.y = F)
137-
grandParentOffspring1 <- dplyr::bind_rows(
138-
grandParentOffspring1 %>% select(Id, Dam.y) %>% filter(!is.na(Dam.y)) %>% rename(Id2 = Dam.y) %>% mutate(Relationship = 'Grandchild/Maternal Granddam'),
139-
grandParentOffspring1 %>% select(Id, Sire.y) %>% filter(!is.na(Sire.y)) %>% rename(Id2 = Sire.y) %>% mutate(Relationship = 'Grandchild/Maternal Grandsire')
140-
) %>% mutate(ExpectedCoefficient = 0.125)
141-
errorRows <- validateExpectedKinshipSubset(dataToTest, grandParentOffspring1, errorRows)
142-
rm(grandParentOffspring1)
143-
144-
grandParentOffspring2 <- merge(pedDf[!is.na(pedDf$Sire),], pedDf, by.x = c('Sire'), by.y = c('Id'), all.x = F, all.y = F)
145-
grandParentOffspring2 <- dplyr::bind_rows(
146-
grandParentOffspring2 %>% select(Id, Dam.y) %>% filter(!is.na(Dam.y)) %>% rename(Id2 = Dam.y) %>% mutate(Relationship = 'Grandchild/Paternal Granddam'),
147-
grandParentOffspring2 %>% select(Id, Sire.y) %>% filter(!is.na(Sire.y)) %>% rename(Id2 = Sire.y) %>% mutate(Relationship = 'Grandchild/Paternal Grandsire')
148-
) %>% mutate(ExpectedCoefficient = 0.125)
149-
errorRows <- validateExpectedKinshipSubset(dataToTest, grandParentOffspring2, errorRows)
150-
rm(grandParentOffspring2)
151-
152-
fullSibs <- merge(pedDf[!is.na(pedDf$Dam) & !is.na(pedDf$Sire),], pedDf[!is.na(pedDf$Dam) & !is.na(pedDf$Sire),], by = c('Sire', 'Dam'), all.x = F, all.y = F) %>%
153-
select(Id.x, Id.y) %>%
154-
rename(Id = Id.x, Id2 = Id.y) %>%
155-
filter(Id != Id2) %>%
156-
mutate(Relationship = 'Full sib', ExpectedCoefficient = 0.25)
157-
errorRows <- validateExpectedKinshipSubset(dataToTest, fullSibs, errorRows)
158-
rm(fullSibs)
159-
160-
halfSibs1 <- merge(pedDf[!is.na(pedDf$Dam),], pedDf[!is.na(pedDf$Dam),], by = c('Dam'), all.x = F, all.y = F) %>%
161-
filter(Sire.x != Sire.y) %>%
162-
select(Id.x, Id.y) %>%
163-
rename(Id = Id.x, Id2 = Id.y) %>%
164-
filter(Id != Id2) %>%
165-
mutate(Relationship = 'Half sib', ExpectedCoefficient = 0.125)
166-
errorRows <- validateExpectedKinshipSubset(dataToTest, halfSibs1, errorRows)
167-
rm(halfSibs1)
168-
169-
halfSibs2 <- merge(pedDf[!is.na(pedDf$Sire),], pedDf[!is.na(pedDf$Sire),], by = c('Sire'), all.x = F, all.y = F) %>%
170-
filter(Dam.x != Dam.y) %>%
171-
select(Id.x, Id.y) %>%
172-
rename(Id = Id.x, Id2 = Id.y) %>%
173-
filter(Id != Id2) %>%
174-
mutate(Relationship = 'Half sib', ExpectedCoefficient = 0.125)
175-
errorRows <- validateExpectedKinshipSubset(dataToTest, halfSibs2, errorRows)
176-
rm(halfSibs2)
177-
178-
return(errorRows)
179-
}
180-
181-
speciesToProcess <- generateSpeciesToProcess(allPed, opts$mergeSpeciesWithHybrids)
182-
18336
newRecords <- NULL
184-
for (speciesSet in speciesToProcess){
185-
allRecordsForSpecies <- allPed[allPed$Species %in% speciesSet,]
186-
print(paste0('Processing species set: ', paste0(speciesSet, collapse = ','), ', with ', nrow(allRecordsForSpecies), ' IDs'))
37+
for (species in unique(allPed$Species)){
38+
allRecordsForSpecies <- allPed[allPed$Species %in% species,]
39+
print(paste0('Processing species: ', species, ', with ', nrow(allRecordsForSpecies), ' IDs'))
18740
if (nrow(allRecordsForSpecies) == 1) {
18841
print('single record, skipping')
18942
newRecords <- dplyr::bind_rows(newRecords,data.frame(Id = allRecordsForSpecies$Id, Id2 = allRecordsForSpecies$Id, coefficient = 0.5, Species = allRecordsForSpecies$Species))
@@ -219,19 +72,6 @@ for (speciesSet in speciesToProcess){
21972
temp.tri <- merge(temp.tri, allRecordsForSpecies[c('Id', 'Species')], by = 'Id', all.x = TRUE)
22073

22174
newRecords <- dplyr::bind_rows(newRecords,temp.tri)
222-
223-
# NOTE: perform per-species to save memory
224-
if (opts$performKinshipValidation) {
225-
print('Validating coefficients against expected values')
226-
errorRows <- validateExpectedKinship(allRecordsForSpecies, temp.tri)
227-
if (!all(is.null(errorRows))) {
228-
fileName <- paste0('kinshipErrors_', paste0(speciesSet, collapse = '.'), '.txt')
229-
print(paste0('There were unexpected kinship values! See the file ', fileName, ' for more information'))
230-
write.table(newRecords, file = fileName, row.names = FALSE, quote = FALSE, sep = '\t')
231-
} else {
232-
print('All coefficients were within expected ranges from predicted values')
233-
}
234-
}
23575
}
23676

23777
# write TSV to disk

ehr/resources/web/ehr/panel/GeneticCalculationSettingsPanel.js

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -42,18 +42,6 @@ Ext4.define('EHR.panel.GeneticCalculationSettingsPanel', {
4242
});
4343
}
4444
}
45-
},{
46-
xtype: 'checkbox',
47-
fieldLabel: 'Merge Species With Hybrids?',
48-
itemId: 'mergeSpeciesWithHybrids',
49-
listeners: {
50-
render: function (c) {
51-
Ext4.create('Ext.tip.ToolTip', {
52-
target: c.getEl(),
53-
html: 'If any hybrid animals are detected, these species groups will be merged and processed as one unit. Merging all these species together ensures that the correct ancestors from each side are present'
54-
});
55-
}
56-
}
5745
},{
5846
xtype: 'numberfield',
5947
hideTrigger: true,
@@ -110,7 +98,6 @@ Ext4.define('EHR.panel.GeneticCalculationSettingsPanel', {
11098
this.down('#hourOfDay').setValue(results.hourOfDay);
11199
this.down('#containerPath').setValue(results.containerPath);
112100
this.down('#kinshipValidation').setValue(results.kinshipValidation);
113-
this.down('#mergeSpeciesWithHybrids').setValue(results.mergeSpeciesWithHybrids);
114101
this.down('#allowImportDuringBusinessHours').setValue(results.allowImportDuringBusinessHours)
115102
},
116103

@@ -123,7 +110,6 @@ Ext4.define('EHR.panel.GeneticCalculationSettingsPanel', {
123110
enabled: this.down('#enabled').getValue(),
124111
hourOfDay: this.down('#hourOfDay').getValue(),
125112
kinshipValidation: this.down('#kinshipValidation').getValue(),
126-
mergeSpeciesWithHybrids: this.down('#mergeSpeciesWithHybrids').getValue(),
127113
allowImportDuringBusinessHours: this.down('#allowImportDuringBusinessHours').getValue()
128114
},
129115
method : 'POST',

ehr/src/org/labkey/ehr/EHRController.java

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -640,7 +640,7 @@ public ApiResponse execute(ScheduleGeneticCalculationForm form, BindException er
640640
errors.reject(ERROR_MSG, "Unable to find container for path: " + form.getContainerPath());
641641
return null;
642642
}
643-
GeneticCalculationsJob.setProperties(form.isEnabled(), c, form.getHourOfDay(), form.isKinshipValidation(), form.isMergeSpeciesWithHybrids(), form.isAllowImportDuringBusinessHours());
643+
GeneticCalculationsJob.setProperties(form.isEnabled(), c, form.getHourOfDay(), form.isKinshipValidation(), form.isAllowImportDuringBusinessHours());
644644

645645
return new ApiSimpleResponse("success", true);
646646
}
@@ -760,7 +760,6 @@ public static class ScheduleGeneticCalculationForm
760760
private int hourOfDay;
761761

762762
private boolean _kinshipValidation;
763-
private boolean _mergeSpeciesWithHybrids;
764763
private boolean _allowImportDuringBusinessHours;
765764

766765
public boolean isEnabled()
@@ -803,16 +802,6 @@ public void setKinshipValidation(boolean kinshipValidation)
803802
_kinshipValidation = kinshipValidation;
804803
}
805804

806-
public boolean isMergeSpeciesWithHybrids()
807-
{
808-
return _mergeSpeciesWithHybrids;
809-
}
810-
811-
public void setMergeSpeciesWithHybrids(boolean mergeSpeciesWithHybrids)
812-
{
813-
_mergeSpeciesWithHybrids = mergeSpeciesWithHybrids;
814-
}
815-
816805
public boolean isAllowImportDuringBusinessHours()
817806
{
818807
return _allowImportDuringBusinessHours;
@@ -840,7 +829,6 @@ public ApiResponse execute(ScheduleGeneticCalculationForm form, BindException er
840829
ret.put("enabled", GeneticCalculationsJob.isEnabled());
841830
ret.put("hourOfDay", GeneticCalculationsJob.getHourOfDay());
842831
ret.put("kinshipValidation", GeneticCalculationsJob.isKinshipValidation());
843-
ret.put("mergeSpeciesWithHybrids", GeneticCalculationsJob.isMergeSpeciesWithHybrids());
844832
ret.put("allowImportDuringBusinessHours", GeneticCalculationsJob.isAllowImportDuringBusinessHours());
845833

846834
return new ApiSimpleResponse(ret);

ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsJob.java

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -122,17 +122,6 @@ public static boolean isKinshipValidation()
122122
return false;
123123
}
124124

125-
public static boolean isMergeSpeciesWithHybrids()
126-
{
127-
Map<String, String> saved = PropertyManager.getProperties(GENETICCALCULATIONS_PROPERTY_DOMAIN);
128-
129-
if (saved.containsKey("mergeSpeciesWithHybrids"))
130-
return Boolean.parseBoolean(saved.get("mergeSpeciesWithHybrids"));
131-
else
132-
return false;
133-
}
134-
135-
136125
public static boolean isAllowImportDuringBusinessHours()
137126
{
138127
Map<String, String> saved = PropertyManager.getProperties(GENETICCALCULATIONS_PROPERTY_DOMAIN);
@@ -173,14 +162,13 @@ public static Integer getHourOfDay()
173162
return null;
174163
}
175164

176-
public static void setProperties(Boolean isEnabled, Container c, Integer hourOfDay, Boolean isKinshipValidation, Boolean mergeSpeciesWithHybrids, Boolean allowImportDuringBusinessHours)
165+
public static void setProperties(Boolean isEnabled, Container c, Integer hourOfDay, Boolean isKinshipValidation, Boolean allowImportDuringBusinessHours)
177166
{
178167
PropertyManager.PropertyMap props = PropertyManager.getWritableProperties(GENETICCALCULATIONS_PROPERTY_DOMAIN, true);
179168
props.put("enabled", isEnabled.toString());
180169
props.put("container", c.getId());
181170
props.put("hourOfDay", hourOfDay.toString());
182171
props.put("kinshipValidation", isKinshipValidation.toString());
183-
props.put("mergeSpeciesWithHybrids", mergeSpeciesWithHybrids.toString());
184172
props.put("allowImportDuringBusinessHours", allowImportDuringBusinessHours.toString());
185173
props.save();
186174

ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRTask.java

Lines changed: 3 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -104,25 +104,13 @@ public RecordedActionSet run() throws PipelineJobException
104104
{
105105
List<RecordedAction> actions = new ArrayList<>();
106106

107-
actions.add(runScript("populateInbreeding.r", GeneticCalculationsImportTask.INBREEDING_FILE, "Inbreeding Coefficient Output", null));
108-
109-
List<String> kinshipArgs = new ArrayList<>();
110-
if (getJob().getParameters().containsKey("mergeSpeciesWithHybrids") && "true".equalsIgnoreCase(getJob().getParameters().get("mergeSpeciesWithHybrids")))
111-
{
112-
kinshipArgs.add("-m");
113-
}
114-
115-
if (getJob().getParameters().containsKey("kinshipValidation") && "true".equalsIgnoreCase(getJob().getParameters().get("kinshipValidation")))
116-
{
117-
kinshipArgs.add("-v");
118-
}
119-
120-
actions.add(runScript("populateKinship.r", GeneticCalculationsImportTask.KINSHIP_FILE, "Kinship Output", kinshipArgs));
107+
actions.add(runScript("populateInbreeding.r", GeneticCalculationsImportTask.INBREEDING_FILE, "Inbreeding Coefficient Output"));
108+
actions.add(runScript("populateKinship.r", GeneticCalculationsImportTask.KINSHIP_FILE, "Kinship Output"));
121109

122110
return new RecordedActionSet(actions);
123111
}
124112

125-
public RecordedAction runScript(String scriptName, String outputFileName, String actionLabel, @Nullable List<String> extraArgs) throws PipelineJobException
113+
public RecordedAction runScript(String scriptName, String outputFileName, String actionLabel) throws PipelineJobException
126114
{
127115
PipelineJob job = getJob();
128116
FileAnalysisJobSupport support = (FileAnalysisJobSupport) job;
@@ -146,10 +134,6 @@ public RecordedAction runScript(String scriptName, String outputFileName, String
146134
args.add(scriptPath);
147135
args.add("-f");
148136
args.add(tsvFile.getPath());
149-
if (extraArgs != null)
150-
{
151-
args.addAll(extraArgs);
152-
}
153137

154138
getJob().getLogger().info("Using working directory of: " + support.getAnalysisDirectory().getPath());
155139
ProcessBuilder pb = new ProcessBuilder(args);

ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRunnable.java

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,6 @@ private void startCalculation(User u, Container c, boolean allowRunningDuringDay
8686
String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
8787
"<bioml>\n" +
8888
"\t<note label=\"allowRunningDuringDay\" type=\"input\">" + allowRunningDuringDay + "</note>" +
89-
"\t<note label=\"kinshipValidation\" type=\"input\">" + GeneticCalculationsJob.isKinshipValidation() + "</note>" +
90-
"\t<note label=\"mergeSpeciesWithHybrids\" type=\"input\">" + GeneticCalculationsJob.isMergeSpeciesWithHybrids() + "</note>" +
9189
"</bioml>";
9290

9391
AbstractFileAnalysisProtocol<?> protocol = factory.createProtocolInstance(protocolName, "", xml);

0 commit comments

Comments
 (0)