Skip to content

Commit 68b7619

Browse files
committed
Changed parent_id so it could be a list
Entities need to be able to come from multiple parents, so I updated the code to allow parent_id to be either a single string or a list of strings.
1 parent f489229 commit 68b7619

4 files changed

Lines changed: 81 additions & 51 deletions

File tree

src/messes/convert/mwtab_functions.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,48 +9,64 @@
99

1010

1111
def create_sample_lineages(input_json: dict, entity_table_name: str="entity", parent_key: str="parent_id") -> dict:
12-
"""Determine all the ancestors and siblings for each entity in the entity table.
12+
"""Determine all the ancestors, parents, and siblings for each entity in the entity table.
1313
1414
The returned dictionary is of the form:
1515
1616
{entity_id:{"ancestors":[ancestor0, ancestor1, ...],
17+
"parents":[parent0, parent1, ...],
1718
"siblings":[sibling0, sibling1, ...]}
1819
...
1920
}
2021
22+
parents are the immediate ancestors an entity comes from. They are also included in the ancestors list.
23+
2124
Args:
2225
input_json: the dictionary where the entity table is.
2326
entity_table_name: the name of the entity table in input_json.
2427
parent_key: the name of the field that points to the entity's parent or parents (a single id or a list of ids).
2528
2629
Returns:
2730
a dictionary where the keys are the entity ids and the values are a dictionary
28-
of it's ancestors and siblings.
31+
of its ancestors, parents, and siblings.
2932
"""
3033

3134
lineages = {}
3235
for entity_name, entity_attributes in input_json[entity_table_name].items():
3336
ancestors = []
34-
while parent_name := entity_attributes.get(parent_key):
35-
ancestors.append(parent_name)
36-
if parent_name not in input_json[entity_table_name]:
37-
print("Error: The parent entity, \"" + parent_name + "\", pulled from the entity \"" + entity_name + \
38-
"\" in the \"" + entity_table_name + "\" table is not in the \"" + entity_table_name + "\" table. " +\
39-
"Parent entities must be in the table with thier children.", file=sys.stderr)
40-
sys.exit()
41-
entity_attributes = input_json[entity_table_name][parent_name]
42-
ancestors.reverse()
43-
lineages[entity_name] = {"ancestors":ancestors}
37+
immediate_parents = []
38+
if parents := entity_attributes.get(parent_key):
39+
parents = parents if isinstance(parents, list) else [parents]
40+
immediate_parents = parents
41+
next_parents = parents
42+
while next_parents:
43+
parents = next_parents
44+
next_parents = []
45+
for parent_name in parents:
46+
ancestors.append(parent_name)
47+
if parent_name not in input_json[entity_table_name]:
48+
print("Error: The parent entity, \"" + parent_name + "\", pulled from the entity \"" + entity_name + \
49+
"\" in the \"" + entity_table_name + "\" table is not in the \"" + entity_table_name + "\" table. " +\
50+
"Parent entities must be in the table with thier children.", file=sys.stderr)
51+
sys.exit()
52+
if grandparents := input_json[entity_table_name][parent_name].get(parent_key):
53+
grandparents = grandparents if isinstance(grandparents, list) else [grandparents]
54+
next_parents += grandparents
55+
ancestors.reverse()
56+
57+
lineages[entity_name] = {"ancestors": ancestors, "parents": immediate_parents}
4458

4559
for entity_name in lineages:
4660
siblings = []
4761
if not lineages[entity_name]["ancestors"]:
4862
lineages[entity_name]["siblings"] = []
4963
continue
50-
parent_name = lineages[entity_name]["ancestors"][-1]
64+
parents = lineages[entity_name]["parents"]
5165
for sibling_name, entity_attributes in input_json[entity_table_name].items():
52-
if (sibling_parent_name := entity_attributes.get(parent_key)) and sibling_parent_name == parent_name:
53-
siblings.append(sibling_name)
66+
if sibling_name != entity_name and (sibling_parents := entity_attributes.get(parent_key)):
67+
sibling_parents = sibling_parents if isinstance(sibling_parents, list) else [sibling_parents]
68+
if set(sibling_parents).intersection(parents):
69+
siblings.append(sibling_name)
5470

5571
lineages[entity_name]["siblings"] = siblings
5672

src/messes/validate/validate.py

Lines changed: 48 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -964,43 +964,57 @@ def validate_parent_id(table: dict, table_name: str, entity_name: str, check_typ
964964
"""
965965
has_errors = False
966966
for entity, attributes in table.items():
967-
if parent_name := attributes.get("parent_id"):
967+
if parents := attributes.get("parent_id"):
968968
ancestors = []
969-
parent_attributes = attributes
970-
while (parent_name := parent_attributes.get("parent_id")):
971-
if parent_name in ancestors:
969+
parents = parents if isinstance(parents, list) else [parents]
970+
no_errors = True
971+
next_parents = parents
972+
while next_parents and no_errors:
973+
parents = next_parents
974+
next_parents = []
975+
for parent_name in parents:
976+
ancestors.append(parent_name)
977+
if parent_name in table and (grandparents := table[parent_name].get("parent_id")):
978+
grandparents = grandparents if isinstance(grandparents, list) else [grandparents]
979+
for grandparent_name in grandparents:
980+
if grandparent_name in ancestors:
981+
## Don't print the circular ancestry message if an entity has itself as a parent.
982+
## This is specifically checked for just below this.
983+
if grandparent_name != entity:
984+
print("Error: The " + entity_name + ", \"" + entity + "\", in the \"" + \
985+
table_name + "\" table has a circular ancestry, i.e., somewhere in the lineage a " + \
986+
entity_name + " has a \"parent_id\" to a child in the lineage.", file=sys.stderr)
987+
has_errors = True
988+
no_errors = False
989+
break
990+
next_parents.append(grandparent_name)
991+
if not no_errors:
992+
break
993+
994+
parents = attributes["parent_id"]
995+
parents = parents if isinstance(parents, list) else [parents]
996+
for parent_name in parents:
997+
if parent_name == entity:
972998
print("Error: The " + entity_name + ", \"" + entity + "\", in the \"" + \
973-
table_name + "\" table has a circular ancestry, i.e., somewhere in the lineage a " + \
974-
entity_name + " has a \"parent_id\" to a child in the lineage.", file=sys.stderr)
999+
table_name + "\" table has itself listed in its parent_id. " + \
1000+
"Records cannot be their own parents.", file=sys.stderr)
1001+
has_errors = True
1002+
1003+
if parent_name not in table:
1004+
print("Error: The parent " + entity_name + ", \"" + parent_name + \
1005+
"\", for the " + entity_name + " \"" + entity + \
1006+
"\" in the \"" + table_name + "\" table is not itself in the \"" + \
1007+
table_name + "\" table. " +\
1008+
"Parent entities must be in the table as well.", file=sys.stderr)
1009+
has_errors = True
1010+
1011+
elif check_type and (type_to_check := attributes.get(type_keyword)) and \
1012+
(parent_type := table[parent_name].get(type_keyword)) and \
1013+
type_to_check != parent_type:
1014+
print("Error: The " + entity_name + ", \"" + entity + \
1015+
"\", does not have the same " + type_keyword + " as its parent \"" + \
1016+
parent_name + "\".", file=sys.stderr)
9751017
has_errors = True
976-
break
977-
ancestors.append(parent_name)
978-
if not parent_name in table:
979-
break
980-
parent_attributes = table[parent_name]
981-
982-
parent_name = attributes["parent_id"]
983-
if parent_name == entity:
984-
print("Error: The " + entity_name + ", \"" + entity + "\", in the \"" + \
985-
table_name + "\" table has itself listed for its parent_id. " + \
986-
"Records cannot be their own parents.", file=sys.stderr)
987-
has_errors = True
988-
989-
if parent_name not in table:
990-
print("Error: The parent " + entity_name + ", \"" + parent_name + \
991-
"\", for the " + entity_name + " \"" + entity + \
992-
"\" in the \"" + table_name + "\" table is not itself in the \"" + \
993-
table_name + "\" table. " +\
994-
"Parent entities must be in the table as well.", file=sys.stderr)
995-
has_errors = True
996-
997-
elif check_type and (type_to_check := attributes.get(type_keyword)) and \
998-
(parent_type := table[parent_name].get(type_keyword)) and \
999-
type_to_check != parent_type:
1000-
print("Error: The " + entity_name + ", \"" + entity + \
1001-
"\", does not have the same " + type_keyword + " as its parent \"" + \
1002-
parent_name + "\".", file=sys.stderr)
1003-
has_errors = True
10041018

10051019
return has_errors
10061020

src/messes/validate/validate_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
"type":"object",
8888
"properties":{
8989
"id": {"type":"string", "minLength":1},
90-
"parent_id": {"type":"string"},
90+
"parent_id": {"type":["string", "array"]},
9191
"protocol.id": {"type":["string", "array"], "minItems":1, "items":{"type":"string", "minLength":1}, "minLength":1},
9292
"type": {"type":"string", "enum":["sample", "subject"]}
9393
},

tests/test_validate/test_validate_integration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ def test_parent_id_errors():
381381

382382
errors = [
383383
'Error: The protocol, "IC-FTMS_preparation", in the "protocol" table has itself ' +\
384-
'listed for its parent_id. Records cannot be their own parents.',
384+
'listed in its parent_id. Records cannot be their own parents.',
385385

386386
'Error: The protocol, "ICMS1", does not have the same type as its parent "IC-FTMS_preparation".',
387387

0 commit comments

Comments
 (0)