Skip to content

Commit 3559fc9

Browse files
authored
Remove auto-flattening for unpack_arcs. (#244)
Currently, the auto-flattening in unpack_arcs leads to the following scenario: two properties are requested for a given entity, but data exists for only one of them. The REST API omits the property with no data from the response, so we cannot tell **from the response alone** whether a single property was requested or whether only one of several requested properties has data. An example: ``` node_resp = dc_client.node.fetch(node_dcids="bio/APOE", expression="<-[encodesGene,variantID]") node_resp.get_properties() > {'bio/APOE': [Node(dcid='bio/AB035149.1', name='AB035149.1', provenanceId='dc/base/NCBI_Gene'...), ...] ...} ``` In the response from get_properties(), we can't tell whether the returned nodes are for encodesGene or variantID. A long-term fix for this would be to store the originally requested properties in the NodeResponse object and only do this "auto-flattening" when there was exactly one **requested** property. However, another consideration is that having many return types for a single method can cause a lot of confusion.
1 parent 338000c commit 3559fc9

2 files changed

Lines changed: 15 additions & 14 deletions

File tree

datacommons_client/tests/endpoints/test_response.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,11 @@ def test_flatten_arcs():
182182
result = flatten_properties(response.data)
183183

184184
assert "dc/03lw9rhpendw5" in result
185-
assert result["dc/03lw9rhpendw5"].value == "191 Peachtree Tower"
185+
assert result["dc/03lw9rhpendw5"] == {
186+
"name": [
187+
Node(value="191 Peachtree Tower", provenanceId="dc/base/EIA_860")
188+
]
189+
}
186190

187191

188192
def test_flatten_multiple_arcs_with_multiple_nodes():

datacommons_client/utils/data_processing.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,28 @@
11
from dataclasses import asdict
22
import json
3-
from typing import Any, Dict
3+
from typing import Any, Dict, List
44

55
from datacommons_client.models.node import ArcLabel
66
from datacommons_client.models.node import Arcs
77
from datacommons_client.models.node import Name
8+
from datacommons_client.models.node import Node
89
from datacommons_client.models.node import NodeDCID
910
from datacommons_client.models.node import NodeGroup
1011
from datacommons_client.models.node import Properties
12+
from datacommons_client.models.node import Property
1113

1214

13-
def unpack_arcs(arcs: Dict[ArcLabel, NodeGroup]) -> Any:
15+
def unpack_arcs(arcs: Dict[ArcLabel, NodeGroup]) -> Dict[Property, List[Node]]:
1416
"""Simplify the 'arcs' structure."""
15-
if len(arcs) > 1:
16-
# Multiple arcs: return dictionary of property nodes
17-
return {
18-
prop: getattr(arc_data, "nodes", []) for prop, arc_data in arcs.items()
19-
}
20-
# Single arc: extract first node's data
21-
for property_data in arcs.values():
22-
nodes = property_data.nodes
23-
if nodes is not None:
24-
return nodes if len(nodes) > 1 else nodes[0]
17+
# Return dictionary of property nodes
18+
return {
19+
prop: getattr(arc_data, "nodes", []) for prop, arc_data in arcs.items()
20+
}
2521

2622

2723
def flatten_properties(
28-
data: Dict[NodeDCID, Arcs | Properties]) -> Dict[str, Any]:
24+
data: Dict[NodeDCID, Arcs | Properties]
25+
) -> Dict[NodeDCID, List[Property] | Dict[Property, List[Node]]]:
2926
"""
3027
Flatten the properties of a node response.
3128

0 commit comments

Comments
 (0)