Skip to content

Commit a695beb

Browse files
authored
Add children / descendants methods (#245)
Refactors the ancestry features so that they also work for children/descendants. This creates convenient ways to fetch the parents and children of given entities, either for the immediate relationships only or recursively up and down the graph. `fetch_place_children` takes one or more place DCIDs and fetches their children via the `containedInPlace` property. It optionally accepts `children_type`, which restricts the result to children of a specific type (e.g., 'Country', 'State', 'IPCCPlace_50'). `fetch_place_descendants` is a similar method, but it walks the graph recursively using BFS. Specifying a type (`descendants_type`) is highly recommended in that case, as otherwise the search can become extremely large for entities further up in the graph.
1 parent bcab3e2 commit a695beb

4 files changed

Lines changed: 414 additions & 191 deletions

File tree

datacommons_client/endpoints/node.py

Lines changed: 191 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from concurrent.futures import ThreadPoolExecutor
2-
from typing import Optional
2+
from functools import partial
3+
from typing import Literal, Optional
34

45
from datacommons_client.endpoints.base import API
56
from datacommons_client.endpoints.base import Endpoint
@@ -8,17 +9,17 @@
89
from datacommons_client.endpoints.response import NodeResponse
910
from datacommons_client.models.node import Name
1011
from datacommons_client.models.node import Node
11-
from datacommons_client.utils.graph import build_ancestry_map
12-
from datacommons_client.utils.graph import build_ancestry_tree
13-
from datacommons_client.utils.graph import fetch_parents_lru
14-
from datacommons_client.utils.graph import flatten_ancestry
12+
from datacommons_client.utils.graph import build_graph_map
13+
from datacommons_client.utils.graph import build_relationship_tree
14+
from datacommons_client.utils.graph import fetch_relationship_lru
15+
from datacommons_client.utils.graph import flatten_relationship
1516
from datacommons_client.utils.names import DEFAULT_NAME_LANGUAGE
1617
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
1718
from datacommons_client.utils.names import extract_name_from_english_name_property
1819
from datacommons_client.utils.names import extract_name_from_property_with_language
1920
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY
2021

21-
ANCESTRY_MAX_WORKERS = 10
22+
PLACES_MAX_WORKERS = 10
2223

2324

2425
class NodeEndpoint(Endpoint):
@@ -252,99 +253,234 @@ def fetch_entity_names(
252253

253254
return names
254255

255-
def fetch_entity_parents(
256+
def _fetch_contained_in_place(
256257
self,
257-
entity_dcids: str | list[str],
258-
*,
259-
as_dict: bool = True) -> dict[str, list[Node | dict]]:
260-
"""Fetches the direct parents of one or more entities using the 'containedInPlace' property.
258+
node_dcids: str | list[str],
259+
out: bool = True,
260+
contained_type: Optional[str] = None,
261+
as_dict: bool = False,
262+
) -> dict[str, list[Node | dict]]:
263+
"""Fetches places that contain or are contained in the given nodes. Uses the
264+
`containedInPlace` property to fetch parent or child place relationships.
261265
262266
Args:
263-
entity_dcids (str | list[str]): A single DCID or a list of DCIDs to query.
264-
as_dict (bool): If True, returns a dictionary mapping each input DCID to its
265-
immediate parent entities. If False, returns a dictionary of Parent objects (which
266-
are dataclasses).
267+
node_dcids (str | list[str]): One or more DCIDs representing geographic places.
268+
out (bool, optional): If True, fetch places that contain the given node(s) (parents).
269+
If False, fetch places contained in the given node(s) (children). Defaults to True.
270+
contained_type (str, optional): Optional type constraint (e.g., 'Country',
271+
'State'). If provided, only fetches places of that type.
272+
as_dict (bool, optional): If True, returns the result as a dictionary of
273+
lists of dictionaries. If False, returns Node objects. Defaults to False.
267274
268275
Returns:
269-
dict[str, list[Parent | dict]]: A dictionary mapping each input DCID to a list of its
270-
immediate parent entities. Each parent is represented as a Parent object (which
271-
contains the DCID, name, and type of the parent entity) or as a dictionary with
272-
the same data.
276+
dict[str, list[dict]] | dict[str, list[Any]]: A dictionary where keys are DCIDs
277+
and values are lists of related places, either as Node objects or
278+
dictionaries (if `as_dict` is True).
273279
"""
274-
# Fetch property values from the API
280+
if out and contained_type:
281+
raise ValueError("When 'out' is True, `contained_type' must be None.")
282+
283+
prop = "containedInPlace+" if contained_type else "containedInPlace"
284+
275285
data = self.fetch_property_values(
276-
node_dcids=entity_dcids,
277-
properties="containedInPlace",
286+
node_dcids=node_dcids,
287+
properties=prop,
288+
out=out,
289+
constraints=f"typeOf:{contained_type}" if contained_type else None,
278290
).get_properties()
279291

280-
if as_dict:
281-
return {k: v.to_dict() for k, v in data.items()}
292+
result = {}
293+
for entity, property_nodes in data.items():
294+
nodes = property_nodes.get(prop, [])
295+
result[entity] = [node.to_dict() for node in nodes] if as_dict else nodes
296+
297+
return result
282298

283-
return data
299+
def fetch_place_parents(
300+
self,
301+
place_dcids: str | list[str],
302+
*,
303+
as_dict: bool = True,
304+
) -> dict[str, list[Node | dict]]:
305+
"""Fetches the direct parents of one or more entities using the 'containedInPlace' property.
284306
285-
def _fetch_parents_cached(self, dcid: str) -> tuple[Node, ...]:
286-
"""Returns cached parent nodes for a given entity using an LRU cache.
307+
Args:
308+
place_dcids (str | list[str]): A single place DCID or a list of DCIDs to query.
309+
as_dict (bool): If True, returns a dictionary mapping each input DCID to its
310+
immediate parent entities. If False, returns a dictionary of Node objects.
287311
288-
This private wrapper exists because `@lru_cache` cannot be applied directly
289-
to instance methods. By passing the `NodeEndpoint` instance (`self`) as an
290-
argument caching is preserved while keeping the implementation modular and testable.
312+
Returns:
313+
dict[str, list[Node | dict]]: A dictionary mapping each input DCID to a list of its
314+
immediate parent entities. Each parent is represented as a Node object or
315+
as a dictionary with the same data.
316+
"""
317+
return self._fetch_contained_in_place(
318+
node_dcids=place_dcids,
319+
out=True,
320+
contained_type=None,
321+
as_dict=as_dict,
322+
)
323+
324+
def fetch_place_children(
325+
self,
326+
place_dcids: str | list[str],
327+
*,
328+
children_type: Optional[str] = None,
329+
as_dict: bool = True,
330+
) -> dict[str, list[Node | dict]]:
331+
"""Fetches the direct children of one or more entities using the 'containedInPlace' property.
291332
292333
Args:
293-
dcid (str): The DCID of the entity whose parents should be fetched.
334+
place_dcids (str | list[str]): A single place DCID or a list of DCIDs to query.
335+
children_type (str, optional): The type of the child entities to
336+
fetch (e.g., 'Country', 'State', 'IPCCPlace_50'). If None, fetches all child types.
337+
as_dict (bool): If True, returns a dictionary mapping each input DCID to its
338+
immediate children entities. If False, returns a dictionary of Node objects.
294339
295340
Returns:
296-
tuple[Parent, ...]: A tuple of Parent objects representing the entity's immediate parents.
341+
dict[str, list[Node | dict]]: A dictionary mapping each input DCID to a list of its
342+
immediate children. Each child is represented as a Node object or as a dictionary with
343+
the same data.
297344
"""
298-
return fetch_parents_lru(self, dcid)
345+
return self._fetch_contained_in_place(
346+
node_dcids=place_dcids,
347+
out=False,
348+
contained_type=children_type,
349+
as_dict=as_dict,
350+
)
299351

300-
def fetch_entity_ancestry(
352+
def _fetch_place_relationships(
301353
self,
302-
entity_dcids: str | list[str],
354+
place_dcids: str | list[str],
303355
as_tree: bool = False,
304356
*,
305-
max_concurrent_requests: Optional[int] = ANCESTRY_MAX_WORKERS
357+
contained_type: Optional[str] = None,
358+
relationship: Literal["parents", "children"],
359+
max_concurrent_requests: Optional[int] = PLACES_MAX_WORKERS,
306360
) -> dict[str, list[dict[str, str]] | dict]:
307-
"""Fetches the full ancestry (flat or nested) for one or more entities.
308-
For each input DCID, this method builds the complete ancestry graph using a
361+
"""Fetches a full ancestors/descendants map per place DCID.
362+
363+
For each input place DCID, this method builds the complete graph using a
309364
breadth-first traversal and parallel fetching.
310-
It returns either a flat list of unique parents or a nested tree structure for
311-
each entity, depending on the `as_tree` flag. The flat list matches the structure
312-
of the `/api/place/parent` endpoint of the DC website.
365+
313366
Args:
314-
entity_dcids (str | list[str]): One or more DCIDs of the entities whose ancestry
367+
place_dcids (str | list[str]): One or more DCIDs of the entities whose ancestry
315368
will be fetched.
316369
as_tree (bool): If True, returns a nested tree structure; otherwise, returns a flat list.
317370
Defaults to False.
371+
contained_type (Optional[str]): The type of the ancestry to fetch (e.g., 'Country', 'State').
372+
If None, fetches all ancestry types.
373+
relationship (Literal["parents", "children"]): The type of relationship to fetch.
318374
max_concurrent_requests (Optional[int]): The maximum number of concurrent requests to make.
319-
Defaults to ANCESTRY_MAX_WORKERS.
375+
Defaults to PLACES_MAX_WORKERS.
320376
Returns:
321377
dict[str, list[dict[str, str]] | dict]: A dictionary mapping each input DCID to either:
322-
- A flat list of parent dictionaries (if `as_tree` is False), or
323-
- A nested ancestry tree (if `as_tree` is True). Each parent is represented by
324-
a dict with 'dcid', 'name', and 'type'.
378+
- A flat list of Node dictionaries (if `as_tree` is False), or
379+
- A nested tree (if `as_tree` is True).
325380
"""
326381

327-
if isinstance(entity_dcids, str):
328-
entity_dcids = [entity_dcids]
382+
if isinstance(place_dcids, str):
383+
place_dcids = [place_dcids]
329384

330385
result = {}
331386

387+
# Create a partial function to fetch relationships with the current parameters
388+
fetch_fn = partial(
389+
fetch_relationship_lru,
390+
self,
391+
contained_type=contained_type,
392+
relationship=relationship,
393+
)
394+
332395
# Use a thread pool to fetch ancestry graphs in parallel for each input entity
333396
with ThreadPoolExecutor(max_workers=max_concurrent_requests) as executor:
334397
futures = [
335-
executor.submit(build_ancestry_map,
336-
root=dcid,
337-
fetch_fn=self._fetch_parents_cached)
338-
for dcid in entity_dcids
398+
executor.submit(build_graph_map, root=dcid, fetch_fn=fetch_fn)
399+
for dcid in place_dcids
339400
]
340-
341401
# Gather ancestry maps and postprocess into flat or nested form
342402
for future in futures:
343403
dcid, ancestry = future.result()
344404
if as_tree:
345-
ancestry = build_ancestry_tree(dcid, ancestry)
405+
ancestry = build_relationship_tree(root=dcid,
406+
graph=ancestry,
407+
relationship_key=relationship)
346408
else:
347-
ancestry = flatten_ancestry(ancestry)
409+
ancestry = flatten_relationship(ancestry)
348410
result[dcid] = ancestry
349411

350412
return result
413+
414+
def fetch_place_ancestors(
415+
self,
416+
place_dcids: str | list[str],
417+
as_tree: bool = False,
418+
*,
419+
max_concurrent_requests: Optional[int] = PLACES_MAX_WORKERS,
420+
) -> dict[str, list[dict[str, str]] | dict]:
421+
"""Fetches the full ancestry (flat or nested) for one or more entities.
422+
For each input DCID, this method builds the complete ancestry graph using a
423+
breadth-first traversal and parallel fetching.
424+
It returns either a flat list of unique parents or a nested tree structure for
425+
each entity, depending on the `as_tree` flag. The flat list matches the structure
426+
of the `/api/place/parent` endpoint of the DC website.
427+
Args:
428+
place_dcids (str | list[str]): One or more DCIDs of the entities whose ancestry
429+
will be fetched.
430+
as_tree (bool): If True, returns a nested tree structure; otherwise, returns a flat list.
431+
Defaults to False.
432+
max_concurrent_requests (Optional[int]): The maximum number of concurrent requests to make.
433+
Defaults to PLACES_MAX_WORKERS.
434+
Returns:
435+
dict[str, list[dict[str, str]] | dict]: A dictionary mapping each input DCID to either:
436+
- A flat list of parent dictionaries (if `as_tree` is False), or
437+
- A nested ancestry tree (if `as_tree` is True). Each parent is represented by
438+
a dict with 'dcid', 'name', and 'type'.
439+
"""
440+
441+
return self._fetch_place_relationships(
442+
place_dcids=place_dcids,
443+
as_tree=as_tree,
444+
contained_type=None,
445+
relationship="parents",
446+
max_concurrent_requests=max_concurrent_requests,
447+
)
448+
449+
def fetch_place_descendants(
450+
self,
451+
place_dcids: str | list[str],
452+
descendants_type: Optional[str] = None,
453+
as_tree: bool = False,
454+
*,
455+
max_concurrent_requests: Optional[int] = PLACES_MAX_WORKERS,
456+
) -> dict[str, list[dict[str, str]] | dict]:
457+
"""Fetches the full descendants (flat or nested) for one or more entities.
458+
For each input DCID, this method builds the complete descendants graph using a
459+
breadth-first traversal and parallel fetching.
460+
461+
It returns either a flat list of unique children or a nested tree structure for
462+
each entity, depending on the `as_tree` flag.
463+
464+
Args:
465+
place_dcids (str | list[str]): One or more DCIDs of the entities whose descendants
466+
will be fetched.
467+
descendants_type (Optional[str]): The type of the descendants to fetch (e.g., 'Country', 'State').
468+
If None, fetches all descendant types.
469+
as_tree (bool): If True, returns a nested tree structure; otherwise, returns a flat list.
470+
Defaults to False.
471+
max_concurrent_requests (Optional[int]): The maximum number of concurrent requests to make.
472+
Defaults to PLACES_MAX_WORKERS.
473+
Returns:
474+
dict[str, list[dict[str, str]] | dict]: A dictionary mapping each input DCID to either:
475+
- A flat list of Node dictionaries (if `as_tree` is False), or
476+
- A nested descendants tree (if `as_tree` is True). Each child is represented by
477+
a dict.
478+
"""
479+
480+
return self._fetch_place_relationships(
481+
place_dcids=place_dcids,
482+
as_tree=as_tree,
483+
contained_type=descendants_type,
484+
relationship="children",
485+
max_concurrent_requests=max_concurrent_requests,
486+
)

0 commit comments

Comments
 (0)