|
1 | 1 | from concurrent.futures import ThreadPoolExecutor |
2 | | -from typing import Optional |
| 2 | +from functools import partial |
| 3 | +from typing import Literal, Optional |
3 | 4 |
|
4 | 5 | from datacommons_client.endpoints.base import API |
5 | 6 | from datacommons_client.endpoints.base import Endpoint |
|
8 | 9 | from datacommons_client.endpoints.response import NodeResponse |
9 | 10 | from datacommons_client.models.node import Name |
10 | 11 | from datacommons_client.models.node import Node |
11 | | -from datacommons_client.utils.graph import build_ancestry_map |
12 | | -from datacommons_client.utils.graph import build_ancestry_tree |
13 | | -from datacommons_client.utils.graph import fetch_parents_lru |
14 | | -from datacommons_client.utils.graph import flatten_ancestry |
| 12 | +from datacommons_client.utils.graph import build_graph_map |
| 13 | +from datacommons_client.utils.graph import build_relationship_tree |
| 14 | +from datacommons_client.utils.graph import fetch_relationship_lru |
| 15 | +from datacommons_client.utils.graph import flatten_relationship |
15 | 16 | from datacommons_client.utils.names import DEFAULT_NAME_LANGUAGE |
16 | 17 | from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY |
17 | 18 | from datacommons_client.utils.names import extract_name_from_english_name_property |
18 | 19 | from datacommons_client.utils.names import extract_name_from_property_with_language |
19 | 20 | from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY |
20 | 21 |
|
21 | | -ANCESTRY_MAX_WORKERS = 10 |
| 22 | +PLACES_MAX_WORKERS = 10 |
22 | 23 |
|
23 | 24 |
|
24 | 25 | class NodeEndpoint(Endpoint): |
@@ -252,99 +253,234 @@ def fetch_entity_names( |
252 | 253 |
|
253 | 254 | return names |
254 | 255 |
|
255 | | - def fetch_entity_parents( |
| 256 | + def _fetch_contained_in_place( |
256 | 257 | self, |
257 | | - entity_dcids: str | list[str], |
258 | | - *, |
259 | | - as_dict: bool = True) -> dict[str, list[Node | dict]]: |
260 | | - """Fetches the direct parents of one or more entities using the 'containedInPlace' property. |
| 258 | + node_dcids: str | list[str], |
| 259 | + out: bool = True, |
| 260 | + contained_type: Optional[str] = None, |
| 261 | + as_dict: bool = False, |
| 262 | + ) -> dict[str, list[Node | dict]]: |
| 263 | + """Fetches places that contain or are contained in the given nodes. Uses the |
| 264 | + `containedInPlace` property to fetch parent or child place relationships. |
261 | 265 |
|
262 | 266 | Args: |
263 | | - entity_dcids (str | list[str]): A single DCID or a list of DCIDs to query. |
264 | | - as_dict (bool): If True, returns a dictionary mapping each input DCID to its |
265 | | - immediate parent entities. If False, returns a dictionary of Parent objects (which |
266 | | - are dataclasses). |
| 267 | + node_dcids (str | list[str]): One or more DCIDs representing geographic places. |
| 268 | + out (bool, optional): If True, fetch places that contain the given node(s) (parents). |
| 269 | + If False, fetch places contained in the given node(s) (children). Defaults to True. |
| 270 | + contained_type (str, optional): Optional type constraint (e.g., 'Country', |
| 271 | + 'State'). If provided, only fetches places of that type. Must be None when `out` is True. |
| 272 | + as_dict (bool, optional): If True, returns the result as a dictionary of |
| 273 | + lists of dictionaries. If False, returns Node objects. Defaults to False. |
267 | 274 |
|
268 | 275 | Returns: |
269 | | - dict[str, list[Parent | dict]]: A dictionary mapping each input DCID to a list of its |
270 | | - immediate parent entities. Each parent is represented as a Parent object (which |
271 | | - contains the DCID, name, and type of the parent entity) or as a dictionary with |
272 | | - the same data. |
| 276 | + dict[str, list[Node | dict]]: A dictionary where keys are DCIDs |
| 277 | + and values are lists of related places, either as Node objects or |
| 278 | + dictionaries (if `as_dict` is True). |
273 | 279 | """ |
274 | | - # Fetch property values from the API |
| 280 | + if out and contained_type: |
| 281 | + raise ValueError("When 'out' is True, `contained_type' must be None.") |
| 282 | + |
| 283 | + prop = "containedInPlace+" if contained_type else "containedInPlace" |
| 284 | + |
275 | 285 | data = self.fetch_property_values( |
276 | | - node_dcids=entity_dcids, |
277 | | - properties="containedInPlace", |
| 286 | + node_dcids=node_dcids, |
| 287 | + properties=prop, |
| 288 | + out=out, |
| 289 | + constraints=f"typeOf:{contained_type}" if contained_type else None, |
278 | 290 | ).get_properties() |
279 | 291 |
|
280 | | - if as_dict: |
281 | | - return {k: v.to_dict() for k, v in data.items()} |
| 292 | + result = {} |
| 293 | + for entity, property_nodes in data.items(): |
| 294 | + nodes = property_nodes.get(prop, []) |
| 295 | + result[entity] = [node.to_dict() for node in nodes] if as_dict else nodes |
| 296 | + |
| 297 | + return result |
282 | 298 |
|
283 | | - return data |
| 299 | + def fetch_place_parents( |
| 300 | + self, |
| 301 | + place_dcids: str | list[str], |
| 302 | + *, |
| 303 | + as_dict: bool = True, |
| 304 | + ) -> dict[str, list[Node | dict]]: |
| 305 | + """Fetches the direct parents of one or more entities using the 'containedInPlace' property. |
284 | 306 |
|
285 | | - def _fetch_parents_cached(self, dcid: str) -> tuple[Node, ...]: |
286 | | - """Returns cached parent nodes for a given entity using an LRU cache. |
| 307 | + Args: |
| 308 | + place_dcids (str | list[str]): A single place DCID or a list of DCIDs to query. |
| 309 | + as_dict (bool): If True, returns a dictionary mapping each input DCID to its |
| 310 | + immediate parent entities. If False, returns a dictionary of Node objects. |
287 | 311 |
|
288 | | - This private wrapper exists because `@lru_cache` cannot be applied directly |
289 | | - to instance methods. By passing the `NodeEndpoint` instance (`self`) as an |
290 | | - argument caching is preserved while keeping the implementation modular and testable. |
| 312 | + Returns: |
| 313 | + dict[str, list[Node | dict]]: A dictionary mapping each input DCID to a list of its |
| 314 | + immediate parent entities. Each parent is represented as a Node object or |
| 315 | + as a dictionary with the same data. |
| 316 | + """ |
| 317 | + return self._fetch_contained_in_place( |
| 318 | + node_dcids=place_dcids, |
| 319 | + out=True, |
| 320 | + contained_type=None, |
| 321 | + as_dict=as_dict, |
| 322 | + ) |
| 323 | + |
| 324 | + def fetch_place_children( |
| 325 | + self, |
| 326 | + place_dcids: str | list[str], |
| 327 | + *, |
| 328 | + children_type: Optional[str] = None, |
| 329 | + as_dict: bool = True, |
| 330 | + ) -> dict[str, list[Node | dict]]: |
| 331 | + """Fetches the direct children of one or more entities using the 'containedInPlace' property. |
291 | 332 |
|
292 | 333 | Args: |
293 | | - dcid (str): The DCID of the entity whose parents should be fetched. |
| 334 | + place_dcids (str | list[str]): A single place DCID or a list of DCIDs to query. |
| 335 | + children_type (str, optional): The type of the child entities to |
| 336 | + fetch (e.g., 'Country', 'State', 'IPCCPlace_50'). If None, fetches all child types. |
| 337 | + as_dict (bool): If True, returns a dictionary mapping each input DCID to its |
| 338 | + immediate children entities. If False, returns a dictionary of Node objects. |
294 | 339 |
|
295 | 340 | Returns: |
296 | | - tuple[Parent, ...]: A tuple of Parent objects representing the entity's immediate parents. |
| 341 | + dict[str, list[Node | dict]]: A dictionary mapping each input DCID to a list of its |
| 342 | + immediate children. Each child is represented as a Node object or as a dictionary with |
| 343 | + the same data. |
297 | 344 | """ |
298 | | - return fetch_parents_lru(self, dcid) |
| 345 | + return self._fetch_contained_in_place( |
| 346 | + node_dcids=place_dcids, |
| 347 | + out=False, |
| 348 | + contained_type=children_type, |
| 349 | + as_dict=as_dict, |
| 350 | + ) |
299 | 351 |
|
300 | | - def fetch_entity_ancestry( |
| 352 | + def _fetch_place_relationships( |
301 | 353 | self, |
302 | | - entity_dcids: str | list[str], |
| 354 | + place_dcids: str | list[str], |
303 | 355 | as_tree: bool = False, |
304 | 356 | *, |
305 | | - max_concurrent_requests: Optional[int] = ANCESTRY_MAX_WORKERS |
| 357 | + contained_type: Optional[str] = None, |
| 358 | + relationship: Literal["parents", "children"], |
| 359 | + max_concurrent_requests: Optional[int] = PLACES_MAX_WORKERS, |
306 | 360 | ) -> dict[str, list[dict[str, str]] | dict]: |
307 | | - """Fetches the full ancestry (flat or nested) for one or more entities. |
308 | | - For each input DCID, this method builds the complete ancestry graph using a |
| 361 | + """Fetches a full ancestors/descendants map per place DCID. |
| 362 | +
|
| 363 | + For each input place DCID, this method builds the complete graph using a |
309 | 364 | breadth-first traversal and parallel fetching. |
310 | | - It returns either a flat list of unique parents or a nested tree structure for |
311 | | - each entity, depending on the `as_tree` flag. The flat list matches the structure |
312 | | - of the `/api/place/parent` endpoint of the DC website. |
| 365 | +
|
313 | 366 | Args: |
314 | | - entity_dcids (str | list[str]): One or more DCIDs of the entities whose ancestry |
| 367 | + place_dcids (str | list[str]): One or more DCIDs of the places whose relationships |
315 | 368 | will be fetched. |
316 | 369 | as_tree (bool): If True, returns a nested tree structure; otherwise, returns a flat list. |
317 | 370 | Defaults to False. |
| 371 | + contained_type (Optional[str]): The type of related places to fetch (e.g., 'Country', 'State'). |
| 372 | + If None, fetches related places of all types. |
| 373 | + relationship (Literal["parents", "children"]): The type of relationship to fetch. |
318 | 374 | max_concurrent_requests (Optional[int]): The maximum number of concurrent requests to make. |
319 | | - Defaults to ANCESTRY_MAX_WORKERS. |
| 375 | + Defaults to PLACES_MAX_WORKERS. |
320 | 376 | Returns: |
321 | 377 | dict[str, list[dict[str, str]] | dict]: A dictionary mapping each input DCID to either: |
322 | | - - A flat list of parent dictionaries (if `as_tree` is False), or |
323 | | - - A nested ancestry tree (if `as_tree` is True). Each parent is represented by |
324 | | - a dict with 'dcid', 'name', and 'type'. |
| 378 | + - A flat list of Node dictionaries (if `as_tree` is False), or |
| 379 | + - A nested tree (if `as_tree` is True). |
325 | 380 | """ |
326 | 381 |
|
327 | | - if isinstance(entity_dcids, str): |
328 | | - entity_dcids = [entity_dcids] |
| 382 | + if isinstance(place_dcids, str): |
| 383 | + place_dcids = [place_dcids] |
329 | 384 |
|
330 | 385 | result = {} |
331 | 386 |
|
| 387 | + # Create a partial function to fetch relationships with the current parameters |
| 388 | + fetch_fn = partial( |
| 389 | + fetch_relationship_lru, |
| 390 | + self, |
| 391 | + contained_type=contained_type, |
| 392 | + relationship=relationship, |
| 393 | + ) |
| 394 | + |
332 | 395 | # Use a thread pool to fetch ancestry graphs in parallel for each input entity |
333 | 396 | with ThreadPoolExecutor(max_workers=max_concurrent_requests) as executor: |
334 | 397 | futures = [ |
335 | | - executor.submit(build_ancestry_map, |
336 | | - root=dcid, |
337 | | - fetch_fn=self._fetch_parents_cached) |
338 | | - for dcid in entity_dcids |
| 398 | + executor.submit(build_graph_map, root=dcid, fetch_fn=fetch_fn) |
| 399 | + for dcid in place_dcids |
339 | 400 | ] |
340 | | - |
341 | 401 | # Gather ancestry maps and postprocess into flat or nested form |
342 | 402 | for future in futures: |
343 | 403 | dcid, ancestry = future.result() |
344 | 404 | if as_tree: |
345 | | - ancestry = build_ancestry_tree(dcid, ancestry) |
| 405 | + ancestry = build_relationship_tree(root=dcid, |
| 406 | + graph=ancestry, |
| 407 | + relationship_key=relationship) |
346 | 408 | else: |
347 | | - ancestry = flatten_ancestry(ancestry) |
| 409 | + ancestry = flatten_relationship(ancestry) |
348 | 410 | result[dcid] = ancestry |
349 | 411 |
|
350 | 412 | return result |
| 413 | + |
| 414 | + def fetch_place_ancestors( |
| 415 | + self, |
| 416 | + place_dcids: str | list[str], |
| 417 | + as_tree: bool = False, |
| 418 | + *, |
| 419 | + max_concurrent_requests: Optional[int] = PLACES_MAX_WORKERS, |
| 420 | + ) -> dict[str, list[dict[str, str]] | dict]: |
| 421 | + """Fetches the full ancestry (flat or nested) for one or more entities. |
| 422 | + For each input DCID, this method builds the complete ancestry graph using a |
| 423 | + breadth-first traversal and parallel fetching. |
| 424 | + It returns either a flat list of unique parents or a nested tree structure for |
| 425 | + each entity, depending on the `as_tree` flag. The flat list matches the structure |
| 426 | + of the `/api/place/parent` endpoint of the DC website. |
| 427 | + Args: |
| 428 | + place_dcids (str | list[str]): One or more DCIDs of the entities whose ancestry |
| 429 | + will be fetched. |
| 430 | + as_tree (bool): If True, returns a nested tree structure; otherwise, returns a flat list. |
| 431 | + Defaults to False. |
| 432 | + max_concurrent_requests (Optional[int]): The maximum number of concurrent requests to make. |
| 433 | + Defaults to PLACES_MAX_WORKERS. |
| 434 | + Returns: |
| 435 | + dict[str, list[dict[str, str]] | dict]: A dictionary mapping each input DCID to either: |
| 436 | + - A flat list of parent dictionaries (if `as_tree` is False), or |
| 437 | + - A nested ancestry tree (if `as_tree` is True). Each parent is represented by |
| 438 | + a dict with 'dcid', 'name', and 'type'. |
| 439 | + """ |
| 440 | + |
| 441 | + return self._fetch_place_relationships( |
| 442 | + place_dcids=place_dcids, |
| 443 | + as_tree=as_tree, |
| 444 | + contained_type=None, |
| 445 | + relationship="parents", |
| 446 | + max_concurrent_requests=max_concurrent_requests, |
| 447 | + ) |
| 448 | + |
| 449 | + def fetch_place_descendants( |
| 450 | + self, |
| 451 | + place_dcids: str | list[str], |
| 452 | + descendants_type: Optional[str] = None, |
| 453 | + as_tree: bool = False, |
| 454 | + *, |
| 455 | + max_concurrent_requests: Optional[int] = PLACES_MAX_WORKERS, |
| 456 | + ) -> dict[str, list[dict[str, str]] | dict]: |
| 457 | + """Fetches the full descendants (flat or nested) for one or more entities. |
| 458 | + For each input DCID, this method builds the complete descendants graph using a |
| 459 | + breadth-first traversal and parallel fetching. |
| 460 | +
|
| 461 | + It returns either a flat list of unique children or a nested tree structure for |
| 462 | + each entity, depending on the `as_tree` flag. |
| 463 | +
|
| 464 | + Args: |
| 465 | + place_dcids (str | list[str]): One or more DCIDs of the entities whose descendants |
| 466 | + will be fetched. |
| 467 | + descendants_type (Optional[str]): The type of the descendants to fetch (e.g., 'Country', 'State'). |
| 468 | + If None, fetches all descendant types. |
| 469 | + as_tree (bool): If True, returns a nested tree structure; otherwise, returns a flat list. |
| 470 | + Defaults to False. |
| 471 | + max_concurrent_requests (Optional[int]): The maximum number of concurrent requests to make. |
| 472 | + Defaults to PLACES_MAX_WORKERS. |
| 473 | + Returns: |
| 474 | + dict[str, list[dict[str, str]] | dict]: A dictionary mapping each input DCID to either: |
| 475 | + - A flat list of Node dictionaries (if `as_tree` is False), or |
| 476 | + - A nested descendants tree (if `as_tree` is True). Each child is represented by |
| 477 | + a dict. |
| 478 | + """ |
| 479 | + |
| 480 | + return self._fetch_place_relationships( |
| 481 | + place_dcids=place_dcids, |
| 482 | + as_tree=as_tree, |
| 483 | + contained_type=descendants_type, |
| 484 | + relationship="children", |
| 485 | + max_concurrent_requests=max_concurrent_requests, |
| 486 | + ) |
0 commit comments