66from datacommons_client .endpoints .payloads import ObservationDate
77from datacommons_client .endpoints .resolve import ResolveEndpoint
88from datacommons_client .utils .decorators import requires_pandas
9+ from datacommons_client .utils .error_handling import NoDataForPropertyError
910
1011try :
1112 import pandas as pd
@@ -58,6 +59,58 @@ def __init__(
5859 self .observation = ObservationEndpoint (api = self .api )
5960 self .resolve = ResolveEndpoint (api = self .api )
6061
def _find_filter_facet_ids(
    self,
    fetch_by: Literal["entity", "entity_type"],
    date: ObservationDate | str,
    variable_dcids: str | list[str],
    entity_dcids: Literal["all"] | list[str] = "all",
    entity_type: Optional[str] = None,
    parent_entity: Optional[str] = None,
    property_filters: Optional[dict[str, str | list[str]]] = None,
) -> list[str] | None:
    """Finds matching facet IDs for property filters.

    Fetches observations for the given variables/entities and asks the
    response for the facet IDs matching each requested property filter.

    Args:
        fetch_by (Literal["entity", "entity_type"]): Determines whether to fetch by entity or entity type.
            Any value other than "entity" takes the entity-type path.
        date (ObservationDate | str): The date passed through to the observation fetch.
        variable_dcids (str | list[str]): The variable DCIDs for which to retrieve facet IDs.
        entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs, or "all" if filtering by entity type.
            Only used when fetching by entity.
        entity_type (Optional[str]): The entity type, required if fetching by entity type.
        parent_entity (Optional[str]): The parent entity, used when fetching by entity type.
        property_filters (Optional[dict[str, str | list[str]]]): A dictionary of properties to match facets against.

    Returns:
        list[str] | None: The unique matching facet IDs in first-seen order
            (possibly an empty list when nothing matches), or None if no
            filters are applied.
    """
    # No filters (None or an empty dict): skip the lookup request entirely.
    if not property_filters:
        return None

    # Request only the variable, entity and facet fields; the response is
    # used solely to look up facet IDs below.
    select = ["variable", "entity", "facet"]

    if fetch_by == "entity":
        observations = self.observation.fetch_observations_by_entity(
            date=date,
            entity_dcids=entity_dcids,
            variable_dcids=variable_dcids,
            select=select,
        )
    else:
        observations = self.observation.fetch_observations_by_entity_type(
            date=date,
            entity_type=entity_type,
            parent_entity=parent_entity,
            variable_dcids=variable_dcids,
            select=select,
        )

    matches_per_filter = [
        observations.find_matching_facet_id(property_name=name, value=value)
        for name, value in property_filters.items()
    ]

    # Union across filters: a facet matching ANY filter is kept.
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # result is deterministic (a set would order IDs by randomized hash).
    return list(
        dict.fromkeys(
            facet_id for matches in matches_per_filter for facet_id in matches
        )
    )
113+
61114 @requires_pandas
62115 def observations_dataframe (
63116 self ,
@@ -66,6 +119,7 @@ def observations_dataframe(
66119 entity_dcids : Literal ["all" ] | list [str ] = "all" ,
67120 entity_type : Optional [str ] = None ,
68121 parent_entity : Optional [str ] = None ,
122+ property_filters : Optional [dict [str , str | list [str ]]] = None ,
69123 ):
70124 """
71125 Fetches statistical observations and returns them as a Pandas DataFrame.
@@ -74,15 +128,17 @@ def observations_dataframe(
74128 at a particular date (e.g., "population of USA in 2020", "GDP of California in 2010").
75129
76130 Args:
77- variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
78- date (ObservationDate | str): The date for which observations are requested. It can be
131+ variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
132+ date (ObservationDate | str): The date for which observations are requested. It can be
79133 a specific date, "all" to retrieve all observations, or "latest" to get the most recent observations.
80- entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs to retrieve data for.
81- Defaults to "all". DCIDs must include their type (e.g "country/GTM" for Guatemala).
82- entity_type (Optional[str], optional): The type of entities to filter by when `entity_dcids="all"`.
83- Required if `entity_dcids="all"`. Defaults to None.
84- parent_entity (Optional[str], optional): The parent entity under which the target entities fall.
85- Used only when `entity_dcids="all"`. Defaults to None.
134+ entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs to retrieve data for.
135+ Defaults to "all". DCIDs must include their type (e.g., "country/GTM" for Guatemala).
136+ entity_type (Optional[str]): The type of entities to filter by when `entity_dcids="all"`.
137+ Required if `entity_dcids="all"`. Defaults to None.
138+ parent_entity (Optional[str]): The parent entity under which the target entities fall.
139+ Used only when `entity_dcids="all"`. Defaults to None.
140+ property_filters (Optional[dict[str, str | list[str]]): An optional dictionary used to filter
141+ the data by using observation properties like `measurementMethod`, `unit`, or `observationPeriod`.
86142
87143 Returns:
88144 pd.DataFrame: A DataFrame containing the requested observations.
@@ -97,14 +153,34 @@ def observations_dataframe(
97153 "Specify 'entity_type' and 'parent_entity' only when 'entity_dcids' is 'all'."
98154 )
99155
156+ # If property filters are provided, fetch the required facet IDs. Otherwise, set to None.
157+ facets = self ._find_filter_facet_ids (
158+ fetch_by = "entity" if entity_dcids != "all" else "entity_type" ,
159+ date = date ,
160+ variable_dcids = variable_dcids ,
161+ entity_dcids = entity_dcids ,
162+ entity_type = entity_type ,
163+ parent_entity = parent_entity ,
164+ property_filters = property_filters ,
165+ )
166+
167+ if not facets and property_filters :
168+ raise NoDataForPropertyError
169+
100170 if entity_dcids == "all" :
101171 observations = self .observation .fetch_observations_by_entity_type (
102172 date = date ,
103173 parent_entity = parent_entity ,
104174 entity_type = entity_type ,
105- variable_dcids = variable_dcids )
175+ variable_dcids = variable_dcids ,
176+ filter_facet_ids = facets ,
177+ )
106178 else :
107179 observations = self .observation .fetch_observations_by_entity (
108- date = date , entity_dcids = entity_dcids , variable_dcids = variable_dcids )
180+ date = date ,
181+ entity_dcids = entity_dcids ,
182+ variable_dcids = variable_dcids ,
183+ filter_facet_ids = facets ,
184+ )
109185
110186 return pd .DataFrame (observations .get_observations_as_records ())
0 commit comments