Skip to content

Commit 06cf3c2

Browse files
authored
Add get_stat_all function (#143)
Also remove type annotations and clean up docstrings for stat_vars.py.
1 parent 6f5d229 commit 06cf3c2

5 files changed

Lines changed: 336 additions & 25 deletions

File tree

datacommons/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from datacommons.core import get_property_labels, get_property_values, get_triples
2020
from datacommons.places import get_places_in, get_related_places, get_stats
2121
from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
22-
from datacommons.stat_vars import get_stat_value, get_stat_series
22+
from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all
2323

2424
# Other utilities
2525
from .utils import set_api_key

datacommons/examples/stat_vars.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from __future__ import print_function
1919

2020
import datacommons as dc
21+
import pprint
2122

2223

2324
def main():
@@ -44,13 +45,13 @@ def main():
4445
{
4546
'place': 'geoId/06085',
4647
'stat_var': 'UnemploymentRate_Person',
47-
'observation_period': "P1Y",
48+
'observation_period': 'P1Y',
4849
},
4950
{
5051
'place': 'geoId/06085',
5152
'stat_var': 'UnemploymentRate_Person',
52-
'observation_period': "P1Y",
53-
'measurement_method': "BLSSeasonallyUnadjusted",
53+
'observation_period': 'P1Y',
54+
'measurement_method': 'BLSSeasonallyUnadjusted',
5455
},
5556
{
5657
'place':
@@ -64,9 +65,9 @@ def main():
6465
'stat_var':
6566
'Amount_EconomicActivity_GrossDomesticProduction_Nominal',
6667
'observation_period':
67-
"P1Y",
68+
'P1Y',
6869
'unit':
69-
"PurchasingPowerStandard"
70+
'PurchasingPowerStandard'
7071
},
7172
]
7273

@@ -108,6 +109,23 @@ def call_str(pvs):
108109
unit=pvs.get('unit'),
109110
scaling_factor=pvs.get('scaling_factor')))
110111

112+
pp = pprint.PrettyPrinter(indent=4)
113+
print(
114+
"\nget_stat_all(['geoId/06085', 'country/FRA'], ['Median_Age_Person', 'Count_Person'])"
115+
)
116+
print('>>> ')
117+
pp.pprint(
118+
dc.get_stat_all(['geoId/06085', 'country/FRA'],
119+
['Median_Age_Person', 'Count_Person']))
120+
121+
print(
122+
"\nget_stat_all(['badPlaceId', 'country/FRA'], ['Median_Age_Person', 'Count_Person'])"
123+
)
124+
print('>>> ')
125+
pp.pprint(
126+
dc.get_stat_all(['badPlaceId', 'country/FRA'],
127+
['Median_Age_Person', 'Count_Person']))
128+
111129

112130
if __name__ == '__main__':
113131
main()

datacommons/stat_vars.py

Lines changed: 106 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
"""Data Commons Python API Stat Module.
1515
16-
Provides functions for getting data on StatVars from Data Commons Graph.
16+
Provides functions for getting data on StatisticalVariables from Data Commons Graph.
1717
"""
1818

1919
from __future__ import absolute_import
@@ -22,6 +22,7 @@
2222

2323
from datacommons.utils import _API_ROOT, _API_ENDPOINTS, _ENV_VAR_API_KEY
2424

25+
import collections
2526
import json
2627
import os
2728
import six.moves.urllib.error
@@ -37,21 +38,21 @@ def get_stat_value(place,
3738
observation_period=None,
3839
unit=None,
3940
scaling_factor=None):
40-
"""Returns a value for :code:`place` based on the :code:`stat_var`.
41+
"""Returns a value for `place` based on the `stat_var`.
4142
4243
Args:
43-
place (:obj:`iterable` of :obj:`str`): The dcid of `Place` to query for.
44-
stat_var (:obj:`str`): The dcid of the `StatisticalVariable`.
45-
date (:obj:`str`): Optional, the preferred date of observation
44+
place (`str`): The dcid of Place to query for.
45+
stat_var (`str`): The dcid of the StatisticalVariable.
46+
date (`str`): Optional, the preferred date of observation
4647
in ISO 8601 format. If not specified, returns the latest observation.
47-
measurement_method (:obj:`str`): Optional, the dcid of the preferred
48+
measurement_method (`str`): Optional, the dcid of the preferred
4849
`measurementMethod` value.
49-
observation_period (:obj:`str`): Optional, the preferred
50+
observation_period (`str`): Optional, the preferred
5051
`observationPeriod` value.
51-
unit (:obj:`str`): Optional, the dcid of the preferred `unit` value.
52-
scaling_factor (:obj:`int`): Optional, the preferred `scalingFactor` value.
52+
unit (`str`): Optional, the dcid of the preferred `unit` value.
53+
scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
5354
Returns:
54-
A :obj:`float` the value of :code:`stat_var` for :code:`place`, filtered
55+
A `float`, the value of `stat_var` for `place`, filtered
5556
by optional args.
5657
5758
Raises:
@@ -88,19 +89,19 @@ def get_stat_series(place,
8889
observation_period=None,
8990
unit=None,
9091
scaling_factor=None):
91-
"""Returns a :obj:`dict` for :code:`place` based on the :code:`stat_var`.
92+
"""Returns a `dict` mapping dates to value of `stat_var` for `place`.
9293
9394
Args:
94-
place (:obj:`iterable` of :obj:`str`): The dcid of `Place` to query for.
95-
stat_var (:obj:`str`): The dcid of the `StatisticalVariable`.
96-
measurement_method (:obj:`str`): Optional, the dcid of the preferred
95+
place (`str`): The dcid of Place to query for.
96+
stat_var (`str`): The dcid of the StatisticalVariable.
97+
measurement_method (`str`): Optional, the dcid of the preferred
9798
`measurementMethod` value.
98-
observation_period (:obj:`str`): Optional, the preferred
99+
observation_period (`str`): Optional, the preferred
99100
`observationPeriod` value.
100-
unit (:obj:`str`): Optional, the dcid of the preferred `unit` value.
101-
scaling_factor (:obj:`int`): Optional, the preferred `scalingFactor` value.
101+
unit (`str`): Optional, the dcid of the preferred `unit` value.
102+
scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
102103
Returns:
103-
A :obj:`dict` mapping dates to value of :code:`stat_var` for :code:`place`,
104+
A `dict` mapping dates to values of `stat_var` for `place`,
104105
filtered by optional args.
105106
106107
Raises:
@@ -127,3 +128,90 @@ def get_stat_series(place,
127128
if 'series' not in res_json:
128129
raise ValueError('No data in response.')
129130
return res_json['series']
131+
132+
133+
def get_stat_all(places, stat_vars):
    """Returns a nested `dict` of all time series for `places` and `stat_vars`.

    Args:
      places (`Iterable` of `str`): The dcids of Places to query for.
      stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
    Returns:
      A nested `dict` mapping Places to StatisticalVariables and all available
      time series for each Place and StatisticalVariable pair. Every Place
      present in the REST response appears as a key; a Place for which the
      response carries no 'statVarData' maps to an empty `dict`.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"])
      {
        "geoId/05": {
          "Count_Person": [
            {
              "val": {
                "2010": 1633,
                "2011": 1509,
                "2012": 1581,
              },
              "observationPeriod": "P1Y",
              "importName": "Wikidata",
              "provenanceDomain": "wikidata.org"
            },
            {
              "val": {
                "2010": 1333,
                "2011": 1309,
                "2012": 131,
              },
              "observationPeriod": "P1Y",
              "importName": "CensusPEPSurvey",
              "provenanceDomain": "census.gov"
            }
          ],
          "Count_Person_Male": [
            {
              "val": {
                "2010": 1633,
                "2011": 1509,
                "2012": 1581,
              },
              "observationPeriod": "P1Y",
              "importName": "CensusPEPSurvey",
              "provenanceDomain": "census.gov"
            }
          ],
        },
        "geoId/06": {
          "Count_Person": [],
          "Count_Person_Male": [
            {
              "val": {
                "2010": 13,
                "2011": 13,
                "2012": 322,
              },
              "observationPeriod": "P1Y",
              "importName": "CensusPEPSurvey",
              "provenanceDomain": "census.gov"
            }
          ],
        }
      }
    """
    # NOTE(review): this relies on `utils` being bound as a module name in
    # this file; confirm a matching import exists alongside the
    # `from datacommons.utils import ...` line.
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
    req_json = {'stat_vars': stat_vars, 'places': places}

    # Send the request
    res_json = utils._send_request(url, req_json=req_json, use_payload=False)

    if 'placeData' not in res_json:
        raise ValueError('No data in response.')

    # Flatten the REST response by dropping the intermediate 'statVarData'
    # level, so the result maps place dcid -> stat var dcid -> that stat var's
    # data as returned by the API.
    place_statvar_series = {}
    for place_dcid, place in res_json['placeData'].items():
        # A place with no data (e.g. an unknown dcid) may come back without a
        # 'statVarData' entry; keep the place in the result with an empty
        # mapping instead of raising KeyError or silently dropping it.
        stat_var_data = place.get('statVarData') if place else None
        place_statvar_series[place_dcid] = dict(stat_var_data or {})
    return place_statvar_series

0 commit comments

Comments
 (0)