Skip to content

Commit d5d312a

Browse files
authored
Add get_stats endpoint. (#123)
* Add get_stats endpoint. * Restore urlopen to test, need for mocking. * Update get_stats test with get stats response, not get_places_in response * Update GetStatsTest test docstrings. * REST API uses place instead of dcids for place key var name. * Update tests to match default REST/JSON output. * Update tests to match default REST/JSON output. Co-authored-by: tjann <tjann@google.com>
1 parent e39ff3a commit d5d312a

5 files changed

Lines changed: 240 additions & 21 deletions

File tree

datacommons/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
# Data Commons Python Client API
1919
from datacommons.core import get_property_labels, get_property_values, get_triples
20-
from datacommons.places import get_places_in, get_related_places
20+
from datacommons.places import get_places_in, get_related_places, get_stats
2121
from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
2222

2323
# Other utilities

datacommons/examples/places.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,17 @@ def main():
4040
for dcid in tracts[mc][:10]:
4141
print(' - {}'.format(dcid))
4242

43+
# Get place stats.
44+
print('Get place stats')
45+
stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 'dc/0hyp6tkn18vcb')
46+
print(stats)
47+
4348
# Get related places.
44-
print('Get related places')
45-
# TODO(*): s/Cenus/Census/g when data is ready in BT.
46-
related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count',
47-
'CenusACS5yrSurvey', "measuredValue", {"gender": "Female"})
48-
print(related_places)
49+
# TODO(*): Fix the related places example.
50+
# print('Get related places')
51+
# related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count',
52+
# 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"})
53+
# print(related_places)
4954

5055

5156
if __name__ == '__main__':

datacommons/places.py

Lines changed: 80 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,15 @@ def get_places_in(dcids, place_type):
4949
5050
>>> get_places_in(["geoId/06"], "County")
5151
{
52-
'geoId/06': [
53-
'geoId/06041',
54-
'geoId/06089',
55-
'geoId/06015',
56-
'geoId/06023',
57-
'geoId/06067',
58-
...
59-
# and 53 more
60-
]
52+
'geoId/06': [
53+
'geoId/06041',
54+
'geoId/06089',
55+
'geoId/06015',
56+
'geoId/06023',
57+
'geoId/06067',
58+
...
59+
# and 53 more
60+
]
6161
}
6262
"""
6363
dcids = filter(lambda v: v==v, dcids) # Filter out NaN values
@@ -71,7 +71,72 @@ def get_places_in(dcids, place_type):
7171
# Create the results and format it appropriately
7272
result = utils._format_expand_payload(payload, 'place', must_exist=dcids)
7373
return result
74+
75+
def get_stats(dcids, stats_var):
76+
""" Returns :obj:`TimeSeries` for :code:`dcids` \
77+
based on the :code:`stats_var`.
7478
79+
Args:
80+
dcids (:obj:`iterable` of :obj:`str`): Dcids of places to query for.
81+
stats_var (:obj:`str`): The dcid of the :obj:`StatisticalVariable`.
82+
Returns:
83+
A :obj:`dict` mapping the :obj:`Place` identified by the given :code:`dcid`
84+
to its place name and the :obj:`TimeSeries` associated with the
85+
:obj:`StatisticalVariable` identified by the given :code:`stats_var`.
86+
See example below for more detail about how the returned :obj:`dict` is
87+
structured.
88+
89+
Raises:
90+
ValueError: If the payload returned by the Data Commons REST API is
91+
malformed.
92+
93+
Examples:
94+
We would like to get the :obj:`TimeSeries` of the number of males
95+
at least 25 years old that attended 12th grade but did not receive
96+
a high school diploma
97+
(`dc/0hyp6tkn18vcb <https://browser.datacommons.org/kg?dcid=dc/0hyp6tkn18vcb>`_)
98+
in `Arkansas <https://browser.datacommons.org/kg?dcid=geoId/05>`_
99+
and `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_.
100+
101+
>>> get_stats(["geoId/05", "geoId/06"], "dc/0hyp6tkn18vcb")
102+
{
103+
'geoId/05': {
104+
'place_name': 'Arkansas',
105+
'data': {
106+
'2011':18136,
107+
'2012':17279,
108+
'2013':17459,
109+
'2014':16966,
110+
'2015':17173,
111+
'2016':17041,
112+
'2017':17783,
113+
'2018':18003
114+
},
115+
},
116+
'geoId/06': {
117+
'place_name': 'California',
118+
'data': {
119+
'2011':316667,
120+
'2012':324116,
121+
'2013':331853,
122+
'2014':342818,
123+
'2015':348979,
124+
'2016':354806,
125+
'2017':360645,
126+
'2018':366331
127+
},
128+
},
129+
}
130+
"""
131+
dcids = filter(lambda v: v==v, dcids) # Filter out NaN values
132+
dcids = list(dcids)
133+
url = utils._API_ROOT + utils._API_ENDPOINTS['get_stats']
134+
payload = utils._send_request(url, req_json={
135+
'place': dcids,
136+
'stats_var': stats_var,
137+
})
138+
139+
return payload
75140

76141
def get_related_places(dcids, population_type, measured_property,
77142
measurement_method, stat_type, constraining_properties={},
@@ -112,12 +177,12 @@ def get_related_places(dcids, population_type, measured_property,
112177
"gender": "Female"
113178
}, "count", "CenusACS5yrSurvey", "measuredValue")
114179
{
115-
'geoId/06085': [
116-
'geoId/06041',
117-
'geoId/06089',
118-
'geoId/06015',
119-
'geoId/06023',
120-
]
180+
'geoId/06085': [
181+
'geoId/06041',
182+
'geoId/06089',
183+
'geoId/06015',
184+
'geoId/06023',
185+
]
121186
}
122187
"""
123188
dcids = filter(lambda v: v==v, dcids) # Filter out NaN values

datacommons/test/places_test.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,70 @@ def read(self):
8787
# Response returned when no dcids are given.
8888
return MockResponse(json.dumps({'payload': res_json}))
8989

90+
91+
# Mock responses for urlopen requests to get_stats.
92+
if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']:
93+
if (data['place'] == ['geoId/05', 'geoId/06'] and
94+
data['stats_var'] == 'dc/0hyp6tkn18vcb'):
95+
# Response returned when querying for multiple valid dcids.
96+
res_json = json.dumps({
97+
'geoId/05': {
98+
'data': {
99+
'2011': 18136,
100+
'2012': 17279,
101+
'2013': 17459,
102+
'2014': 16966,
103+
'2015': 17173,
104+
'2016': 17041,
105+
'2017': 17783,
106+
'2018': 18003
107+
},
108+
'place_name': 'Arkansas'
109+
},
110+
'geoId/06': {
111+
'data': {
112+
'2011': 316667,
113+
'2012': 324116,
114+
'2013': 331853,
115+
'2014': 342818,
116+
'2015': 348979,
117+
'2016': 354806,
118+
'2017': 360645,
119+
'2018': 366331
120+
},
121+
'place_name': 'California'
122+
}
123+
})
124+
return MockResponse(json.dumps({'payload': res_json}))
125+
if (data['place'] == ['geoId/05', 'dc/MadDcid'] and
126+
data['stats_var'] == 'dc/0hyp6tkn18vcb'):
127+
# Response returned when querying for a dcid that does not exist.
128+
res_json = json.dumps({
129+
'geoId/05': {
130+
'data': {
131+
'2011': 18136,
132+
'2012': 17279,
133+
'2013': 17459,
134+
'2014': 16966,
135+
'2015': 17173,
136+
'2016': 17041,
137+
'2017': 17783,
138+
'2018': 18003
139+
},
140+
'place_name': 'Arkansas'
141+
}
142+
})
143+
return MockResponse(json.dumps({'payload': res_json}))
144+
if (data['place'] == ['dc/MadDcid', 'dc/MadderDcid'] and
145+
data['stats_var'] == 'dc/0hyp6tkn18vcb'):
146+
# Response returned when both given dcids do not exist.
147+
res_json = json.dumps([])
148+
return MockResponse(json.dumps({'payload': res_json}))
149+
if data['place'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb':
150+
res_json = json.dumps([])
151+
# Response returned when no dcids are given.
152+
return MockResponse(json.dumps({'payload': res_json}))
153+
90154
# Otherwise, return an empty response and a 404.
91155
return urllib.error.HTTPError
92156

@@ -142,5 +206,89 @@ def test_no_dcids(self, urlopen):
142206
})
143207

144208

209+
class TestGetStats(unittest.TestCase):
210+
""" Unit tests for get_stats. """
211+
212+
@mock.patch('urllib.request.urlopen', side_effect=request_mock)
213+
def test_multiple_dcids(self, urlopen):
214+
""" Calling get_stats with proper dcids returns valid results. """
215+
# Set the API key
216+
dc.set_api_key('TEST-API-KEY')
217+
218+
# Call get_stats
219+
stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb')
220+
self.assertDictEqual(
221+
stats, {
222+
'geoId/05': {
223+
'data': {
224+
'2011': 18136,
225+
'2012': 17279,
226+
'2013': 17459,
227+
'2014': 16966,
228+
'2015': 17173,
229+
'2016': 17041,
230+
'2017': 17783,
231+
'2018': 18003
232+
},
233+
'place_name': 'Arkansas'
234+
},
235+
'geoId/06': {
236+
'data': {
237+
'2011': 316667,
238+
'2012': 324116,
239+
'2013': 331853,
240+
'2014': 342818,
241+
'2015': 348979,
242+
'2016': 354806,
243+
'2017': 360645,
244+
'2018': 366331
245+
},
246+
'place_name': 'California'
247+
}
248+
})
249+
250+
@mock.patch('urllib.request.urlopen', side_effect=request_mock)
251+
def test_bad_dcids(self, urlopen):
252+
""" Calling get_stats with dcids that do not exist returns empty
253+
results.
254+
"""
255+
# Set the API key
256+
dc.set_api_key('TEST-API-KEY')
257+
258+
# Call get_stats with one dcid that does not exist
259+
bad_dcids_1 = dc.get_stats(['geoId/05', 'dc/MadDcid'], 'dc/0hyp6tkn18vcb')
260+
self.assertDictEqual(
261+
bad_dcids_1, {
262+
'geoId/05': {
263+
'data': {
264+
'2011': 18136,
265+
'2012': 17279,
266+
'2013': 17459,
267+
'2014': 16966,
268+
'2015': 17173,
269+
'2016': 17041,
270+
'2017': 17783,
271+
'2018': 18003
272+
},
273+
'place_name': 'Arkansas'
274+
}
275+
})
276+
277+
# Call get_stats when both dcids do not exist
278+
bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'],
279+
'dc/0hyp6tkn18vcb')
280+
self.assertFalse(bad_dcids_2)
281+
282+
@mock.patch('urllib.request.urlopen', side_effect=request_mock)
283+
def test_no_dcids(self, urlopen):
284+
""" Calling get_stats with no dcids returns empty results. """
285+
# Set the API key
286+
dc.set_api_key('TEST-API-KEY')
287+
288+
# Call get_stats with no dcids.
289+
no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb')
290+
self.assertFalse(no_dcids)
291+
292+
145293
if __name__ == '__main__':
146294
unittest.main()

datacommons/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
'get_observations': '/node/observations',
4949
'get_pop_obs': '/bulk/pop-obs',
5050
'get_place_obs': '/bulk/place-obs',
51+
'get_stats': '/bulk/stats',
5152
}
5253

5354
# The default value to limit to

0 commit comments

Comments
 (0)