Skip to content

Commit 5cf41f7

Browse files
authored
Add get_pop_obs api (#93)
* Add get_pop_obs api * Updated comments with more detailed description. * Update comment and rst file * Updated comments * Move to populations module * Add missing change * Correct typo * More typo correction * Add comments on observations field
1 parent 09c25a3 commit 5cf41f7

10 files changed

Lines changed: 243 additions & 9 deletions

File tree

datacommons/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# Data Commons Python Client API
1919
from datacommons.core import get_property_labels, get_property_values, get_triples
2020
from datacommons.places import get_places_in
21-
from datacommons.populations import get_populations, get_observations
21+
from datacommons.populations import get_populations, get_observations, get_pop_obs
2222

2323
# Other utilities
2424
from .utils import set_api_key, clean_frame, flatten_frame

datacommons/examples/populations.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import datacommons as dc
2424
import pandas as pd
25+
import pprint
2526

2627
import datacommons.utils as utils
2728

@@ -84,5 +85,10 @@ def main():
8485
print(pd_frame)
8586

8687

88+
# Get all population and observation data of Mountain View.
89+
utils._print_header('Get Mountain View population and observation')
90+
popobs = dc.get_pop_obs("geoId/0649670")
91+
pprint.pprint(popobs)
92+
8793
if __name__ == '__main__':
8894
main()

datacommons/places.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929

3030

3131
def get_places_in(dcids, place_type):
32-
""" Returns :obj:`Place`'s contained in :code:`dcids` of type `place_type`.
32+
""" Returns :obj:`Place`s contained in :code:`dcids` of type
33+
:code:`place_type`.
3334
3435
Args:
3536
dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids to get

datacommons/populations.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,3 +235,116 @@ def get_observations(dcids,
235235
except ValueError:
236236
typed_results[k] = v
237237
return typed_results
238+
239+
240+
def get_pop_obs(dcid):
241+
""" Returns all :obj:`StatisticalPopulation` and :obj:`Observation` \
242+
of a :obj:`Thing`.
243+
244+
Args:
245+
dcid (:obj:`str`): Dcid of the thing.
246+
247+
Returns:
248+
A :obj:`dict` of :obj:`StatisticalPopulation` and :obj:`Observation` that
249+
are associated to the thing identified by the given :code:`dcid`. The given
250+
dcid is linked to the returned :obj:`StatisticalPopulation`,
251+
which are the :obj:`observedNode` of the returned :obj:`Observation`.
252+
See example below for more detail about how the returned :obj:`dict` is
253+
structured.
254+
255+
Raises:
256+
ValueError: If the payload returned by the Data Commons REST API is
257+
malformed.
258+
259+
Examples:
260+
We would like to get all :obj:`StatisticalPopulation` and
261+
:obj:`Observations` of
262+
`Santa Clara <https://browser.datacommons.org/kg?dcid=geoId/06085>`_.
263+
264+
>>> get_pop_obs("geoId/06085")
265+
{
266+
'name': 'Santa Clara',
267+
'placeType': 'County',
268+
'populations': {
269+
'dc/p/zzlmxxtp1el87': {
270+
'popType': 'Household',
271+
'numConstraints': 3,
272+
'propertyValues': {
273+
'householderAge': 'Years45To64',
274+
'householderRace': 'USC_AsianAlone',
275+
'income': 'USDollar35000To39999'
276+
},
277+
'observations': [
278+
{
279+
'marginOfError': 274,
280+
'measuredProp': 'count',
281+
'measuredValue': 1352,
282+
'measurementMethod': 'CensusACS5yrSurvey',
283+
'observationDate': '2017'
284+
},
285+
{
286+
'marginOfError': 226,
287+
'measuredProp': 'count',
288+
'measuredValue': 1388,
289+
'measurementMethod': 'CensusACS5yrSurvey',
290+
'observationDate': '2013'
291+
}
292+
],
293+
},
294+
},
295+
'observations': [
296+
{
297+
'meanValue': 4.1583,
298+
'measuredProp': 'particulateMatter25',
299+
'measurementMethod': 'CDCHealthTracking',
300+
'observationDate': '2014-04-04',
301+
'observedNode': 'geoId/06085'
302+
},
303+
{
304+
'meanValue': 9.4461,
305+
'measuredProp': 'particulateMatter25',
306+
'measurementMethod': 'CDCHealthTracking',
307+
'observationDate': '2014-03-20',
308+
'observedNode': 'geoId/06085'
309+
}
310+
]
311+
}
312+
313+
Notice that the return value is a multi-level :obj:`dict`. The top level
314+
contains the following keys.
315+
316+
- :code:`name` and :code:`placeType` provide the name and type of the
317+
:obj:`Place` identified by the given :code:`dcid`.
318+
- :code:`populations` maps to a :obj:`dict` containing all
319+
:obj:`StatisticalPopulation` that have the given :code:`dcid` as their
320+
:obj:`location`.
321+
- :code:`observations` maps to a :obj:`list` containing all
322+
:obj:`Observation` that have the given :code:`dcid` as their
323+
:obj:`observedNode`.
324+
325+
The :code:`populations` dictionary is keyed by the dcid of each
326+
:obj:`StatisticalPopulation`. The mapped dictionary contains the following
327+
keys.
328+
329+
- :code:`popType` which gives the population type of the
330+
:obj:`StatisticalPopulation` identified by the key.
331+
- :code:`numConstraints` which gives the number of constraining properties
332+
defined for the identified :obj:`StatisticalPopulation`.
333+
- :code:`propertyValues` which gives a :obj:`dict` mapping a constraining
334+
property to its value for the identified :obj:`StatisticalPopulation`.
335+
- :code:`observations` which gives a list of all :obj:`Observation`'s that
336+
have the identified :obj:`StatisticalPopulation` as their
337+
:obj:`observedNode`.
338+
339+
Each :obj:`Observation` is represented by a :code:`dict` that has the keys:
340+
341+
- :code:`measuredProp`
342+
- :code:`observationDate`
343+
- :code:`observationPeriod` (optional)
344+
- :code:`measurementMethod` (optional)
345+
- one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`,
346+
:code:`minValue`, :code:`medianValue`
347+
348+
"""
349+
url = utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid={}'.format(dcid)
350+
return utils._send_request(url, compress=True, post=False)

datacommons/test/places_test.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ def json(self):
9393
# Otherwise, return an empty response and a 404.
9494
return MockResponse({}, 404)
9595

96-
9796
class TestGetPlacesIn(unittest.TestCase):
9897
""" Unit tests for get_places_in. """
9998

datacommons/test/populations_test.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from __future__ import division
2222
from __future__ import print_function
2323

24+
import base64
2425
from pandas.util.testing import assert_series_equal
2526
from unittest import mock
2627

@@ -30,6 +31,7 @@
3031

3132
import json
3233
import unittest
34+
import zlib
3335

3436

3537
def post_request_mock(*args, **kwargs):
@@ -134,6 +136,64 @@ def json(self):
134136
return MockResponse({}, 404)
135137

136138

139+
def get_request_mock(*args, **kwargs):
140+
""" A mock of GET requests sent via the requests package. """
141+
# Create the mock response object.
142+
class MockResponse:
143+
def __init__(self, json_data, status_code):
144+
self.json_data = json_data
145+
self.status_code = status_code
146+
147+
def json(self):
148+
return self.json_data
149+
150+
headers = kwargs['headers']
151+
152+
# If the API key does not match, then return 403 Forbidden
153+
if 'x-api-key' not in headers or headers['x-api-key'] != 'TEST-API-KEY':
154+
return MockResponse({}, 403)
155+
156+
# Mock responses for get requests to get_pop_obs.
157+
if args[0] == utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid=geoId/06085':
158+
# Response returned when querying for a city in the graph.
159+
res_json = json.dumps({
160+
'name': 'Mountain View',
161+
'placeType': 'City',
162+
'populations': {
163+
'dc/p/013ldrstf6lnf': {
164+
'numConstraints': 6,
165+
'observations': [
166+
{
167+
'marginOfError': 119,
168+
'measuredProp': 'count',
169+
'measuredValue': 225,
170+
'measurementMethod': 'CensusACS5yrSurvey',
171+
'observationDate': '2014'
172+
}, {
173+
'marginOfError': 108,
174+
'measuredProp': 'count',
175+
'measuredValue': 180,
176+
'measurementMethod': 'CensusACS5yrSurvey',
177+
'observationDate': '2012'
178+
}
179+
],
180+
'popType': 'Person',
181+
'propertyValues': {
182+
'age': 'Years16Onwards',
183+
'gender': 'Male',
184+
'income': 'USDollar30000To34999',
185+
'incomeStatus': 'WithIncome',
186+
'race': 'USC_HispanicOrLatinoRace',
187+
'workExperience': 'USC_NotWorkedFullTime'
188+
}
189+
}
190+
}
191+
})
192+
return MockResponse({'payload': base64.b64encode(zlib.compress(res_json.encode('utf-8')))}, 200)
193+
194+
# Otherwise, return an empty response and a 404.
195+
return MockResponse({}, 404)
196+
137197
class TestGetPopulations(unittest.TestCase):
138198
""" Unit tests for get_populations. """
139199

@@ -355,6 +415,51 @@ def test_series_no_dcids(self, post_mock):
355415
measurement_method='BLSSeasonallyAdjusted')
356416
assert_series_equal(actual, expected)
357417

418+
class TestGetPopObs(unittest.TestCase):
419+
""" Unit tests for get_pop_obs. """
420+
421+
@mock.patch('requests.get', side_effect=get_request_mock)
422+
def test_valid_dcid(self, get_mock):
423+
""" Calling get_pop_obs with valid dcid returns valid results. """
424+
# Set the API key
425+
dc.set_api_key('TEST-API-KEY')
426+
427+
# Call get_pop_obs
428+
popobs = dc.get_pop_obs('geoId/06085')
429+
self.assertDictEqual(popobs, {
430+
'name': 'Mountain View',
431+
'placeType': 'City',
432+
'populations': {
433+
'dc/p/013ldrstf6lnf': {
434+
'numConstraints': 6,
435+
'observations': [
436+
{
437+
'marginOfError': 119,
438+
'measuredProp': 'count',
439+
'measuredValue': 225,
440+
'measurementMethod': 'CensusACS5yrSurvey',
441+
'observationDate': '2014'
442+
}, {
443+
'marginOfError': 108,
444+
'measuredProp': 'count',
445+
'measuredValue': 180,
446+
'measurementMethod': 'CensusACS5yrSurvey',
447+
'observationDate': '2012'
448+
}
449+
],
450+
'popType': 'Person',
451+
'propertyValues': {
452+
'age': 'Years16Onwards',
453+
'gender': 'Male',
454+
'income': 'USDollar30000To34999',
455+
'incomeStatus': 'WithIncome',
456+
'race': 'USC_HispanicOrLatinoRace',
457+
'workExperience': 'USC_NotWorkedFullTime'
458+
}
459+
}
460+
}
461+
})
462+
358463

359464
if __name__ == '__main__':
360465
unittest.main()

datacommons/utils.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from __future__ import print_function
2222

2323
from collections import defaultdict
24-
2524
import pandas as pd
2625

2726
import base64
@@ -45,7 +44,8 @@
4544
'get_triples': '/node/triples',
4645
'get_places_in': '/node/places-in',
4746
'get_populations': '/node/populations',
48-
'get_observations': '/node/observations'
47+
'get_observations': '/node/observations',
48+
'get_pop_obs': '/bulk/pop-obs'
4949
}
5050

5151
# The default value to limit to
@@ -138,7 +138,7 @@ def clean_frame(pd_frame):
138138
# ------------------------- INTERNAL HELPER FUNCTIONS -------------------------
139139

140140

141-
def _send_request(req_url, req_json={}, compress=False):
141+
def _send_request(req_url, req_json={}, compress=False, post=True):
142142
""" Sends a POST request with req_json, or a GET request if post is False, to req_url.
143143
144144
Returns:
@@ -154,7 +154,10 @@ def _send_request(req_url, req_json={}, compress=False):
154154
headers = {'x-api-key': os.environ[_ENV_VAR_API_KEY]}
155155

156156
# Send the request and verify the request succeeded
157-
res = requests.post(req_url, headers=headers, json=req_json)
157+
if post:
158+
res = requests.post(req_url, headers=headers, json=req_json)
159+
else:
160+
res = requests.get(req_url, headers=headers)
158161
if res.status_code != 200:
159162
raise ValueError(
160163
'Response error: An HTTP {} code was returned by the mixer. Printing '
@@ -171,7 +174,7 @@ def _send_request(req_url, req_json={}, compress=False):
171174
payload = res_json['payload']
172175
if compress:
173176
payload = zlib.decompress(
174-
base64.b64decode(payload), 16 + zlib.MAX_WBITS)
177+
base64.b64decode(payload), zlib.MAX_WBITS|32)
175178
return json.loads(payload)
176179

177180

docs/source/_autosummary/datacommons.places.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ datacommons.places
88
.. autosummary::
99
:toctree: datacommons_places
1010

11-
get_places_in
11+
get_places_in

docs/source/_autosummary/datacommons.populations.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ datacommons.populations
1010

1111
get_observations
1212
get_populations
13+
get_pop_obs
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
datacommons.populations.get\_pop\_obs
2+
=====================================
3+
4+
.. currentmodule:: datacommons.populations
5+
6+
.. autofunction:: get_pop_obs

0 commit comments

Comments
 (0)