Skip to content

Commit 043abe8

Browse files
authored
Remove dependency on pandas (#105)
* Remove dependency on pandas * Remove debug print
1 parent 02c1e40 commit 043abe8

14 files changed

Lines changed: 34 additions & 504 deletions

File tree

datacommons/BUILD.bazel

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
package(default_visibility = ["//visibility:public"])
2-
load("@requirements//:requirements.bzl", "requirement")
32

43
py_library(
54
name = "datacommons",
65
srcs = glob(["*.py"]),
7-
deps = [
8-
requirement("pandas"),
9-
]
106
)

datacommons/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@
2121
from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
2222

2323
# Other utilities
24-
from .utils import set_api_key, clean_frame, flatten_frame
24+
from .utils import set_api_key

datacommons/core.py

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828

2929
from collections import defaultdict
3030

31-
import pandas as pd
32-
3331
import datacommons.utils as utils
3432
import requests
3533

@@ -40,7 +38,7 @@ def get_property_labels(dcids, out=True):
4038
""" Returns the labels of properties defined for the given :code:`dcids`.
4139
4240
Args:
43-
dcids (:obj:`list` of :obj:`str`): A list of nodes identified by their
41+
dcids (:obj:`iterable` of :obj:`str`): An iterable of nodes identified by their
4442
dcids.
4543
out (:obj:`bool`, optional): Whether or not the property points away from
4644
the given list of nodes.
@@ -99,6 +97,7 @@ def get_property_labels(dcids, out=True):
9997
}
10098
"""
10199
# Generate the GetProperty query and send the request
100+
dcids = list(dcids)
102101
url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']
103102
payload = utils._send_request(url, req_json={'dcids': dcids})
104103

@@ -120,8 +119,7 @@ def get_property_values(dcids,
120119
""" Returns property values of given :code:`dcids` along the given property.
121120
122121
Args:
123-
dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): dcids to get
124-
property values for.
122+
dcids (:obj:`iterable` of :obj:`str`): dcids to get property values for.
125123
prop (:obj:`str`): The property to get property values for.
126124
out (:obj:`bool`, optional): A flag that indicates the property is directed
127125
away from the given nodes when set to true.
@@ -131,15 +129,8 @@ def get_property_values(dcids,
131129
aggregated over all given nodes.
132130
133131
Returns:
134-
When :code:`dcids` is an instance of :obj:`list`, the returned property
135-
values are formatted as a :obj:`dict` from a given dcid to a list of its
136-
property values.
137-
138-
When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
139-
property values are formatted as a :obj:`pandas.Series` where the `i`-th
140-
entry corresponds to property values associated with the `i`-th given dcid.
141-
The cells of the returned series will always contain a :obj:`list` of
142-
property values.
132+
Returned property values are formatted as a :obj:`dict` from a given dcid
133+
to a list of its property values.
143134
144135
Raises:
145136
ValueError: If the payload returned by the Data Commons REST API is
@@ -160,21 +151,11 @@ def get_property_values(dcids,
160151
"geoId/21": ["Kentucky"],
161152
"geoId/24": ["Maryland"],
162153
}
163-
164-
Next, we specify :code:`dcids` as a :obj:`pandas.Series`
165-
166-
>>> import pandas as pd
167-
>>> dcids = pd.Series(["geoId/06", "geoId/21", "geoId/24"])
168-
>>> get_property_values(dcids, "name")
169-
0 [California]
170-
1 [Kentucky]
171-
2 [Maryland]
172-
dtype: object
173154
"""
174155
# Convert the dcids field and format the request to GetPropertyValue
175-
dcids, req_dcids = utils._convert_dcids_type(dcids)
156+
dcids = list(dcids)
176157
req_json = {
177-
'dcids': req_dcids,
158+
'dcids': dcids,
178159
'property': prop,
179160
'limit': limit
180161
}
@@ -205,9 +186,6 @@ def get_property_values(dcids,
205186
# Make sure each dcid is in the results dict, and convert all sets to lists.
206187
results = {dcid: sorted(list(unique_results[dcid])) for dcid in dcids}
207188

208-
# Format the results as a Series if a Pandas Series is provided.
209-
if isinstance(dcids, pd.Series):
210-
return pd.Series([results[dcid] for dcid in dcids], index=dcids.index)
211189
return results
212190

213191

@@ -221,7 +199,7 @@ def get_triples(dcids, limit=utils._MAX_LIMIT):
221199
*predicate*).
222200
223201
Args:
224-
dcids (:obj:`list` of :obj:`str`): A list of dcids to get triples for.
202+
dcids (:obj:`iterable` of :obj:`str`): An iterable of dcids to get triples for.
225203
limit (:obj:`int`, optional): The maximum total number of triples to get.
226204
227205
Returns:
@@ -249,6 +227,7 @@ def get_triples(dcids, limit=utils._MAX_LIMIT):
249227
}
250228
"""
251229
# Generate the GetTriple query and send the request.
230+
dcids = list(dcids)
252231
url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples']
253232
payload = utils._send_request(url, req_json={'dcids': dcids, 'limit': limit})
254233

datacommons/examples/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def main():
7777
# To expand on a column with get_property_values, the data frame has to be
7878
# flattened first. Clients can use pandas' DataFrame.explode to do this.
7979
utils._print_header('Flatten the Frame')
80-
pd_frame = dc.flatten_frame(pd_frame)
80+
pd_frame = pd_frame.explode('county')
8181
print(pd_frame)
8282

8383
# Get the names for each city.
@@ -87,7 +87,7 @@ def main():
8787

8888
# Format the final frame.
8989
utils._print_header('The Final Frame')
90-
pd_frame = dc.flatten_frame(pd_frame)
90+
pd_frame = pd_frame.explode('city')
9191
print(pd_frame)
9292

9393

datacommons/examples/places.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def main():
5353
# Get all CensusTracts in these two counties.
5454
utils._print_header('Get Census Tracts')
5555
pd_frame['tracts'] = dc.get_places_in(pd_frame['county'], 'CensusTract')
56-
pd_frame = dc.flatten_frame(pd_frame)
56+
pd_frame = pd_frame.explode('tracts')
5757
print(pd_frame)
5858

5959

datacommons/examples/populations.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,16 @@ def main():
5757
# DataFrame with California, Kentucky, and Maryland.
5858
utils._print_header('Initialize the DataFrame')
5959
pd_frame = pd.DataFrame({'state': ['geoId/06', 'geoId/21', 'geoId/24']})
60-
pd_frame['state_name'] = dc.get_property_values(pd_frame['state'], 'name')
61-
pd_frame = dc.flatten_frame(pd_frame)
62-
print(pd_frame)
60+
pd_frame['state_name'] = pd_frame['state'].map(
61+
dc.get_property_values(pd_frame['state'], 'name'))
62+
pd_frame = pd_frame.explode('state_name').reset_index(drop=True)
6363

6464
# Get populations for employed individuals
6565
utils._print_header('Add Population and Observation to DataFrame')
66-
pd_frame['employed_pop'] = dc.get_populations(
66+
pd_frame['employed_pop'] = pd_frame['state'].map(dc.get_populations(
6767
pd_frame['state'],
6868
'Person',
69-
constraining_properties={'employment': 'BLS_Employed'})
69+
constraining_properties={'employment': 'BLS_Employed'}))
7070

7171
# Add the observation for employed individuals
7272
pd_frame['employed_count'] = dc.get_observations(
@@ -81,7 +81,7 @@ def main():
8181
# Final dataframe. Drop any rows with missing values and reset the index
8282
# so the resulting frame is contiguous.
8383
utils._print_header('Final Data Frame')
84-
pd_frame = dc.clean_frame(pd_frame)
84+
pd_frame = pd_frame.dropna().reset_index(drop=True)
8585
print(pd_frame)
8686

8787

datacommons/examples/query.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from __future__ import print_function
2222

2323
import datacommons as dc
24-
import pandas as pd
2524

2625

2726
def main():
@@ -37,12 +36,9 @@ def main():
3736
''')
3837
print('> Issuing query.\n{}'.format(query))
3938

40-
# Initialize the Query instance.
41-
dc_query = dc.Query(sparql=query)
42-
4339
# Iterate through all the rows in the results.
4440
print('> Printing results.\n')
45-
for row in dc_query.rows():
41+
for row in dc.query(query_string=query):
4642
print(' {}'.format(row))
4743

4844

datacommons/places.py

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from __future__ import print_function
2424

2525
import datacommons.utils as utils
26-
import pandas as pd
2726

2827
import requests
2928

@@ -33,21 +32,13 @@ def get_places_in(dcids, place_type):
3332
:code:`place_type`.
3433
3534
Args:
36-
dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids to get
37-
contained in places.
35+
dcids (:obj:`iterable` of :obj:`str`): Dcids to get contained in places.
3836
place_type (:obj:`str`): The type of places contained in the given dcids to
3937
filter by.
4038
4139
Returns:
42-
When :code:`dcids` is an instance of :obj:`list`, the returned
43-
:obj:`Place`'s are formatted as a :obj:`dict` from a given dcid to a list of
44-
places identified by dcids of the given `place_type`.
45-
46-
When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
47-
:obj:`Place`'s are formatted as a :obj:`pandas.Series` where the `i`-th
48-
entry corresponds to places contained in the place identified by the dcid
49-
in the `i`-th cell of :code:`dcids`. The cells of the returned series will always
50-
contain a :obj:`list` of place dcids of the given `place_type`.
40+
The returned :obj:`Place`'s are formatted as a :obj:`dict` from a given
41+
dcid to a list of places identified by dcids of the given `place_type`.
5142
5243
Raises:
5344
ValueError: If the payload returned by the Data Commons REST API is
@@ -70,26 +61,14 @@ def get_places_in(dcids, place_type):
7061
# and 53 more
7162
]
7263
}
73-
74-
We can also specify the :code:`dcids` as a :obj:`pandas.Series` like so.
75-
76-
>>> import pandas as pd
77-
>>> dcids = pd.Series(["geoId/06"])
78-
>>> get_places_in(dcids, "County")
79-
0 [geoId/06041, geoId/06089, geoId/06015, geoId/...
80-
dtype: object
81-
8264
"""
83-
# Convert the dcids field and format the request to GetPlacesIn
84-
dcids, req_dcids = utils._convert_dcids_type(dcids)
65+
dcids = list(dcids)
8566
url = utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']
8667
payload = utils._send_request(url, req_json={
87-
'dcids': req_dcids,
68+
'dcids': dcids,
8869
'place_type': place_type,
8970
})
9071

9172
# Create the results and format it appropriately
9273
result = utils._format_expand_payload(payload, 'place', must_exist=dcids)
93-
if isinstance(dcids, pd.Series):
94-
return pd.Series([result[dcid] for dcid in dcids], index=dcids.index)
9574
return result

0 commit comments

Comments
 (0)