Skip to content

Commit bda1cff

Browse files
authored
Merge pull request #89
Issues/37 Add function for returning an iterator instead of sequence
2 parents 69946fc + 8ce8e64 commit bda1cff

14 files changed

Lines changed: 32332 additions & 10291 deletions

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
88

99
## [Unreleased]
1010

11+
### Added
12+
- New function `get_iter` for returning results as an iterator instead of a sequence ([#37](https://github.com/nasa/python_cmr/issues/37))
13+
14+
### Deprecated
15+
- Function `get` has been marked as deprecated in favor of the new `get_iter` function. `get` will likely be removed for the 1.0.0 release. ([#37](https://github.com/nasa/python_cmr/issues/37))
16+
1117
## [0.13.0]
1218

1319
### Added

cmr/queries.py

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from datetime import date, datetime, timezone
88
from inspect import getmembers, ismethod
99
from re import search
10+
from typing import Iterator
11+
1012
from typing_extensions import (
1113
Any,
1214
List,
@@ -20,7 +22,7 @@
2022
Tuple,
2123
TypeAlias,
2224
Union,
23-
override,
25+
override, deprecated,
2426
)
2527
from urllib.parse import quote
2628

@@ -58,11 +60,12 @@ def __init__(self, route: str, mode: str = CMR_OPS):
5860
self.concept_id_chars: Set[str] = set()
5961
self.headers: MutableMapping[str, str] = {}
6062

63+
@deprecated("Use get_iter() instead")
6164
def get(self, limit: int = 2000) -> Sequence[Any]:
6265
"""
6366
Get all results up to some limit, even if spanning multiple pages.
6467
65-
:limit: The number of results to return
68+
:param limit: The number of results to return
6669
:returns: query results as a list
6770
"""
6871

@@ -117,14 +120,56 @@ def hits(self) -> int:
117120

118121
def get_all(self) -> Sequence[Any]:
119122
"""
120-
Returns all of the results for the query. This will call hits() first to determine how many
121-
results their are, and then calls get() with that number. This method could take quite
123+
Returns all of the results for the query. This method could take quite
122124
a while if many requests have to be made.
123125
124126
:returns: query results as a list
125127
"""
126128

127-
return self.get(self.hits())
129+
return list(self.get_iter())
130+
131+
def get_iter(self, limit: int = -1, page_size: int = 2000) -> Iterator[Any]:
    """
    Lazily yield the results of this query, fetching one page per request.

    :param limit: The maximum number of results to return. A negative value
        means "everything": the limit is then taken from ``self.hits()``.
    :param page_size: The number of results requested from CMR per call,
        clamped to the range 0..2000. Smaller pages keep fewer items in
        memory but need more CMR queries; larger pages do the opposite.
    :returns: a generator over the query results. When the query format is
        ``"json"`` each yielded item is one entry of the response feed;
        for any other format each yielded item is the raw text of one page.
    """

    search_url = self._build_url()

    # Work on a copy so the paging header set below is never written back
    # into self.headers.
    request_headers = dict(self.headers or {})
    page_size = min(max(0, page_size), 2000)

    if limit < 0:
        limit = self.hits()

    fetched = 0

    while True:
        # Shrink the request so the last page does not overshoot the limit.
        page_size = min(limit - fetched, page_size)
        response = requests.get(
            search_url,
            headers=request_headers,
            params={"page_size": page_size},
        )
        response.raise_for_status()

        # Count what was *requested* rather than what was yielded: for
        # non-JSON formats one yielded item is a whole page, so counting
        # yielded items would track pages instead of results.
        fetched += page_size

        if self._format == "json":
            yield from response.json()["feed"]["entry"]
        else:
            yield response.text

        # Carry the paging cursor forward to the next request, if CMR sent one.
        search_after = response.headers.get("cmr-search-after")
        if search_after:
            request_headers["cmr-search-after"] = search_after

        # Stop once the limit is reached or CMR sent no paging cursor.
        if fetched >= limit or search_after is None:
            break
128173

129174
def parameters(self, **kwargs: Any) -> Self:
130175
"""

poetry.lock

Lines changed: 102 additions & 99 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/fixtures/vcr_cassettes/MOD02QKM_2000.yaml renamed to tests/fixtures/vcr_cassettes/TestMultipleQueries.test_get.yaml

Lines changed: 4015 additions & 4013 deletions
Large diffs are not rendered by default.

tests/fixtures/vcr_cassettes/TELLUS_GRAC.yaml renamed to tests/fixtures/vcr_cassettes/TestMultipleQueries.test_get_all_less_than_2k.yaml

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@ interactions:
88
- gzip, deflate
99
Connection:
1010
- keep-alive
11-
User-Agent:
12-
- python-requests/2.31.0
1311
method: GET
1412
uri: https://cmr.earthdata.nasa.gov/search/granules.json?short_name=TELLUS_GRAC_L3_JPL_RL06_LND_v04&page_size=0
1513
response:
1614
body:
17-
string: '{"feed":{"updated":"2023-08-14T17:02:36.801Z","id":"https://cmr.earthdata.nasa.gov:443/search/granules.json?short_name=TELLUS_GRAC_L3_JPL_RL06_LND_v04&page_size=0","title":"ECHO
15+
string: '{"feed":{"updated":"2024-09-24T00:24:58.509Z","id":"https://cmr.earthdata.nasa.gov:443/search/granules.json?short_name=TELLUS_GRAC_L3_JPL_RL06_LND_v04&page_size=0","title":"ECHO
1816
granule metadata","entry":[]}}'
1917
headers:
2018
Access-Control-Allow-Origin:
@@ -25,37 +23,41 @@ interactions:
2523
CMR-Hits:
2624
- '163'
2725
CMR-Request-Id:
28-
- 5855d714-8aff-4d0f-b4cc-e556f02ef96a
26+
- 64ce09ea-2037-48a1-b5e3-1e9706811229
2927
CMR-Took:
30-
- '52'
28+
- '131'
3129
Connection:
3230
- keep-alive
31+
Content-MD5:
32+
- 3c1bb7d108b84325434e60a36dda1159
33+
Content-SHA1:
34+
- 3d871ed3d2791fefc0fb58701b67811f121caa63
3335
Content-Type:
3436
- application/json;charset=utf-8
3537
Date:
36-
- Mon, 14 Aug 2023 17:02:36 GMT
38+
- Tue, 24 Sep 2024 00:24:58 GMT
3739
Server:
3840
- ServerTokens ProductOnly
3941
Strict-Transport-Security:
40-
- max-age=31536000
42+
- max-age=31536000; includeSubDomains; preload
4143
Transfer-Encoding:
4244
- chunked
4345
Vary:
4446
- Accept-Encoding, User-Agent
4547
Via:
46-
- 1.1 cc58556a6e846289f4d3105969536e4c.cloudfront.net (CloudFront)
48+
- 1.1 b837267595110a1135bf4fb036d71e1e.cloudfront.net (CloudFront)
4749
X-Amz-Cf-Id:
48-
- qj9VuAc1JQu-rnMVDg3mGwstR-jGQA4rd7MKVRAEpXeTDbKZT5p5jg==
50+
- nCF7mfer1omvbZi5CTMRTv9-9uPozEm7zBM8NhFZ8nJ_sXVz-tBAgw==
4951
X-Amz-Cf-Pop:
50-
- SFO53-C1
52+
- LAX50-C1
5153
X-Cache:
5254
- Miss from cloudfront
5355
X-Content-Type-Options:
5456
- nosniff
5557
X-Frame-Options:
5658
- SAMEORIGIN
5759
X-Request-Id:
58-
- qj9VuAc1JQu-rnMVDg3mGwstR-jGQA4rd7MKVRAEpXeTDbKZT5p5jg==
60+
- nCF7mfer1omvbZi5CTMRTv9-9uPozEm7zBM8NhFZ8nJ_sXVz-tBAgw==
5961
X-XSS-Protection:
6062
- 1; mode=block
6163
content-length:
@@ -72,13 +74,11 @@ interactions:
7274
- gzip, deflate
7375
Connection:
7476
- keep-alive
75-
User-Agent:
76-
- python-requests/2.31.0
7777
method: GET
7878
uri: https://cmr.earthdata.nasa.gov/search/granules.json?short_name=TELLUS_GRAC_L3_JPL_RL06_LND_v04&page_size=163
7979
response:
8080
body:
81-
string: '{"feed":{"updated":"2023-08-14T17:02:40.416Z","id":"https://cmr.earthdata.nasa.gov:443/search/granules.json?short_name=TELLUS_GRAC_L3_JPL_RL06_LND_v04&page_size=163","title":"ECHO
81+
string: '{"feed":{"updated":"2024-09-24T00:24:58.790Z","id":"https://cmr.earthdata.nasa.gov:443/search/granules.json?short_name=TELLUS_GRAC_L3_JPL_RL06_LND_v04&page_size=163","title":"ECHO
8282
granule metadata","entry":[{"boxes":["-89.5 0.5 89.5 180","-89.5 -180 89.5
8383
-0.5"],"time_start":"2002-04-04T00:00:00.000Z","updated":"2023-04-17T15:27:21.022Z","dataset_id":"JPL
8484
TELLUS GRACE Level-3 Monthly Land Water-Equivalent-Thickness Surface Mass
@@ -2045,39 +2045,43 @@ interactions:
20452045
CMR-Hits:
20462046
- '163'
20472047
CMR-Request-Id:
2048-
- 60eb29b2-95e1-453c-8efe-6e59cf649eb5
2048+
- b82d5198-a729-42bd-b597-a1132b2652c3
20492049
CMR-Search-After:
20502050
- '["pocloud",1495497600000,2658328520]'
20512051
CMR-Took:
2052-
- '4959'
2052+
- '125'
20532053
Connection:
20542054
- keep-alive
2055+
Content-MD5:
2056+
- 2f2981275f193e1579bea1c3e9f1acf5
2057+
Content-SHA1:
2058+
- 4fa7e296cc7b77f83dcf2bcda4237d29dc885fd1
20552059
Content-Type:
20562060
- application/json;charset=utf-8
20572061
Date:
2058-
- Mon, 14 Aug 2023 17:02:42 GMT
2062+
- Tue, 24 Sep 2024 00:24:58 GMT
20592063
Server:
20602064
- ServerTokens ProductOnly
20612065
Strict-Transport-Security:
2062-
- max-age=31536000
2066+
- max-age=31536000; includeSubDomains; preload
20632067
Transfer-Encoding:
20642068
- chunked
20652069
Vary:
20662070
- Accept-Encoding, User-Agent
20672071
Via:
2068-
- 1.1 44933b72098305e9c31fc50b2e6554a0.cloudfront.net (CloudFront)
2072+
- 1.1 be66acbcc5d85e825abf1047b034d722.cloudfront.net (CloudFront)
20692073
X-Amz-Cf-Id:
2070-
- 9TJ3JRMGc6mUxKegR4f2HSLC_1Cfwei5QHZuicg_aLsWEJS3T6XCNg==
2074+
- JIeDUJvd8TodeetWYvcK5xBnxBTh8jvsNt8if-ZsMjTUWW4sbZ9P2A==
20712075
X-Amz-Cf-Pop:
2072-
- SFO53-C1
2076+
- LAX50-C1
20732077
X-Cache:
20742078
- Miss from cloudfront
20752079
X-Content-Type-Options:
20762080
- nosniff
20772081
X-Frame-Options:
20782082
- SAMEORIGIN
20792083
X-Request-Id:
2080-
- 9TJ3JRMGc6mUxKegR4f2HSLC_1Cfwei5QHZuicg_aLsWEJS3T6XCNg==
2084+
- JIeDUJvd8TodeetWYvcK5xBnxBTh8jvsNt8if-ZsMjTUWW4sbZ9P2A==
20812085
X-XSS-Protection:
20822086
- 1; mode=block
20832087
content-length:

tests/fixtures/vcr_cassettes/CYGNSS.yaml renamed to tests/fixtures/vcr_cassettes/TestMultipleQueries.test_get_all_more_than_2k.yaml

Lines changed: 1106 additions & 42 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)