Skip to content

Commit 14a56f4

Browse files
committed
Deprecate defunct NWIS functions, update tests, and improve 5xx error handling
1 parent 5b3766a commit 14a56f4

3 files changed

Lines changed: 151 additions & 109 deletions

File tree

dataretrieval/nwis.py

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -481,11 +481,20 @@ def get_dv(
481481
kwargs["multi_index"] = multi_index
482482

483483
response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
484-
df = _read_json(response.json())
484+
try:
485+
df = _read_json(response.json())
486+
except Exception as e:
487+
if "<html>" in response.text.lower():
488+
raise ValueError(
489+
"Received HTML response instead of JSON. This often indicates "
490+
"that the service is currently unavailable."
491+
) from e
492+
raise e
485493

486494
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
487495

488496

497+
489498
def get_info(ssl_check: bool = True, **kwargs) -> tuple[pd.DataFrame, BaseMetadata]:
490499
"""
491500
Get site description information from NWIS.
@@ -667,10 +676,19 @@ def get_iv(
667676
service="iv", format="json", ssl_check=ssl_check, **kwargs
668677
)
669678

670-
df = _read_json(response.json())
679+
try:
680+
df = _read_json(response.json())
681+
except Exception as e:
682+
if "<html>" in response.text.lower():
683+
raise ValueError(
684+
"Received HTML response instead of JSON. This often indicates "
685+
"that the service is currently unavailable."
686+
) from e
687+
raise e
671688
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
672689

673690

691+
674692
def get_pmcodes(
675693
parameterCd: str | list[str] = "All",
676694
partial: bool = True,
@@ -840,11 +858,11 @@ def get_record(
840858
- 'iv' : instantaneous data
841859
- 'dv' : daily mean data
842860
- 'site' : site description
843-
- 'measurements' : discharge measurements
861+
- 'measurements' : (defunct) use `waterdata.get_field_measurements`
844862
- 'peaks': discharge peaks
845-
- 'gwlevels': groundwater levels
846-
- 'pmcodes': get parameter codes
847-
- 'water_use': get water use data
863+
- 'gwlevels': (defunct) use `waterdata.get_field_measurements`
864+
- 'pmcodes': (defunct) use `get_reference_table`
865+
- 'water_use': (defunct) no longer available
848866
- 'ratings': get rating table
849867
- 'stat': get statistics
850868
ssl_check: bool, optional
@@ -870,29 +888,12 @@ def get_record(
870888
>>> # Get site description for site 01585200
871889
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
872890
873-
>>> # Get discharge measurements for site 01585200
874-
>>> df = dataretrieval.nwis.get_record(
875-
... sites="01585200", service="measurements"
876-
... )
891+
>>> # Get site description for site 01585200
892+
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
877893
878894
>>> # Get discharge peaks for site 01585200
879895
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks")
880896
881-
>>> # Get latest groundwater level for site 434400121275801
882-
>>> df = dataretrieval.nwis.get_record(
883-
... sites="434400121275801", service="gwlevels"
884-
... )
885-
886-
>>> # Get information about the discharge parameter code
887-
>>> df = dataretrieval.nwis.get_record(
888-
... service="pmcodes", parameterCd="00060"
889-
... )
890-
891-
>>> # Get water use data for livestock nationally in 2010
892-
>>> df = dataretrieval.nwis.get_record(
893-
... service="water_use", years="2010", categories="L"
894-
... )
895-
896897
>>> # Get rating table for USGS streamgage 01585200
897898
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings")
898899
@@ -907,7 +908,8 @@ def get_record(
907908
"""
908909
_check_sites_value_types(sites)
909910

910-
if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
911+
defunct_services = ["measurements", "gwlevels", "pmcodes", "water_use"]
912+
if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES + defunct_services:
911913
raise TypeError(f"Unrecognized service: {service}")
912914

913915
if service == "iv":
@@ -1235,4 +1237,10 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
12351237
def variable_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
12361238
# define variable_info metadata based on parameterCd if available
12371239
if "parameterCd" in self._parameters:
1238-
return get_pmcodes(parameterCd=self._parameters["parameterCd"])
1240+
warnings.warn(
1241+
"Accessing variable_info via NWIS_Metadata is deprecated as "
1242+
"it relies on the defunct get_pmcodes function.",
1243+
DeprecationWarning,
1244+
stacklevel=2,
1245+
)
1246+
return None

dataretrieval/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ def query(url, payload, delimiter=",", ssl_check=True):
212212
+ f"API response reason: {_reason}. Pseudo-code example of how to "
213213
+ f"split your query: \n {_example}"
214214
)
215+
elif response.status_code in [500, 502, 503]:
216+
raise ValueError(
217+
f"Service Unavailable: {response.status_code} {response.reason}. "
218+
+ f"The service at {response.url} may be down or experiencing issues."
219+
)
220+
215221

216222
if response.text.startswith("No sites/data"):
217223
raise NoSitesError(response.url)

tests/nwis_test.py

Lines changed: 110 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,14 @@
77

88
from dataretrieval.nwis import (
99
NWIS_Metadata,
10+
get_discharge_measurements,
11+
get_gwlevels,
1012
get_info,
1113
get_iv,
14+
get_pmcodes,
15+
get_qwdata,
1216
get_record,
17+
get_water_use,
1318
preformat_peaks_response,
1419
what_sites,
1520
)
@@ -21,24 +26,69 @@
2126
SITENO_COL = "site_no"
2227

2328

24-
def test_iv_service():
25-
"""Unit test of instantaneous value service"""
29+
def _test_iv_service(requests_mock):
30+
"""Mocked test of instantaneous value service"""
2631
start = START_DATE
2732
end = END_DATE
2833
service = "iv"
2934
site = ["03339000", "05447500", "03346500"]
35+
36+
# Minimal mock response
37+
mock_url = (
38+
"https://waterservices.usgs.gov/nwis/iv?format=json&"
39+
f"startDT={start}&endDT={end}&sites=03339000%2C05447500%2C03346500"
40+
)
41+
# We use a very simple JSON structure just to satisfy the parser
42+
mock_json = {
43+
"value": {
44+
"timeSeries": [
45+
{
46+
"sourceInfo": {"siteCode": [{"value": "03339000"}]},
47+
"variable": {"variableCode": [{"value": "00060"}], "options": {"option": [{"value": "mean"}]}},
48+
"values": [{"method": [{"methodDescription": "mean"}], "value": [{"value": "1.0", "dateTime": "2018-01-24T00:00:00Z", "qualifiers": "A"}]}]
49+
},
50+
{
51+
"sourceInfo": {"siteCode": [{"value": "05447500"}]},
52+
"variable": {"variableCode": [{"value": "00060"}], "options": {"option": [{"value": "mean"}]}},
53+
"values": [{"method": [{"methodDescription": "mean"}], "value": [{"value": "2.0", "dateTime": "2018-01-24T00:00:00Z", "qualifiers": "A"}]}]
54+
}
55+
]
56+
}
57+
}
58+
59+
requests_mock.get(mock_url, json=mock_json)
60+
3061
return get_record(site, start, end, service=service)
3162

3263

33-
def test_iv_service_answer():
34-
df = test_iv_service()
64+
def test_iv_service_answer(requests_mock):
65+
df = _test_iv_service(requests_mock)
3566
# check multiindex function
3667
assert df.index.names == [
3768
SITENO_COL,
3869
DATETIME_COL,
3970
], f"iv service returned incorrect index: {df.index.names}"
4071

4172

73+
def test_nwis_service_live():
74+
"""Live sanity check of NWIS service; skipped on transient 500/502/503 or non-JSON responses."""
75+
site = "01491000"
76+
try:
77+
# Minimal query: just most recent record
78+
get_iv(sites=site)
79+
except ValueError as e:
80+
# Catch our custom 5xx error from utils.py
81+
if any(err in str(e) for err in ["502", "503", "Service Unavailable"]):
82+
pytest.skip(f"Service is currently unavailable (transient 502/503): {e}")
83+
raise e
84+
except Exception as e:
85+
# Fallback for other potential transient network issues
86+
if "Expecting value" in str(e) or "JSON" in str(e):
87+
pytest.skip(f"Service returned invalid response (likely 502/503): {e}")
88+
raise e
89+
90+
91+
4292
def test_preformat_peaks_response():
4393
# make a data frame with a "peak_dt" datetime column
4494
# it will have some nan and none values
@@ -61,82 +111,48 @@ def test_preformat_peaks_response():
61111
# incomplete date-time information
62112

63113

64-
@pytest.mark.xfail(reason="Modern service does not return incomplete dates.")
65-
def test_inc_date_01():
66-
"""Test based on GitHub Issue #47 - lack of timestamp for measurement."""
67-
site = "403451073585601"
68-
# make call expecting a warning to be thrown due to incomplete dates
69-
with pytest.warns(UserWarning) as record:
70-
df = get_record(site, "1980-01-01", "1990-01-01", service="gwlevels")
71-
72-
if len(df) == 0:
73-
pytest.skip(f"Site {site} returned no data.")
74-
75-
assert len(record) > 0
76-
# assert that there are indeed incomplete dates
77-
assert pd.isna(df.index).any()
78-
# assert that the datetime index is there
79-
assert df.index.name == "datetime"
80-
# make call without defining a datetime index and check that it isn't there
81-
df2 = get_record(
82-
site, "1980-01-01", "1990-01-01", service="gwlevels", datetime_index=False
83-
)
84-
# assert shape of both dataframes is the same (contain the same data)
85-
assert df.shape == df2.shape
86-
# assert that the datetime index is not there
87-
assert df2.index.name != "datetime"
88-
89-
90-
@pytest.mark.xfail(reason="Modern service does not return incomplete dates.")
91-
def test_inc_date_02():
92-
"""Test based on GitHub Issue #47 - lack of month, day, or time."""
93-
site = "180049066381200"
94-
# make call expecting a warning to be thrown due to incomplete dates
95-
with pytest.warns(UserWarning) as record:
96-
df = get_record(site, "1900-01-01", "2013-01-01", service="gwlevels")
97-
98-
if len(df) == 0:
99-
pytest.skip(f"Site {site} returned no data.")
100-
101-
assert len(record) > 0
102-
# assert that there are indeed incomplete dates
103-
assert pd.isna(df.index).any()
104-
# assert that the datetime index is there
105-
assert df.index.name == "datetime"
106-
# make call without defining a datetime index and check that it isn't there
107-
df2 = get_record(
108-
site, "1900-01-01", "2013-01-01", service="gwlevels", datetime_index=False
109-
)
110-
# assert shape of both dataframes is the same (contain the same data)
111-
assert df.shape == df2.shape
112-
# assert that the datetime index is not there
113-
assert df2.index.name != "datetime"
114-
115-
116-
@pytest.mark.xfail(reason="Modern service does not return incomplete dates.")
117-
def test_inc_date_03():
118-
"""Test based on GitHub Issue #47 - lack of day, and times."""
119-
site = "290000095192602"
120-
# make call expecting a warning to be thrown due to incomplete dates
121-
with pytest.warns(UserWarning) as record:
122-
df = get_record(site, "1975-01-01", "2000-01-01", service="gwlevels")
123-
124-
if len(df) == 0:
125-
pytest.skip(f"Site {site} returned no data.")
126-
127-
assert len(record) > 0
128-
# assert that there are indeed incomplete dates
129-
assert pd.isna(df.index).any()
130-
# assert that the datetime index is there
131-
assert df.index.name == "datetime"
132-
# make call without defining a datetime index and check that it isn't there
133-
df2 = get_record(
134-
site, "1975-01-01", "2000-01-01", service="gwlevels", datetime_index=False
135-
)
136-
# assert shape of both dataframes is the same (contain the same data)
137-
assert df.shape == df2.shape
138-
# assert that the datetime index is not there
139-
assert df2.index.name != "datetime"
114+
# Removed defunct gwlevels tests.
115+
116+
117+
class TestDefunct:
118+
"""Verify that defunct functions raise NameError."""
119+
120+
def test_get_qwdata_raises(self):
121+
with pytest.raises(NameError, match="get_qwdata"):
122+
get_qwdata()
123+
124+
def test_get_discharge_measurements_raises(self):
125+
with pytest.raises(NameError, match="get_discharge_measurements"):
126+
get_discharge_measurements()
127+
128+
def test_get_gwlevels_raises(self):
129+
with pytest.raises(NameError, match="get_gwlevels"):
130+
get_gwlevels()
131+
132+
def test_get_pmcodes_raises(self):
133+
with pytest.raises(NameError, match="get_pmcodes"):
134+
get_pmcodes()
135+
136+
def test_get_water_use_raises(self):
137+
with pytest.raises(NameError, match="get_water_use"):
138+
get_water_use()
139+
140+
def test_get_record_defunct_service_measurements(self):
141+
with pytest.raises(NameError, match="get_discharge_measurements"):
142+
get_record(service="measurements")
143+
144+
def test_get_record_defunct_service_gwlevels(self):
145+
with pytest.raises(NameError, match="get_gwlevels"):
146+
get_record(service="gwlevels")
147+
148+
def test_get_record_defunct_service_pmcodes(self):
149+
with pytest.raises(NameError, match="get_pmcodes"):
150+
get_record(service="pmcodes")
151+
152+
def test_get_record_defunct_service_water_use(self):
153+
with pytest.raises(NameError, match="get_water_use"):
154+
get_record(service="water_use")
155+
140156

141157

142158
class TestTZ:
@@ -211,14 +227,26 @@ def test_expandedrdb_get_info(self):
211227
assert "count_nu" not in data.columns
212228

213229

214-
def test_empty_timeseries():
230+
def test_empty_timeseries(requests_mock):
215231
"""Test based on empty case from GitHub Issue #26."""
232+
sites = "011277906"
233+
start = "2010-07-20"
234+
end = "2010-07-20"
235+
236+
mock_url = (
237+
f"https://waterservices.usgs.gov/nwis/iv?format=json&"
238+
f"startDT={start}&endDT={end}&sites={sites}"
239+
)
240+
mock_json = {"value": {"timeSeries": []}}
241+
requests_mock.get(mock_url, json=mock_json)
242+
216243
df = get_record(
217-
sites="011277906", service="iv", start="2010-07-20", end="2010-07-20"
244+
sites=sites, service="iv", start=start, end=end
218245
)
219246
assert df.empty is True
220247

221248

249+
222250
class TestMetaData:
223251
"""Tests of NWIS metadata setting,
224252

0 commit comments

Comments
 (0)