Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@

## Latest Announcements

:mega: **01/16/2025:** `dataretrieval` now features the `waterdata` module,
**02/24/2026:** The `get_gwlevels` and `get_discharge_measurements` functions in the `nwis` module are deprecated and have been replaced with the `get_field_measurements` function in the `waterdata` module. The `get_pmcodes` function in the `nwis` module has been replaced with the `get_reference_table(collection='parameter-codes')` function in the `waterdata` module.

:mega: **01/16/2026:** `dataretrieval` now features the `waterdata` module,
which provides access to USGS's modernized [Water Data
APIs](https://api.waterdata.usgs.gov/). The Water Data API endpoints include
daily values, **instantaneous values**, field measurements, time series metadata,
daily values, instantaneous values, field measurements, time series metadata, statistics,
and discrete water quality data from the [Samples database](https://waterdata.usgs.gov/download-samples/#dataProfile=site). This new module replaces the `nwis` module, which provides access to the legacy [NWIS
Water Services](https://waterservices.usgs.gov/). Take a look at the new [`waterdata` module demo notebook](demos/WaterData_demo.ipynb), which walks through an extended example using a majority of the available `waterdata` functions.

Expand Down Expand Up @@ -219,6 +221,7 @@ print(f"Found {len(flowlines)} upstream tributaries within 50km")
- **Time series metadata**: Information about available data parameters
- **Latest daily values**: Most recent daily statistical summary data
- **Latest instantaneous values**: Most recent high-frequency continuous data
- **Daily, monthly, and annual statistics**: Median, maximum, minimum, arithmetic mean, and percentile statistics
- **Samples data**: Discrete USGS water quality data

### Legacy NWIS Services (Deprecated)
Expand All @@ -227,7 +230,6 @@ print(f"Found {len(flowlines)} upstream tributaries within 50km")
- **Site info (site)**: Basic site information
- **Statistics (stat)**: Statistical summaries
- **Discharge peaks (peaks)**: Annual peak discharge events
- **Discharge measurements (measurements)**: Direct flow measurements

### Water Quality Portal
- **Results**: Water quality analytical results from USGS, EPA, and other agencies
Expand Down
245 changes: 16 additions & 229 deletions dataretrieval/nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,7 @@

WATERSERVICES_SERVICES = ["dv", "iv", "site", "stat"]
WATERDATA_SERVICES = [
"gwlevels",
"measurements",
"peaks",
"pmcodes",
"water_use",
"ratings",
]
Expand Down Expand Up @@ -152,54 +149,15 @@ def get_discharge_measurements(
**kwargs,
) -> Tuple[pd.DataFrame, BaseMetadata]:
"""
Get discharge measurements from the waterdata service.

Parameters
----------
sites: string or list of strings, optional, default is None
start: string, optional, default is None
Supply date in the format: YYYY-MM-DD
end: string, optional, default is None
Supply date in the format: YYYY-MM-DD
ssl_check: bool, optional
If True, check SSL certificates, if False, do not check SSL,
default is True
**kwargs: optional
If supplied, will be used as query parameters

Returns
-------
df: ``pandas.DataFrame``
Times series data from the NWIS JSON
md: :obj:`dataretrieval.utils.Metadata`
A custom metadata object

Examples
--------
.. doctest::

>>> # Get discharge measurements for site 05114000
>>> df, md = dataretrieval.nwis.get_discharge_measurements(
... sites="05114000", start="2000-01-01", end="2000-01-30"
... )

>>> # Get discharge measurements for sites in Alaska
>>> df, md = dataretrieval.nwis.get_discharge_measurements(
... start="2012-01-09", end="2012-01-10", stateCd="AK"
... )
Get discharge measurements from the waterdata service -
deprecated, use `get_field_measurements()` in the waterdata
module.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Get discharge measurements from the waterdata service -
deprecated, use `get_field_measurements()` in the waterdata
module.
This endpoint is defunct, use `get_field_measurements()` in the waterdata
module.

Rephrase as you like.


"""
_check_sites_value_types(sites)

kwargs["site_no"] = kwargs.pop("site_no", sites)
kwargs["begin_date"] = kwargs.pop("begin_date", start)
kwargs["end_date"] = kwargs.pop("end_date", end)

if "format" not in kwargs:
kwargs["format"] = "rdb"
raise NameError(
"`nwis.get_discharge_measurements` has been replaced with `waterdata.get_field_measurements`."
)

response = query_waterdata("measurements", ssl_check=ssl_check, **kwargs)
return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)


def get_discharge_peaks(
Expand Down Expand Up @@ -281,78 +239,15 @@ def get_gwlevels(
**kwargs,
) -> Tuple[pd.DataFrame, BaseMetadata]:
"""
Queries the groundwater level service from waterservices

Parameters
----------
sites: string or list of strings, optional, default is None
If the waterdata parameter site_no is supplied, it will overwrite the
sites parameter
start: string, optional, default is '1851-01-01'
If the waterdata parameter begin_date is supplied, it will overwrite
the start parameter
end: string, optional, default is None
If the waterdata parameter end_date is supplied, it will overwrite the
end parameter (YYYY-MM-DD)
multi_index: bool, optional
If False, a dataframe with a single-level index (datetime) is returned,
default is True
datetime_index : bool, optional
If True, create a datetime index, default is True
ssl_check: bool, optional
If True, check SSL certificates, if False, do not check SSL,
default is True
**kwargs: optional
If supplied, will be used as query parameters

Returns
-------
df: ``pandas.DataFrame``
Times series data from the NWIS JSON
md: :obj:`dataretrieval.utils.Metadata`
A custom metadata object

Examples
--------
.. doctest::

>>> # Get groundwater levels for site 434400121275801
>>> df, md = dataretrieval.nwis.get_gwlevels(sites="434400121275801")
Formerly queried the groundwater level service from waterservices.
This function is defunct and now raises an error; use
`get_field_measurements()` in the waterdata module instead.

"""
_check_sites_value_types(sites)

# Make kwargs backwards compatible with waterservices
# vocabulary
if "startDT" in kwargs:
kwargs["begin_date"] = kwargs.pop("startDT")
if "endDT" in kwargs:
kwargs["end_date"] = kwargs.pop("endDT")
if "sites" in kwargs:
kwargs["site_no"] = kwargs.pop("sites")
if "stateCd" in kwargs:
kwargs["state_cd"] = kwargs.pop("stateCd")

kwargs["begin_date"] = kwargs.pop("begin_date", start)
kwargs["end_date"] = kwargs.pop("end_date", end)
kwargs["site_no"] = kwargs.pop("site_no", sites)
kwargs["multi_index"] = multi_index

response = query_waterdata("gwlevels", format="rdb", ssl_check=ssl_check, **kwargs)

df = _read_rdb(response.text)

if datetime_index is True:
df = format_datetime(df, "lev_dt", "lev_tm", "lev_tz_cd")

# Filter by kwarg parameterCd because the service doesn't do it
if "parameterCd" in kwargs:
pcodes = kwargs["parameterCd"]
if isinstance(pcodes, str):
pcodes = [pcodes]
df = df[df["parameter_cd"].isin(pcodes)]
raise NameError(
"`nwis.get_gwlevels` has been replaced with `waterdata.get_field_measurements()`."
)

return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)


def get_stats(
Expand Down Expand Up @@ -785,78 +680,15 @@ def get_pmcodes(
ssl_check: bool = True,
) -> Tuple[pd.DataFrame, BaseMetadata]:
"""
Return a ``pandas.DataFrame`` containing all NWIS parameter codes.

Parameters
----------
parameterCd: string or list of strings, default is 'All'
Accepts parameter codes or names
partial: bool, optional
Default is True (partial querying). If False, the function will query
only exact matches, default is True
ssl_check: bool, optional
If True, check SSL certificates, if False, do not check SSL,
default is True

Returns
-------
df: ``pandas.DataFrame``
Data retrieved from the NWIS web service.
md: :obj:`dataretrieval.utils.Metadata`
A custom metadata object

Examples
--------
.. doctest::

>>> # Get information about the '00060' pcode
>>> df, md = dataretrieval.nwis.get_pmcodes(
... parameterCd="00060", partial=False
... )

>>> # Get information about all 'Discharge' pcodes
>>> df, md = dataretrieval.nwis.get_pmcodes(
... parameterCd="Discharge", partial=True
... )
Formerly returned a ``pandas.DataFrame`` containing all NWIS parameter codes.
This function is defunct and now raises an error; use
`get_reference_table(collection="parameter-codes")` instead.

"""

payload = {"fmt": "rdb"}
url = PARAMCODES_URL

if isinstance(parameterCd, str): # when a single code or name is given
if parameterCd.lower() == "all":
payload.update({"group_cd": "%"})
url = ALLPARAMCODES_URL
response = query(url, payload, ssl_check=ssl_check)
return _read_rdb(response.text), NWIS_Metadata(response)

else:
parameterCd = [parameterCd]

if not isinstance(parameterCd, list):
raise TypeError(
"Parameter information (code or name) must be type string or list"
raise NameError(
"`nwis.get_pmcodes` has been replaced with `get_reference_table(collection='parameter-codes')`."
)

# Querying with a list of parameters names, codes, or mixed
return_list = []
for param in parameterCd:
if isinstance(param, str):
if partial:
param = f"%{param}%"
payload.update({"parm_nm_cd": param})
response = query(url, payload, ssl_check=ssl_check)
if len(response.text.splitlines()) < 10: # empty query
raise TypeError(
"One of the parameter codes or names entered does not"
"return any information, please try a different value"
)
return_list.append(_read_rdb(response.text))
else:
raise TypeError("Parameter information (code or name) must be type string")
return pd.concat(return_list), NWIS_Metadata(response)


def get_water_use(
years: Union[str, List[str]] = "ALL",
Expand Down Expand Up @@ -1075,10 +907,7 @@ def get_record(
- 'iv' : instantaneous data
- 'dv' : daily mean data
- 'site' : site description
- 'measurements' : discharge measurements
- 'peaks': discharge peaks
- 'gwlevels': groundwater levels
- 'pmcodes': get parameter codes
- 'water_use': get water use data
- 'ratings': get rating table
- 'stat': get statistics
Expand All @@ -1105,24 +934,9 @@ def get_record(
>>> # Get site description for site 01585200
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")

>>> # Get discharge measurements for site 01585200
>>> df = dataretrieval.nwis.get_record(
... sites="01585200", service="measurements"
... )

>>> # Get discharge peaks for site 01585200
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks")

>>> # Get latest groundwater level for site 434400121275801
>>> df = dataretrieval.nwis.get_record(
... sites="434400121275801", service="gwlevels"
... )

>>> # Get information about the discharge parameter code
>>> df = dataretrieval.nwis.get_record(
... service="pmcodes", parameterCd="00060"
... )

>>> # Get water use data for livestock nationally in 2010
>>> df = dataretrieval.nwis.get_record(
... service="water_use", years="2010", categories="L"
Expand Down Expand Up @@ -1171,12 +985,6 @@ def get_record(
df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs)
return df

elif service == "measurements":
df, _ = get_discharge_measurements(
site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs
)
return df

elif service == "peaks":
df, _ = get_discharge_peaks(
site_no=sites,
Expand All @@ -1188,22 +996,6 @@ def get_record(
)
return df

elif service == "gwlevels":
df, _ = get_gwlevels(
sites=sites,
startDT=start,
endDT=end,
multi_index=multi_index,
datetime_index=datetime_index,
ssl_check=ssl_check,
**kwargs,
)
return df

elif service == "pmcodes":
df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs)
return df

elif service == "water_use":
df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs)
return df
Expand Down Expand Up @@ -1461,8 +1253,3 @@ def site_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]:
else:
return None # don't set metadata site_info attribute

@property
def variable_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]:
# define variable_info metadata based on parameterCd if available
if "parameterCd" in self._parameters:
return get_pmcodes(parameterCd=self._parameters["parameterCd"])
Loading