Skip to content

Commit 4c46ead

Browse files
committed
merge with main, add in percentiles for max, min, median, add example
2 parents 98e3e86 + 9e64dd1 commit 4c46ead

10 files changed

Lines changed: 926 additions & 79 deletions

File tree

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@
66

77
## Latest Announcements
88

9-
:mega: **12/04/2025:** `dataretrieval` now features the new `waterdata` module,
9+
:mega: **01/16/2026:** `dataretrieval` now features the `waterdata` module,
1010
which provides access to USGS's modernized [Water Data
1111
APIs](https://api.waterdata.usgs.gov/). The Water Data API endpoints include
1212
daily values, **instantaneous values**, field measurements, time series metadata,
13-
and discrete water quality data from the Samples database. This new module will
14-
eventually replace the `nwis` module, which provides access to the legacy [NWIS
15-
Water Services](https://waterservices.usgs.gov/).
13+
and discrete water quality data from the [Samples database](https://waterdata.usgs.gov/download-samples/#dataProfile=site). This new module replaces the `nwis` module, which provides access to the legacy [NWIS
14+
Water Services](https://waterservices.usgs.gov/). Take a look at the new [`waterdata` module demo notebook](demos/WaterData_demo.ipynb), which walks through an extended example using a majority of the available `waterdata` functions.
1615

1716
Check out the [NEWS](NEWS.md) file for all updates and announcements.
1817

dataretrieval/waterdata/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
# Public API exports
1313
from .api import (
14-
_check_profiles,
1514
get_codes,
1615
get_continuous,
1716
get_daily,
@@ -43,7 +42,6 @@
4342
"get_por_stats",
4443
"get_samples",
4544
"get_time_series_metadata",
46-
"_check_profiles",
4745
"CODE_SERVICES",
4846
"SERVICES",
4947
"PROFILES",

dataretrieval/waterdata/api.py

Lines changed: 131 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@
1616
from dataretrieval.utils import BaseMetadata, to_str
1717
from dataretrieval.waterdata.types import (
1818
CODE_SERVICES,
19-
PROFILE_LOOKUP,
19+
METADATA_COLLECTIONS,
2020
PROFILES,
2121
SERVICES,
2222
)
2323
from dataretrieval.waterdata.utils import (
2424
SAMPLES_URL,
2525
get_ogc_data,
26-
get_stats_data
26+
get_stats_data,
27+
_check_profiles
2728
)
2829

2930
# Set up logger for this module
@@ -689,9 +690,13 @@ def get_time_series_metadata(
689690
parameter_name: Optional[Union[str, List[str]]] = None,
690691
properties: Optional[Union[str, List[str]]] = None,
691692
statistic_id: Optional[Union[str, List[str]]] = None,
693+
hydrologic_unit_code: Optional[Union[str, List[str]]] = None,
694+
state_name: Optional[Union[str, List[str]]] = None,
692695
last_modified: Optional[Union[str, List[str]]] = None,
693696
begin: Optional[Union[str, List[str]]] = None,
694697
end: Optional[Union[str, List[str]]] = None,
698+
begin_utc: Optional[Union[str, List[str]]] = None,
699+
end_utc: Optional[Union[str, List[str]]] = None,
695700
unit_of_measure: Optional[Union[str, List[str]]] = None,
696701
computation_period_identifier: Optional[Union[str, List[str]]] = None,
697702
computation_identifier: Optional[Union[str, List[str]]] = None,
@@ -740,6 +745,17 @@ def get_time_series_metadata(
740745
Example codes include 00001 (max), 00002 (min), and 00003 (mean).
741746
A complete list of codes and their descriptions can be found at
742747
https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html.
748+
hydrologic_unit_code : string or list of strings, optional
749+
The United States is divided and sub-divided into successively smaller
750+
hydrologic units which are classified into four levels: regions,
751+
sub-regions, accounting units, and cataloging units. The hydrologic
752+
units are arranged within each other, from the smallest (cataloging units)
753+
to the largest (regions). Each hydrologic unit is identified by a unique
754+
hydrologic unit code (HUC) consisting of two to eight digits based on the
755+
four levels of classification in the hydrologic unit system.
756+
state_name : string or list of strings, optional
757+
The name of the state or state equivalent in which the monitoring location
758+
is located.
743759
last_modified : string, optional
744760
The last time a record was refreshed in our database. This may happen
745761
due to regular operational processes and does not necessarily indicate
@@ -758,6 +774,14 @@ def get_time_series_metadata(
758774
for the last 36 hours
759775
760776
begin : string or list of strings, optional
777+
This field contains the same information as "begin_utc", but in the
778+
local time of the monitoring location. It is retained for backwards
779+
compatibility, but will be removed in V1 of these APIs.
780+
end : string or list of strings, optional
781+
This field contains the same information as "end_utc", but in the
782+
local time of the monitoring location. It is retained for backwards
783+
compatibility, but will be removed in V1 of these APIs.
784+
begin_utc : string or list of strings, optional
761785
The datetime of the earliest observation in the time series. Together
762786
with end_utc, this field represents the period of record of a time series.
763787
Note that some time series may have large gaps in their collection
@@ -774,7 +798,7 @@ def get_time_series_metadata(
774798
* Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z"
775799
* Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours
776800
777-
end : string or list of strings, optional
801+
end_utc : string or list of strings, optional
778802
The datetime of the most recent observation in the time series. Data returned by
779803
this endpoint updates at most once per day, and potentially less frequently than
780804
that, and as such there may be more recent observations within a time series
@@ -1393,6 +1417,84 @@ def get_field_measurements(
13931417
return get_ogc_data(args, output_id, service)
13941418

13951419

1420+
def get_reference_table(
    collection: str,
    limit: Optional[int] = None,
    query: Optional[dict] = None,
) -> Tuple[pd.DataFrame, BaseMetadata]:
    """Get metadata reference tables for the USGS Water Data API.

    Reference tables provide the range of allowable values for parameter
    arguments in the waterdata module.

    Parameters
    ----------
    collection : string
        One of the following options: "agency-codes", "altitude-datums",
        "aquifer-codes", "aquifer-types", "coordinate-accuracy-codes",
        "coordinate-datum-codes", "coordinate-method-codes", "counties",
        "hydrologic-unit-codes", "medium-codes", "national-aquifer-codes",
        "parameter-codes", "reliability-codes", "site-types", "states",
        "statistic-codes", "topographic-codes", "time-zone-codes"
    limit : numeric, optional
        The optional limit parameter is used to control the subset of the
        selected features that should be returned in each page. The maximum
        allowable limit is 50000. It may be beneficial to set this number lower
        if your internet connection is spotty. The default (None) will set the
        limit to the maximum allowable limit for the service. When given, it
        takes precedence over a "limit" key supplied through ``query``.
    query : dictionary, optional
        The optional query parameter can be used to pass a dictionary of
        query parameters to the collection API call. The dictionary is
        copied before use, so the caller's object is never modified.
        ``None`` (the default) and an empty dictionary both mean "no
        additional query parameters".

    Returns
    -------
    df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame``
        Formatted data returned from the API query. The primary metadata
        of each reference table will show up in the first column, where
        the name of the column is the singular form of the collection name,
        separated by underscores (e.g. the "medium-codes" reference table
        has a column called "medium_code", which contains all possible
        medium code values).
    md : :obj:`dataretrieval.utils.BaseMetadata`
        A custom metadata object including the URL request and query time.

    Raises
    ------
    ValueError
        If ``collection`` is not one of the supported collection names.

    Examples
    --------
    .. code::

        >>> # Get table of USGS parameter codes
        >>> ref, md = dataretrieval.waterdata.get_reference_table(
        ...     collection="parameter-codes"
        ... )

        >>> # Get table of selected USGS parameter codes
        >>> ref, md = dataretrieval.waterdata.get_reference_table(
        ...     collection="parameter-codes",
        ...     query={'id': '00001,00002'}
        ... )
    """
    valid_collections = get_args(METADATA_COLLECTIONS)
    if collection not in valid_collections:
        raise ValueError(
            f"Invalid code service: '{collection}'. "
            f"Valid options are: {valid_collections}."
        )

    # Give ID column the collection name with underscores. "counties" is the
    # one irregular plural, so it is handled before the generic "strip the
    # trailing s" rule.
    if collection == "counties":
        output_id = "county"
    elif collection.endswith("s"):
        output_id = collection[:-1].replace("-", "_")
    else:
        output_id = collection.replace("-", "_")

    # Work on a private copy: avoids the shared-mutable-default pitfall and
    # guarantees the caller's dictionary is left untouched. Also tolerates an
    # explicit ``query=None``.
    args = dict(query) if query else {}

    # NOTE(review): assumes get_ogc_data honors a "limit" key in ``args`` the
    # same way it does for the other OGC-backed getters in this module --
    # confirm against dataretrieval.waterdata.utils.
    if limit is not None:
        args["limit"] = limit

    return get_ogc_data(
        args=args,
        output_id=output_id,
        service=collection,
    )
1496+
1497+
13961498
def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
13971499
"""Return codes from a Samples code service.
13981500
@@ -1661,7 +1763,8 @@ def get_por_stats(
16611763
parameter_code: Optional[Union[str, list[str]]] = None,
16621764
expand_percentiles: bool = True
16631765
) -> Tuple[pd.DataFrame, BaseMetadata]:
1664-
"""Get water data statistics from the USGS Water Data API.
1766+
"""Get day-of-year and month-of-year water data statistics from the
1767+
USGS Water Data API.
16651768
This service provides endpoints for access to computations on the
16661769
historical record regarding water conditions, including minimum, maximum,
16671770
mean, median, and percentiles for day of year, month, month-year, and
@@ -1762,7 +1865,7 @@ def get_date_range_stats(
17621865
parameter_code: Optional[Union[str, list[str]]] = None,
17631866
expand_percentiles: bool = True
17641867
) -> Tuple[pd.DataFrame, BaseMetadata]:
1765-
"""Get water data statistics from the USGS Water Data API.
1868+
"""Get monthly and annual water data statistics from the USGS Water Data API.
17661869
This service provides endpoints for access to computations on the
17671870
historical record regarding water conditions, including minimum, maximum,
17681871
mean, median, and percentiles for day of year, month, month-year, and
@@ -1836,6 +1939,29 @@ def get_date_range_stats(
18361939
individual rows in the dataframe. Missing percentile values expressed
18371940
as 'nan' in the list of string values are removed from the dataframe
18381941
to save space.
1942+
1943+
Examples
1944+
--------
1945+
.. code::
1946+
1947+
>>> # Get monthly and yearly medians for streamflow at streams in Rhode Island
1948+
>>> # from calendar year 2024.
1949+
>>> df, md = dataretrieval.waterdata.get_date_range_stats(
1950+
... state_code="US:44", # State code for Rhode Island
1951+
... parameter_code="00060",
1952+
... site_type_code="ST",
1953+
... start_date="2024-01-01",
1954+
... end_date="2024-12-31",
1955+
... computation_type="median"
1956+
... )
1957+
1958+
>>> # Get monthly and yearly minimum and maximums for gage height at
1959+
>>> # a monitoring location of interest
1960+
>>> df, md = dataretrieval.waterdata.get_date_range_stats(
1961+
... monitoring_location_id="USGS-05114000",
1962+
... parameter_code="00065",
1963+
... computation_type=["minimum", "maximum"]
1964+
... )
18391965
"""
18401966
params = {
18411967
k: v
@@ -1849,31 +1975,3 @@ def get_date_range_stats(
18491975
expand_percentiles=expand_percentiles
18501976
)
18511977

1852-
1853-
def _check_profiles(
    service: SERVICES,
    profile: PROFILES,
) -> None:
    """Validate that ``profile`` is an allowed profile for ``service``.

    Parameters
    ----------
    service : string
        One of the service names from the "services" list.
    profile : string
        One of the profile names from "results_profiles",
        "locations_profiles", "activities_profiles",
        "projects_profiles" or "organizations_profiles".

    Raises
    ------
    ValueError
        If ``service`` is not a recognised service name, or if ``profile``
        is not listed among the profiles registered for that service.
    """
    # The service must be validated first: it is the key used to look up
    # the set of acceptable profiles below.
    valid_services = get_args(SERVICES)
    service_is_known = service in valid_services
    if not service_is_known:
        raise ValueError(
            f"Invalid service: '{service}'. Valid options are: {valid_services}."
        )

    valid_profiles = PROFILE_LOOKUP[service]
    if profile in valid_profiles:
        # Both arguments check out; nothing to report.
        return None

    raise ValueError(
        f"Invalid profile: '{profile}' for service '{service}'. "
        f"Valid options are: {valid_profiles}."
    )
)

dataretrieval/waterdata/types.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,27 @@
1111
"states",
1212
]
1313

14+
# Names of the metadata reference-table collections. get_reference_table()
# validates its ``collection`` argument against these values (extracted with
# typing.get_args) and raises ValueError for anything not listed here.
METADATA_COLLECTIONS = Literal[
15+
"agency-codes",
16+
"altitude-datums",
17+
"aquifer-codes",
18+
"aquifer-types",
19+
"coordinate-accuracy-codes",
20+
"coordinate-datum-codes",
21+
"coordinate-method-codes",
22+
"counties",
23+
"hydrologic-unit-codes",
24+
"medium-codes",
25+
"national-aquifer-codes",
26+
"parameter-codes",
27+
"reliability-codes",
28+
"site-types",
29+
"states",
30+
"statistic-codes",
31+
"topographic-codes",
32+
"time-zone-codes",
33+
]
34+
1435
SERVICES = Literal[
1536
"activities",
1637
"locations",

0 commit comments

Comments
 (0)