Skip to content

Commit 4dc9f6a

Browse files
authored
Merge pull request #207 from ehinman/add-stats-service
Add stats service
2 parents 9e64dd1 + 8ff1ae5 commit 4dc9f6a

4 files changed

Lines changed: 523 additions & 9 deletions

File tree

dataretrieval/waterdata/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@
2020
get_monitoring_locations,
2121
get_reference_table,
2222
get_samples,
23+
get_stats_date_range,
24+
get_stats_por,
2325
get_time_series_metadata,
2426
)
2527
from .types import (
@@ -39,6 +41,8 @@
3941
"get_monitoring_locations",
4042
"get_reference_table",
4143
"get_samples",
44+
"get_stats_date_range",
45+
"get_stats_por",
4246
"get_time_series_metadata",
4347
"CODE_SERVICES",
4448
"SERVICES",

dataretrieval/waterdata/api.py

Lines changed: 259 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,7 @@
2323
from dataretrieval.waterdata.utils import (
2424
SAMPLES_URL,
2525
get_ogc_data,
26-
_construct_api_requests,
27-
_walk_pages,
26+
get_stats_data,
2827
_check_profiles
2928
)
3029

@@ -1748,3 +1747,261 @@ def get_samples(
17481747

17491748
return df, BaseMetadata(response)
17501749

1750+
def get_stats_por(
    approval_status: Optional[str] = None,
    computation_type: Optional[Union[str, list[str]]] = None,
    country_code: Optional[Union[str, list[str]]] = None,
    state_code: Optional[Union[str, list[str]]] = None,
    county_code: Optional[Union[str, list[str]]] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    monitoring_location_id: Optional[Union[str, list[str]]] = None,
    page_size: int = 1000,
    parent_time_series_id: Optional[Union[str, list[str]]] = None,
    site_type_code: Optional[Union[str, list[str]]] = None,
    site_type_name: Optional[Union[str, list[str]]] = None,
    parameter_code: Optional[Union[str, list[str]]] = None,
    expand_percentiles: bool = True
) -> Tuple[pd.DataFrame, BaseMetadata]:
    """Get day-of-year and month-of-year statistics from the USGS Water Data API.

    This service (called the "observationNormals" endpoint on
    api.waterdata.usgs.gov) provides endpoints for access to computations on
    the historical record regarding water conditions, including minimum,
    maximum, mean, median, and percentiles for day of year and month of year.
    For more information regarding the calculation of statistics and other
    details, please visit the Statistics documentation page:
    https://waterdata.usgs.gov/statistics-documentation/.

    Note: This API is under active beta development and subject to
    change. Improved handling of significant figures will be
    addressed in a future release.

    Parameters
    ----------
    approval_status : string, optional
        Whether to include approved and/or provisional observations.
        At this time, only approved observations are returned.
    computation_type : string or list of strings, optional
        Desired statistical computation method. Available values are:
        arithmetic_mean, maximum, median, minimum, percentile.
    country_code : string or list of strings, optional
        Country query parameter. API defaults to "US".
    state_code : string or list of strings, optional
        State query parameter. Takes the format "US:XX", where XX is
        the two-digit state code. API defaults to "US:42" (Pennsylvania).
    county_code : string or list of strings, optional
        County query parameter. Takes the format "US:XX:YYY", where XX is
        the two-digit state code and YYY is the three-digit county code.
        API defaults to "US:42:103" (Pennsylvania, Pike County).
    start_date : string or datetime, optional
        Start day for the query in the month-day format (MM-DD).
    end_date : string or datetime, optional
        End day for the query in the month-day format (MM-DD).
    monitoring_location_id : string or list of strings, optional
        A unique identifier representing a single monitoring location. This
        corresponds to the id field in the monitoring-locations endpoint.
        Monitoring location IDs are created by combining the agency code of the
        agency responsible for the monitoring location (e.g. USGS) with the ID
        number of the monitoring location (e.g. 02238500), separated by a hyphen
        (e.g. USGS-02238500).
    page_size : int, optional
        The number of results to return per page, where one result represents a
        monitoring location. The default is 1000.
    parent_time_series_id : string or list of strings, optional
        The parent_time_series_id returns statistics tied to a particular
        database entry.
    site_type_code : string or list of strings, optional
        Site type code query parameter. You can see a list of valid site type
        codes here:
        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
        Example: "GW" (Groundwater site)
    site_type_name : string or list of strings, optional
        Site type name query parameter. You can see a list of valid site type
        names here:
        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
        Example: "Well"
    parameter_code : string or list of strings, optional
        Parameter codes are 5-digit codes used to identify the constituent
        measured and the units of measure. A complete list of parameter codes
        and associated groupings can be found at
        https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
    expand_percentiles : boolean
        Percentile data for a given day of year or month of year by default
        are returned from the service as lists of string values and percentile
        thresholds in the "values" and "percentiles" columns, respectively.
        When `expand_percentiles` is set to True (default), each value and
        percentile threshold specific to a computation id are returned as
        individual rows in the dataframe, with the value reported in the
        "value" column and the corresponding percentile reported in a
        "percentile" column (and the "values" and "percentiles" columns
        are removed). Missing percentile values expressed as 'nan' in the
        list of string values are removed from the dataframe to save space.
        Setting `expand_percentiles` to False retains the "values" and
        "percentiles" columns produced by the service. Including
        both 'percentile' and one or more other statistics ('median',
        'minimum', 'maximum', or 'arithmetic_mean') in the `computation_type`
        argument will return both the "values" column, containing the list
        of percentile threshold values, and a "value" column, containing
        the singular summary value for the other statistics.

    Returns
    -------
    df : pandas.DataFrame
        The statistics table assembled by ``get_stats_data``.
    md : BaseMetadata
        The metadata object returned alongside the data by ``get_stats_data``.

    Examples
    --------
    .. code::

        >>> # Get day-of-year and month-of-year percentiles for streamflow
        >>> # at a monitoring location of interest
        >>> df, md = dataretrieval.waterdata.get_stats_por(
        ...     monitoring_location_id="USGS-05114000",
        ...     parameter_code="00060",
        ...     computation_type="percentile"
        ... )

        >>> # Get all daily and monthly statistics for the month of January
        >>> # over the entire period of record for streamflow and gage height
        >>> # at a monitoring location of interest
        >>> df, md = dataretrieval.waterdata.get_stats_por(
        ...     monitoring_location_id="USGS-05114000",
        ...     parameter_code=["00060", "00065"],
        ...     start_date="01-01",
        ...     end_date="01-31"
        ... )
    """
    # Spell out the query arguments instead of filtering ``locals()``: the
    # request then cannot pick up a helper variable added to this function
    # later, and it does not rely on how ``locals()`` behaves around
    # comprehensions in a given Python version. Key order follows the
    # signature so the generated query is unchanged.
    query_args = {
        "approval_status": approval_status,
        "computation_type": computation_type,
        "country_code": country_code,
        "state_code": state_code,
        "county_code": county_code,
        "start_date": start_date,
        "end_date": end_date,
        "monitoring_location_id": monitoring_location_id,
        "page_size": page_size,
        "parent_time_series_id": parent_time_series_id,
        "site_type_code": site_type_code,
        "site_type_name": site_type_name,
        "parameter_code": parameter_code,
    }
    # Unset (None) arguments are omitted so the service applies its defaults.
    params = {
        key: value for key, value in query_args.items() if value is not None
    }

    return get_stats_data(
        args=params,
        service="observationNormals",
        expand_percentiles=expand_percentiles
    )
1877+
1878+
def get_stats_date_range(
    approval_status: Optional[str] = None,
    computation_type: Optional[Union[str, list[str]]] = None,
    country_code: Optional[Union[str, list[str]]] = None,
    state_code: Optional[Union[str, list[str]]] = None,
    county_code: Optional[Union[str, list[str]]] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    monitoring_location_id: Optional[Union[str, list[str]]] = None,
    page_size: int = 1000,
    parent_time_series_id: Optional[Union[str, list[str]]] = None,
    site_type_code: Optional[Union[str, list[str]]] = None,
    site_type_name: Optional[Union[str, list[str]]] = None,
    parameter_code: Optional[Union[str, list[str]]] = None,
    expand_percentiles: bool = True
) -> Tuple[pd.DataFrame, BaseMetadata]:
    """Get monthly and annual water data statistics from the USGS Water Data API.

    This service (called the "observationIntervals" endpoint on
    api.waterdata.usgs.gov) provides endpoints for access to computations on
    the historical record regarding water conditions, including minimum,
    maximum, mean, median, and percentiles for month-year, and water/calendar
    years. For more information regarding the calculation of statistics and
    other details, please visit the Statistics documentation page:
    https://waterdata.usgs.gov/statistics-documentation/.

    Note: This API is under active beta development and subject to
    change. Improved handling of significant figures will be
    addressed in a future release.

    Parameters
    ----------
    approval_status : string, optional
        Whether to include approved and/or provisional observations.
        At this time, only approved observations are returned.
    computation_type : string or list of strings, optional
        Desired statistical computation method. Available values are:
        arithmetic_mean, maximum, median, minimum, percentile.
    country_code : string or list of strings, optional
        Country query parameter. API defaults to "US".
    state_code : string or list of strings, optional
        State query parameter. Takes the format "US:XX", where XX is
        the two-digit state code. API defaults to "US:42" (Pennsylvania).
    county_code : string or list of strings, optional
        County query parameter. Takes the format "US:XX:YYY", where XX is
        the two-digit state code and YYY is the three-digit county code.
        API defaults to "US:42:103" (Pennsylvania, Pike County).
    start_date : string or datetime, optional
        Start date for the query in the year-month-day format
        (YYYY-MM-DD).
    end_date : string or datetime, optional
        End date for the query in the year-month-day format
        (YYYY-MM-DD).
    monitoring_location_id : string or list of strings, optional
        A unique identifier representing a single monitoring location. This
        corresponds to the id field in the monitoring-locations endpoint.
        Monitoring location IDs are created by combining the agency code of the
        agency responsible for the monitoring location (e.g. USGS) with the ID
        number of the monitoring location (e.g. 02238500), separated by a hyphen
        (e.g. USGS-02238500).
    page_size : int, optional
        The number of results to return per page, where one result represents a
        monitoring location. The default is 1000.
    parent_time_series_id : string or list of strings, optional
        The parent_time_series_id returns statistics tied to a particular
        database entry.
    site_type_code : string or list of strings, optional
        Site type code query parameter. You can see a list of valid site type
        codes here:
        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
        Example: "GW" (Groundwater site)
    site_type_name : string or list of strings, optional
        Site type name query parameter. You can see a list of valid site type
        names here:
        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
        Example: "Well"
    parameter_code : string or list of strings, optional
        Parameter codes are 5-digit codes used to identify the constituent
        measured and the units of measure. A complete list of parameter codes
        and associated groupings can be found at
        https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
    expand_percentiles : boolean
        Percentile data for a given month-year or water/calendar year by
        default are returned from the service as lists of string values and
        percentile thresholds in the "values" and "percentiles" columns,
        respectively.
        When `expand_percentiles` is set to True (default), each value and
        percentile threshold specific to a computation id are returned as
        individual rows in the dataframe, with the value reported in the
        "value" column and the corresponding percentile reported in a
        "percentile" column (and the "values" and "percentiles" columns
        are removed). Missing percentile values expressed as 'nan' in the
        list of string values are removed from the dataframe to save space.
        Setting `expand_percentiles` to False retains the "values" and
        "percentiles" columns produced by the service. Including
        both 'percentile' and one or more other statistics ('median',
        'minimum', 'maximum', or 'arithmetic_mean') in the `computation_type`
        argument will return both the "values" column, containing the list
        of percentile threshold values, and a "value" column, containing
        the singular summary value for the other statistics.

    Returns
    -------
    df : pandas.DataFrame
        The statistics table assembled by ``get_stats_data``.
    md : BaseMetadata
        The metadata object returned alongside the data by ``get_stats_data``.

    Examples
    --------
    .. code::

        >>> # Get monthly and yearly medians for streamflow at streams in
        >>> # Rhode Island from calendar year 2024.
        >>> df, md = dataretrieval.waterdata.get_stats_date_range(
        ...     state_code="US:44",  # State code for Rhode Island
        ...     parameter_code="00060",
        ...     site_type_code="ST",
        ...     start_date="2024-01-01",
        ...     end_date="2024-12-31",
        ...     computation_type="median"
        ... )

        >>> # Get monthly and yearly minimum and maximums for gage height at
        >>> # a monitoring location of interest
        >>> df, md = dataretrieval.waterdata.get_stats_date_range(
        ...     monitoring_location_id="USGS-05114000",
        ...     parameter_code="00065",
        ...     computation_type=["minimum", "maximum"]
        ... )
    """
    # Spell out the query arguments instead of filtering ``locals()``: the
    # request then cannot pick up a helper variable added to this function
    # later, and it does not rely on how ``locals()`` behaves around
    # comprehensions in a given Python version. Key order follows the
    # signature so the generated query is unchanged.
    query_args = {
        "approval_status": approval_status,
        "computation_type": computation_type,
        "country_code": country_code,
        "state_code": state_code,
        "county_code": county_code,
        "start_date": start_date,
        "end_date": end_date,
        "monitoring_location_id": monitoring_location_id,
        "page_size": page_size,
        "parent_time_series_id": parent_time_series_id,
        "site_type_code": site_type_code,
        "site_type_name": site_type_name,
        "parameter_code": parameter_code,
    }
    # Unset (None) arguments are omitted so the service applies its defaults.
    params = {
        key: value for key, value in query_args.items() if value is not None
    }

    return get_stats_data(
        args=params,
        service="observationIntervals",
        expand_percentiles=expand_percentiles
    )
2007+

0 commit comments

Comments
 (0)