|
23 | 23 | from dataretrieval.waterdata.utils import ( |
24 | 24 | SAMPLES_URL, |
25 | 25 | get_ogc_data, |
26 | | - _construct_api_requests, |
27 | | - _walk_pages, |
| 26 | + get_stats_data, |
28 | 27 | _check_profiles |
29 | 28 | ) |
30 | 29 |
|
@@ -1748,3 +1747,261 @@ def get_samples( |
1748 | 1747 |
|
1749 | 1748 | return df, BaseMetadata(response) |
1750 | 1749 |
|
| 1750 | +def get_stats_por( |
| 1751 | + approval_status: Optional[str] = None, |
| 1752 | + computation_type: Optional[Union[str, list[str]]] = None, |
| 1753 | + country_code: Optional[Union[str, list[str]]] = None, |
| 1754 | + state_code: Optional[Union[str, list[str]]] = None, |
| 1755 | + county_code: Optional[Union[str, list[str]]] = None, |
| 1756 | + start_date: Optional[str] = None, |
| 1757 | + end_date: Optional[str] = None, |
| 1758 | + monitoring_location_id: Optional[Union[str, list[str]]] = None, |
| 1759 | + page_size: int = 1000, |
| 1760 | + parent_time_series_id: Optional[Union[str, list[str]]] = None, |
| 1761 | + site_type_code: Optional[Union[str, list[str]]] = None, |
| 1762 | + site_type_name: Optional[Union[str, list[str]]] = None, |
| 1763 | + parameter_code: Optional[Union[str, list[str]]] = None, |
| 1764 | + expand_percentiles: bool = True |
| 1765 | + ) -> Tuple[pd.DataFrame, BaseMetadata]: |
| 1766 | + """Get day-of-year and month-of-year water data statistics from the |
| 1767 | + USGS Water Data API. |
| 1768 | + This service (called the "observationNormals" endpoint on api.waterdata.usgs.gov) |
| 1769 | + provides endpoints for access to computations on the historical record regarding |
| 1770 | + water conditions, including minimum, maximum, mean, median, and percentiles for |
| 1771 | + day of year and month of year. For more information regarding the calculation of |
| 1772 | + statistics and other details, please visit the Statistics documentation page: |
| 1773 | + https://waterdata.usgs.gov/statistics-documentation/. |
| 1774 | + |
| 1775 | + Note: This API is under active beta development and subject to |
| 1776 | + change. Improved handling of significant figures will be |
| 1777 | + addressed in a future release. |
| 1778 | +
|
| 1779 | + Parameters |
| 1780 | + ---------- |
| 1781 | + approval_status: string, optional |
| 1782 | + Whether to include approved and/or provisional observations. |
| 1783 | + At this time, only approved observations are returned. |
| 1784 | + computation_type: string or list of strings, optional |
| 1785 | + Desired statistical computation method. Available values are: |
| 1786 | + arithmetic_mean, maximum, median, minimum, percentile. |
| 1787 | + country_code: string, optional |
| 1788 | + Country query parameter. API defaults to "US". |
| 1789 | + state_code: string, optional |
| 1790 | + State query parameter. Takes the format "US:XX", where XX is |
| 1791 | + the two-digit state code. API defaults to "US:42" (Pennsylvania). |
| 1792 | + county_code: string, optional |
| 1793 | + County query parameter. Takes the format "US:XX:YYY", where XX is |
| 1794 | + the two-digit state code and YYY is the three-digit county code. |
| 1795 | + API defaults to "US:42:103" (Pennsylvania, Pike County). |
| 1796 | + start_date: string or datetime, optional |
| 1797 | + Start day for the query in the month-day format (MM-DD). |
| 1798 | + end_date: string or datetime, optional |
| 1799 | + End day for the query in the month-day format (MM-DD). |
| 1800 | + monitoring_location_id : string or list of strings, optional |
| 1801 | + A unique identifier representing a single monitoring location. This |
| 1802 | + corresponds to the id field in the monitoring-locations endpoint. |
| 1803 | + Monitoring location IDs are created by combining the agency code of the |
| 1804 | + agency responsible for the monitoring location (e.g. USGS) with the ID |
| 1805 | + number of the monitoring location (e.g. 02238500), separated by a hyphen |
| 1806 | + (e.g. USGS-02238500). |
| 1807 | + page_size : int, optional |
| 1808 | + The number of results to return per page, where one result represents a |
| 1809 | + monitoring location. The default is 1000. |
| 1810 | + parent_time_series_id: string, optional |
| 1811 | + The parent_time_series_id returns statistics tied to a particular database entry. |
| 1812 | + site_type_code: string, optional |
| 1813 | + Site type code query parameter. You can see a list of valid site type codes here: |
| 1814 | + https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. |
| 1815 | + Example: "GW" (Groundwater site) |
| 1816 | + site_type_name: string, optional |
| 1817 | + Site type name query parameter. You can see a list of valid site type names here: |
| 1818 | + https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. |
| 1819 | + Example: "Well" |
| 1820 | + parameter_code : string or list of strings, optional |
| 1821 | + Parameter codes are 5-digit codes used to identify the constituent |
| 1822 | + measured and the units of measure. A complete list of parameter codes |
| 1823 | + and associated groupings can be found at |
| 1824 | + https://help.waterdata.usgs.gov/codes-and-parameters/parameters. |
| 1825 | + expand_percentiles : boolean |
| 1826 | + Percentile data for a given day of year or month of year by default |
| 1827 | + are returned from the service as lists of string values and percentile |
| 1828 | + thresholds in the "values" and "percentiles" columns, respectively. |
| 1829 | + When `expand_percentiles` is set to True (default), each value and |
| 1830 | + percentile threshold specific to a computation id are returned as |
| 1831 | + individual rows in the dataframe, with the value reported in the |
| 1832 | + "value" column and the corresponding percentile reported in a |
| 1833 | + "percentile" column (and the "values" and "percentiles" columns |
| 1834 | + are removed). Missing percentile values expressed as 'nan' in the |
| 1835 | + list of string values are removed from the dataframe to save space. |
| 1836 | + Setting `expand_percentiles` to False retains the "values" and |
| 1837 | + "percentiles" columns produced by the service. Including |
| 1838 | + both 'percentile' and one or more other statistics ('median', |
| 1839 | + 'minimum', 'maximum', or 'arithmetic_mean') in the `computation_type` |
| 1840 | + argument will return both the "values" column, containing the list |
| 1841 | + of percentile threshold values, and a "value" column, containing |
| 1842 | + the singular summary value for the other statistics. |
| 1843 | +
|
| 1844 | + Examples |
| 1845 | + -------- |
| 1846 | + .. code:: |
| 1847 | +
|
| 1848 | + >>> # Get daily, monthly, and annual percentiles for streamflow at |
| 1849 | + >>> # a monitoring location of interest |
| 1850 | + >>> df, md = dataretrieval.waterdata.get_stats_por( |
| 1851 | + ... monitoring_location_id="USGS-05114000", |
| 1852 | + ... parameter_code="00060", |
| 1853 | + ... computation_type="percentile" |
| 1854 | + ... ) |
| 1855 | +
|
| 1856 | + >>> # Get all daily and monthly statistics for the month of January |
| 1857 | + >>> # over the entire period of record for streamflow and gage height |
| 1858 | + >>> # at a monitoring location of interest |
| 1859 | + >>> df, md = dataretrieval.waterdata.get_stats_por( |
| 1860 | + ... monitoring_location_id="USGS-05114000", |
| 1861 | + ... parameter_code=["00060", "00065"], |
| 1862 | + ... start_date="01-01", |
| 1863 | + ... end_date="01-31" |
| 1864 | + ... ) |
| 1865 | + """ |
| 1866 | + params = { |
| 1867 | + k: v |
| 1868 | + for k, v in locals().items() |
| 1869 | + if k not in ["expand_percentiles"] and v is not None |
| 1870 | + } |
| 1871 | + |
| 1872 | + return get_stats_data( |
| 1873 | + args=params, |
| 1874 | + service="observationNormals", |
| 1875 | + expand_percentiles=expand_percentiles |
| 1876 | + ) |
| 1877 | + |
| 1878 | +def get_stats_date_range( |
| 1879 | + approval_status: Optional[str] = None, |
| 1880 | + computation_type: Optional[Union[str, list[str]]] = None, |
| 1881 | + country_code: Optional[Union[str, list[str]]] = None, |
| 1882 | + state_code: Optional[Union[str, list[str]]] = None, |
| 1883 | + county_code: Optional[Union[str, list[str]]] = None, |
| 1884 | + start_date: Optional[str] = None, |
| 1885 | + end_date: Optional[str] = None, |
| 1886 | + monitoring_location_id: Optional[Union[str, list[str]]] = None, |
| 1887 | + page_size: int = 1000, |
| 1888 | + parent_time_series_id: Optional[Union[str, list[str]]] = None, |
| 1889 | + site_type_code: Optional[Union[str, list[str]]] = None, |
| 1890 | + site_type_name: Optional[Union[str, list[str]]] = None, |
| 1891 | + parameter_code: Optional[Union[str, list[str]]] = None, |
| 1892 | + expand_percentiles: bool = True |
| 1893 | + ) -> Tuple[pd.DataFrame, BaseMetadata]: |
| 1894 | + """Get monthly and annual water data statistics from the USGS Water Data API. |
| 1895 | + This service (called the "observationIntervals" endpoint on api.waterdata.usgs.gov) |
| 1896 | + provides endpoints for access to computations on the historical record regarding |
| 1897 | + water conditions, including minimum, maximum, mean, median, and percentiles for |
| 1898 | + month-year, and water/calendar years. For more information regarding the calculation |
| 1899 | + of statistics and other details, please visit the Statistics documentation page: |
| 1900 | + https://waterdata.usgs.gov/statistics-documentation/. |
| 1901 | + |
| 1902 | + Note: This API is under active beta development and subject to |
| 1903 | + change. Improved handling of significant figures will be |
| 1904 | + addressed in a future release. |
| 1905 | +
|
| 1906 | + Parameters |
| 1907 | + ---------- |
| 1908 | + approval_status: string, optional |
| 1909 | + Whether to include approved and/or provisional observations. |
| 1910 | + At this time, only approved observations are returned. |
| 1911 | + computation_type: string or list of strings, optional |
| 1912 | + Desired statistical computation method. Available values are: |
| 1913 | + arithmetic_mean, maximum, median, minimum, percentile. |
| 1914 | + country_code: string, optional |
| 1915 | + Country query parameter. API defaults to "US". |
| 1916 | + state_code: string, optional |
| 1917 | + State query parameter. Takes the format "US:XX", where XX is |
| 1918 | + the two-digit state code. API defaults to "US:42" (Pennsylvania). |
| 1919 | + county_code: string, optional |
| 1920 | + County query parameter. Takes the format "US:XX:YYY", where XX is |
| 1921 | + the two-digit state code and YYY is the three-digit county code. |
| 1922 | + API defaults to "US:42:103" (Pennsylvania, Pike County). |
| 1923 | + start_date: string or datetime, optional |
| 1924 | + Start date for the query in the year-month-day format |
| 1925 | + (YYYY-MM-DD). |
| 1926 | + end_date: string or datetime, optional |
| 1927 | + End date for the query in the year-month-day format |
| 1928 | + (YYYY-MM-DD). |
| 1929 | + monitoring_location_id : string or list of strings, optional |
| 1930 | + A unique identifier representing a single monitoring location. This |
| 1931 | + corresponds to the id field in the monitoring-locations endpoint. |
| 1932 | + Monitoring location IDs are created by combining the agency code of the |
| 1933 | + agency responsible for the monitoring location (e.g. USGS) with the ID |
| 1934 | + number of the monitoring location (e.g. 02238500), separated by a hyphen |
| 1935 | + (e.g. USGS-02238500). |
| 1936 | + page_size : int, optional |
| 1937 | + The number of results to return per page, where one result represents a |
| 1938 | + monitoring location. The default is 1000. |
| 1939 | + parent_time_series_id: string, optional |
| 1940 | + The parent_time_series_id returns statistics tied to a particular database entry. |
| 1941 | + site_type_code: string, optional |
| 1942 | + Site type code query parameter. You can see a list of valid site type codes here: |
| 1943 | + https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. |
| 1944 | + Example: "GW" (Groundwater site) |
| 1945 | + site_type_name: string, optional |
| 1946 | + Site type name query parameter. You can see a list of valid site type names here: |
| 1947 | + https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. |
| 1948 | + Example: "Well" |
| 1949 | + parameter_code : string or list of strings, optional |
| 1950 | + Parameter codes are 5-digit codes used to identify the constituent |
| 1951 | + measured and the units of measure. A complete list of parameter codes |
| 1952 | + and associated groupings can be found at |
| 1953 | + https://help.waterdata.usgs.gov/codes-and-parameters/parameters. |
| 1954 | + expand_percentiles : boolean |
| 1955 | + Percentile data for a given month-year or water/calendar year by default |
| 1956 | + are returned from the service as lists of string values and percentile |
| 1957 | + thresholds in the "values" and "percentiles" columns, respectively. |
| 1958 | + When `expand_percentiles` is set to True (default), each value and |
| 1959 | + percentile threshold specific to a computation id are returned as |
| 1960 | + individual rows in the dataframe, with the value reported in the |
| 1961 | + "value" column and the corresponding percentile reported in a |
| 1962 | + "percentile" column (and the "values" and "percentiles" columns |
| 1963 | + are removed). Missing percentile values expressed as 'nan' in the |
| 1964 | + list of string values are removed from the dataframe to save space. |
| 1965 | + Setting `expand_percentiles` to False retains the "values" and |
| 1966 | + "percentiles" columns produced by the service. Including |
| 1967 | + both 'percentile' and one or more other statistics ('median', |
| 1968 | + 'minimum', 'maximum', or 'arithmetic_mean') in the `computation_type` |
| 1969 | + argument will return both the "values" column, containing the list |
| 1970 | + of percentile threshold values, and a "value" column, containing |
| 1971 | + the singular summary value for the other statistics. |
| 1972 | +
|
| 1973 | + Examples |
| 1974 | + -------- |
| 1975 | + .. code:: |
| 1976 | +
|
| 1977 | + >>> # Get monthly and yearly medians for streamflow at streams in Rhode Island |
| 1978 | + >>> # from calendar year 2024. |
| 1979 | + >>> df, md = dataretrieval.waterdata.get_stats_date_range( |
| 1980 | + ... state_code="US:44", # State code for Rhode Island |
| 1981 | + ... parameter_code="00060", |
| 1982 | + ... site_type_code="ST", |
| 1983 | + ... start_date="2024-01-01", |
| 1984 | + ... end_date="2024-12-31", |
| 1985 | + ... computation_type="median" |
| 1986 | + ... ) |
| 1987 | +
|
| 1988 | + >>> # Get monthly and yearly minimum and maximums for gage height at |
| 1989 | + >>> # a monitoring location of interest |
| 1990 | + >>> df, md = dataretrieval.waterdata.get_stats_date_range( |
| 1991 | + ... monitoring_location_id="USGS-05114000", |
| 1992 | + ... parameter_code="00065", |
| 1993 | + ... computation_type=["minimum", "maximum"] |
| 1994 | + ... ) |
| 1995 | + """ |
| 1996 | + params = { |
| 1997 | + k: v |
| 1998 | + for k, v in locals().items() |
| 1999 | + if k not in ["expand_percentiles"] and v is not None |
| 2000 | + } |
| 2001 | + |
| 2002 | + return get_stats_data( |
| 2003 | + args=params, |
| 2004 | + service="observationIntervals", |
| 2005 | + expand_percentiles=expand_percentiles |
| 2006 | + ) |
| 2007 | + |
0 commit comments