sdmlua
diff --git a/‎GeoGLOWS/GeoGLOWS.ipynb‎
Lines changed: 152 additions & 0 deletions b/‎GeoGLOWS/GeoGLOWS.ipynb‎
Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,152 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4f8cad8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Import Libraries\n",
+    "import os\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from pathlib import Path\n",
+    "import s3fs\n",
+    "import xarray"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "5cda56c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Hydrotable of the HUC8 with Spatial Joined GeoGLOWS Flowlines reaches\n",
+    "hydrotable = Path('./hydrotable/fim45geoglows_12060202.csv')\n",
+    "output_dir = Path('./streamflow')\n",
+    "huc = '12060202'\n",
+    "\n",
+    "#start and end date\n",
+    "start_date = '2016-01-01'\n",
+    "end_date = '2016-12-30'\n",
+    "value_time = '2016-10-15'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4bcde6bc",
+   "metadata": {},
+   "source": [
+    "**Get all the Streamflow for all feature ID based on LINKNO within specified date**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "cf622efb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_geoglowsdatafromS3():\n",
+    "    bucket_uri = 's3://geoglows-v2-retrospective/retrospective.zarr'\n",
+    "    region_name = 'us-west-2'\n",
+    "    s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name=region_name))\n",
+    "    s3store = s3fs.S3Map(root=bucket_uri, s3=s3, check=False)\n",
+    "    \n",
+    "    #All data\n",
+    "    ds = xarray.open_zarr(s3store)\n",
+    "    return ds\n",
+    "\n",
+    "def get_rivID(hydrotable):\n",
+    "    df = pd.read_csv(hydrotable)\n",
+    "    return df\n",
+    "\n",
+    "def GetGLOWSStreamflow(start_date, end_date, value_time, hydrotable, output_dir, huc, time_column='time'):\n",
+    "    # Get the retrospective dataset\n",
+    "    ds = get_geoglowsdatafromS3()\n",
+    "    hydro_df = pd.read_csv(hydrotable)\n",
+    "    \n",
+    "    # Map LINKNO to feature_id\n",
+    "    linkno_to_featureid = hydro_df.set_index('LINKNO')['feature_id'].to_dict()\n",
+    "    riv_ids = hydro_df['LINKNO'].tolist()\n",
+    "    filtered_ds = ds['Qout'].sel(rivid=riv_ids).to_dataframe()\n",
+    "    filtered_ds.reset_index(inplace=True)\n",
+    "    filtered_ds['time'] = pd.to_datetime(filtered_ds['time'])\n",
+    "    filtered_df = filtered_ds[(filtered_ds['time'] >= start_date) & (filtered_ds['time'] <= end_date)]\n",
+    "    \n",
+    "    # Map rivid (LINKNO) to feature_id\n",
+    "    filtered_df['feature_id'] = filtered_df['rivid'].map(linkno_to_featureid)\n",
+    "    output_df = filtered_df[['feature_id', 'Qout', 'time']]\n",
+    "    \n",
+    "    output_df.rename(columns={'Qout': 'discharge'}, inplace=True)\n",
+    "    \n",
+    "    # Export the filtered data to a CSV file\n",
+    "    out_dir = Path(output_dir) / 'combinedStreamflow'\n",
+    "    out_dir.mkdir(parents=True, exist_ok=True)\n",
+    "    output_file = out_dir / f'{huc}_{start_date}_{end_date}.csv'\n",
+    "    output_df.to_csv(output_file, index=False)\n",
+    "    \n",
+    "    #Filter based on value_time\n",
+    "    value_time_df = output_df[output_df['time'] == value_time]\n",
+    "    value_time_df = value_time_df[['feature_id', 'discharge']]\n",
+    "    \n",
+    "    # Export the value_time data to a separate CSV file\n",
+    "    value_timeSTR = pd.to_datetime(value_time).strftime('%Y%m%d')\n",
+    "    value_time_file = Path(output_dir) / f'{value_timeSTR}_{huc}.csv'\n",
+    "    value_time_df.to_csv(value_time_file, index=False)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "e4515e36",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/3g/sycd83_j0fb1l3sf8r5n5j000000gn/T/ipykernel_2243/122790853.py:29: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  filtered_df['feature_id'] = filtered_df['rivid'].map(linkno_to_featureid)\n",
+      "/var/folders/3g/sycd83_j0fb1l3sf8r5n5j000000gn/T/ipykernel_2243/122790853.py:32: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  output_df.rename(columns={'Qout': 'discharge'}, inplace=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "GetGLOWSStreamflow(start_date, end_date, value_time, hydrotable, output_dir, huc)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "OWPHANDFIM",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}