-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmain.py
More file actions
250 lines (201 loc) · 10.1 KB
/
main.py
File metadata and controls
250 lines (201 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 12 16:48:31 2025
@author: Hisham Eldardiry
"""
# main.py
"""
Main driver script for InfeRes: Inference of Reservoir Storage using remote sensing.
Orchestrates the full workflow:
- Download static base layers (DEM, GSW)
- Isolate reservoir from DEM
- Generate hypsometric (EAS) curve
- Extract NDWI composites
- Estimate water surface area
- Convert to storage/elevation
Dependencies: ee, pandas, modules in src/
"""
# %% Load Packages
import sys
import os
# Add the src directory to the system path
# project_root = os.path.dirname(os.path.abspath(__file__))
project_root = os.getcwd()
src_path = os.path.join(project_root, "src")
sys.path.append(src_path)
import ee
import configparser
import pandas as pd
import numpy as np
from osgeo import gdal_array
import geemap
import calendar
from datetime import datetime, timedelta
from utils import (boundary_to_geometry,setup_reservoir_folders,load_reservoir_metadata,parse_reservoir_ids)
from metadata_builder import generate_reservoir_metadata
from download_baselayers import (download_dem,download_gsw_frequency,download_gsw_extent)
from reservoir_delineation import delineate_reservoir
from reservoir_curve import generate_curve_pre_srtm, generate_curve_post_srtm
from satellite_composite import get_landsat_composite, get_sentinel_composite
from satellite_water_area import estimate_water_area
from area_postprocessing import generate_inferes_products
# %% Load config.ini
# Read run configuration: which reservoirs to process, data-source paths,
# the Earth Engine project, and the simulation period.
config = configparser.ConfigParser()
# configparser.read() silently returns an empty parser when the file is
# missing, which would later surface as a confusing KeyError — fail fast.
if not config.read("config.ini"):
    raise FileNotFoundError("config.ini not found in the current working directory")
grand_ids_raw = config["Reservoir Selection"]["RESERVOIR_GRAND_ID"]
grand_ids = parse_reservoir_ids(grand_ids_raw)  # e.g. "101,102" -> [101, 102]
ref_curves = config["Data Sources"]["REFERENCE_CURVES"]  # dir of reference E-A-S curves
ee_project = config["Data Sources"]["EE_PROJECT"]
start_year = int(config["Simulation Period"]["START_YEAR"])
end_year = int(config["Simulation Period"]["END_YEAR"])
# Initialize Earth Engine (run ee.Authenticate() once per machine beforehand)
# ee.Authenticate()
ee.Initialize(project=ee_project)
# %% Process InfeRes for Active Reservoirs
# For each GRanD reservoir ID: build metadata, download base layers,
# delineate the reservoir, derive its hypsometric curve, estimate water
# surface area from satellite composites, and postprocess into products.
for count_res, gid in enumerate(grand_ids, start=1):
    print("=========================================================================================================")
    print(f"[RES {count_res}] Processing Reservoir (GRAND ID: {gid}) for the period {start_year}-{end_year}")
    print("=========================================================================================================")
    metadata_path = os.path.join("input/reservoir_metadata")
    # Step 1: Generate metadata CSV
    print(f"[Step 1] Generating Reservoir Metadata (InfeRes Input) for GRanD ID {gid}...")
    generate_reservoir_metadata(
        grand_id=gid,
        grand_shapefile_dir=os.path.join(project_root, "input/grand_dataset/"),
        metadata_dir=metadata_path,
        save_plot=True
    )
    # Load reservoir metadata (one row per reservoir in the generated CSV)
    reservoirs_df = load_reservoir_metadata(os.path.join(metadata_path, f"inferes_input_{gid:04d}.csv"))
    res = reservoirs_df.iloc[0]  # direct access to the single metadata row
    res_name = res.Name
    res_year = res.Year                      # year of commission
    grand_id = res.GRAND_ID
    grand_capacity = res.GRAND_Capacity
    max_water_level = round(res.MAX_WL)      # m
    max_water_area = round(res.MAX_AREA)     # km^2
    point = [float(x) for x in res.Point.split(",")]        # lon/lat seed point
    boundary = [float(x) for x in res.Boundary.split(",")]  # bounding box
    region = boundary_to_geometry(boundary)
    # Step 2: Download DEM, Frequency, MaxExtent
    print(f"[Step 2] Downloading base layers for reservoir: {res_name} ...")
    # Create folder structure
    baselayers_dir, output_res_dir = setup_reservoir_folders(res_name)
    dem_array, dem_image = download_dem(region, os.path.join(baselayers_dir, "DEM.tif"))
    frequency = download_gsw_frequency(region, os.path.join(baselayers_dir, "frequency.tif"), dem_image)
    extent = download_gsw_extent(region, os.path.join(baselayers_dir, "max_extent.tif"), dem_image)
    # Step 3: Reservoir isolation
    print("[Step 3] Delineating reservoir boundaries...")
    delineate_reservoir(res_name, max_water_level, point, boundary, baselayers_dir, plot=False)
    # Step 4: Generate Reservoir Hypsometric Curve (Elevation–Area–Storage Relationship)
    print("[Step 4] Generating Reservoir Hypsometric Curve (Elevation–Area–Storage Relationship) ...")
    reference_curves_dir = os.path.join(project_root, ref_curves)
    # Reservoirs commissioned before the SRTM mission (2000) need reference
    # curves because the DEM cannot see the submerged bathymetry.
    if res_year <= 2000:
        generate_curve_pre_srtm(
            res_name,
            point,
            boundary,
            max_water_level,
            baselayers_dir,
            output_res_dir,
            reference_curves_dir,
            grand_id,
            grand_capacity,
            plot_curve=False
        )
    else:
        generate_curve_post_srtm(
            res_name,
            max_water_level,
            baselayers_dir,
            output_res_dir,
            plot_curve=False
        )
    # Step 5: Estimate surface area from NDWI composites & append results to output file
    print("[Step 5] Estimating Water Surface Area...")
    # Define output CSV path early so we can append during processing
    output_csv = os.path.join(output_res_dir, "inferes_area.csv")
    if os.path.exists(output_csv):
        os.remove(output_csv)
    # Initialize CSV file with header.
    # NOTE: fixed a stray space before "local_filtered_area" in the header;
    # it would otherwise become a mis-named pandas column (" local_filtered_area").
    with open(output_csv, "w") as f:
        f.write("sensor,date,cloud_percentage,quality_flag,raw_area,clahe_area,water_cluster_area,zone_filtered_area,local_filtered_area,pre_filtering_area_km2,post_filtering_area_km2\n")
    curve_path = os.path.join(output_res_dir, "reservoir_hypsometry.csv")
    count_img = 0
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            days_in_month = calendar.monthrange(year, month)[1]
            # Split each month into ~11-day compositing windows (1-11, 12-22, 23-end).
            for start_day in range(1, days_in_month, 11):
                count_img += 1
                end_day = min(start_day + 10, days_in_month)
                start_date = f"{year}-{month:02d}-{start_day:02d}"
                end_date = f"{year}-{month:02d}-{end_day:02d}"
                print(count_img, f'----- Processing Satellite Images between {start_date} and {end_date}')
                landsat_comp = get_landsat_composite(start_date, end_date, region, reference_image=dem_image)
                sentinel_comp = get_sentinel_composite(start_date, end_date, region, reference_image=dem_image)
                for sensor_label, comp in zip(["landsat", "sentinel"], [landsat_comp, sentinel_comp]):
                    # Explicit None check: composites are ee.Image objects (or None
                    # when no imagery exists in the window); truthiness is not the
                    # intended test here.
                    if comp is None:
                        continue
                    sensor_id = "LS" if sensor_label == "landsat" else "S2"
                    cloud_pct, raw_area, clahe_area, water_cluster_area, zone_filtered_area, local_filtered_area, quality_flag = estimate_water_area(comp, region, baselayers_dir, res_name, sensor_id)
                    # Pre-filtering area = raw water cluster; post = locally filtered.
                    pre_filtering_area = water_cluster_area
                    post_filtering_area = local_filtered_area
                    if cloud_pct is None or pre_filtering_area is None or post_filtering_area is None:
                        print("Skipping due to invalid image or too much cloud.")
                        continue
                    # Assign the record to the midpoint date of the compositing window.
                    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
                    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
                    mid_dt = start_dt + (end_dt - start_dt) / 2
                    mid_date = mid_dt.strftime("%Y-%m-%d")
                    row_data = {
                        "sensor": sensor_label,
                        "date": mid_date,
                        # cloud_pct is guaranteed non-None here (guard above).
                        "cloud_percentage": round(cloud_pct, 2),
                        "quality_flag": quality_flag,
                        "raw_area": raw_area,
                        "clahe_area": clahe_area,
                        "water_cluster_area": water_cluster_area,
                        "zone_filtered_area": zone_filtered_area,
                        "local_filtered_area": local_filtered_area,
                        "pre_filtering_area_km2": pre_filtering_area,
                        "post_filtering_area_km2": post_filtering_area,
                    }
                    # Append one row to the running CSV (header already written).
                    area_df = pd.DataFrame([row_data])
                    area_df.to_csv(output_csv, mode="a", index=False, header=False)
    # Step 6: Postprocessing surface area
    print("[Step 6] Postprocessing Water Surface Area...")
    df_area = pd.read_csv(output_csv)
    # Maximum water area extent used for bias correction (from GRanD metadata).
    # mask_path = os.path.join(baselayers_dir, "reservoir_mask.tif")
    # mask = gdal_array.LoadFile(mask_path).astype(np.float32)
    # reservoir_area_km2 = round(np.count_nonzero(mask == 1) * 0.0009, 2)
    reservoir_area_km2 = max_water_area
    # Generate levels and updated area_df with level3 & level4 area values
    products, updated_area_df = generate_inferes_products(
        df_area=df_area,
        curve_path=curve_path,
        year_of_commission=res_year,
        sim_start_year=start_year,
        sim_end_year=end_year,
        res_max_area_km2=reservoir_area_km2,
        apply_bias_correction=True,
        rolling_window=15,
    )
    # Save updated area CSV with postprocessed area columns (overwrites the raw file)
    updated_area_path = os.path.join(output_res_dir, "inferes_area.csv")
    updated_area_df.to_csv(updated_area_path, index=False)
    # Save full product levels (level0 ... level4)
    for i in range(5):
        product_df = products[f"level{i}"]
        product_path = os.path.join(output_res_dir, f"inferes_level{i}_product.csv")
        product_df.to_csv(product_path, index=False)
    print(f"InfeRes results saved: {os.path.basename(output_csv)}")
    print(f"InfeRes completed for: {res_name}")
print("\n ✅ InfeRes completed for all reservoirs.")