|
3 | 3 | import csv |
4 | 4 | import os |
5 | 5 | from concurrent.futures import ThreadPoolExecutor |
6 | | - |
| 6 | +import geopandas as gpd |
7 | 7 | import netCDF4 as nc |
8 | 8 | import numpy as np |
| 9 | +import pandas as pd |
9 | 10 | import requests |
10 | 11 | import reverse_geocode |
| 12 | +from shapely.geometry import Point |
11 | 13 |
|
12 | 14 | from bfm_data.config import paths |
13 | 15 | from bfm_data.utils.geo import ( |
@@ -63,6 +65,10 @@ def load_region_bounding_boxes(self, region: str): |
63 | 65 | region (str): The region name (e.g., 'Europe', 'Latin America'). |
64 | 66 | """ |
65 | 67 | _, iso_codes = get_countries_by_continent(region) |
| 68 | + |
| 69 | + if 'CY' not in iso_codes: |
| 70 | + iso_codes.append('CY') |
| 71 | + |
66 | 72 | self.country_rectangles = get_bounding_boxes_for_countries(iso_codes) |
67 | 73 |
|
68 | 74 | def filter_11th_day_files(self, file_urls: list) -> list: |
@@ -140,61 +146,122 @@ def extract_ndvi_locations(self, nc_file_path: str): |
140 | 146 | lon_points = np.arange(-180, 180 + 0.25, 0.25) |
141 | 147 |
|
142 | 148 | if self.global_mode: |
143 | | - ndvi_data = {} |
144 | | - |
145 | | - for lat_val in lat_points: |
146 | | - for lon_val in lon_points: |
147 | | - i = np.abs(lat - lat_val).argmin() |
148 | | - j = np.abs(lon - lon_val).argmin() |
149 | | - ndvi_value = ndvi[i, j] |
150 | | - |
151 | | - if ndvi_value != 255 and ndvi_value > self.ndvi_threshold: |
152 | | - transformed_lon = lon_val if lon_val >= 0 else lon_val + 360 |
153 | | - coord = (lat_val, transformed_lon) |
154 | | - country = reverse_geocode.get(coord)["country"] |
155 | | - if country not in ndvi_data: |
156 | | - ndvi_data[country] = [] |
157 | | - ndvi_data[country].append( |
158 | | - (lat_val, transformed_lon, ndvi_value) |
159 | | - ) |
| 149 | + world_gdf = gpd.read_file("/projects/prjs1134/data/projects/biodt/storage/geoBoundaries/geoBoundaries CGAZ ADM0.geojson").set_crs("EPSG:4326") |
| 150 | + world_gdf["shapeName"] = world_gdf["shapeName"].str.strip() |
| 151 | + |
| 152 | + lat_points = np.arange(-90, 90.25, 0.1) |
| 153 | + lon_points = np.arange(-180, 180.25, 0.1) |
| 154 | + |
| 155 | + grid_points = [Point(lon, lat) for lat in lat_points for lon in lon_points] |
| 156 | + grid_df = pd.DataFrame({ |
| 157 | + "Latitude": [pt.y for pt in grid_points], |
| 158 | + "Longitude": [pt.x for pt in grid_points], |
| 159 | + "geometry": grid_points |
| 160 | + }) |
| 161 | + grid_gdf = gpd.GeoDataFrame(grid_df, geometry="geometry", crs="EPSG:4326") |
| 162 | + |
| 163 | + joined = gpd.sjoin(grid_gdf, world_gdf[["geometry", "shapeName"]], predicate="within", how="inner") |
| 164 | + |
| 165 | + def snap_to_grid(x, res=0.25): |
| 166 | + return np.round(x / res) * res |
| 167 | + |
| 168 | + ndvi_data = {country: [] for country in joined["shapeName"].unique()} |
| 169 | + tmp_country_data = {country: [] for country in joined["shapeName"].unique()} |
| 170 | + |
| 171 | + for _, row in joined.iterrows(): |
| 172 | + lat_val = row["Latitude"] |
| 173 | + lon_val = row["Longitude"] |
| 174 | + country = row["shapeName"] |
| 175 | + |
| 176 | + i = np.abs(lat - lat_val).argmin() |
| 177 | + j = np.abs(lon - lon_val).argmin() |
| 178 | + ndvi_value = ndvi[i, j] |
| 179 | + |
| 180 | + if ndvi_value != 255 and ndvi_value > self.ndvi_threshold: |
| 181 | + lat_025 = snap_to_grid(lat_val, 0.25) |
| 182 | + lon_025 = snap_to_grid(lon_val, 0.25) |
| 183 | + lon_025 = lon_025 if lon_025 >= 0 else lon_025 + 360 |
| 184 | + tmp_country_data[country].append((lat_025, lon_025, ndvi_value)) |
| 185 | + |
| 186 | + for country, values in tmp_country_data.items(): |
| 187 | + if values: |
| 188 | + df = pd.DataFrame(values, columns=["lat", "lon", "ndvi"]) |
| 189 | + df = df.groupby(["lat", "lon"], as_index=False)["ndvi"].mean() |
| 190 | + ndvi_data[country] = list(df.itertuples(index=False, name=None)) |
| 191 | + |
| 192 | + return month_year, ndvi_data |
160 | 193 |
|
161 | 194 | else: |
162 | | - ndvi_data = { |
163 | | - get_country_name_from_iso(country): [] |
164 | | - for country in self.country_rectangles.keys() |
| 195 | + world_gdf = gpd.read_file("/projects/prjs1134/data/projects/biodt/storage/geoBoundaries/geoBoundaries CGAZ ADM0.geojson").set_crs("EPSG:4326") |
| 196 | + world_gdf["shapeName"] = world_gdf["shapeName"].str.strip() |
| 197 | + |
| 198 | + target_names = [get_country_name_from_iso(code).strip() for code in self.country_rectangles] |
| 199 | + if "Cyprus" not in target_names: |
| 200 | + target_names.append("Cyprus") |
| 201 | + |
| 202 | + region_gdf = world_gdf[world_gdf["shapeName"].isin(target_names)].reset_index(drop=True) |
| 203 | + |
| 204 | + if "Cyprus" not in region_gdf["shapeName"].values: |
| 205 | + cyprus_row = world_gdf[world_gdf["shapeName"] == "Cyprus"] |
| 206 | + if not cyprus_row.empty: |
| 207 | + region_gdf = pd.concat([region_gdf, cyprus_row], ignore_index=True) |
| 208 | + |
| 209 | + manual_countries = { |
| 210 | + "Bosnia and Herzegovina": "Bosnia and Herzegovina", |
| 211 | + "North Macedonia": "North Macedonia", |
| 212 | + "Moldova": "Moldova" |
165 | 213 | } |
| 214 | + for country in manual_countries: |
| 215 | + if country not in region_gdf["shapeName"].values: |
| 216 | + row = world_gdf[world_gdf["shapeName"] == country] |
| 217 | + if not row.empty: |
| 218 | + region_gdf = pd.concat([region_gdf, row], ignore_index=True) |
| 219 | + |
| 220 | + lat_points = np.arange(32, 72, 0.1) |
| 221 | + lon_points = np.arange(-25, 45, 0.1) |
| 222 | + |
| 223 | + grid_points = [Point(lon, lat) for lat in lat_points for lon in lon_points] |
| 224 | + grid_df = pd.DataFrame({ |
| 225 | + "Latitude": [pt.y for pt in grid_points], |
| 226 | + "Longitude": [pt.x for pt in grid_points], |
| 227 | + "geometry": grid_points |
| 228 | + }) |
| 229 | + grid_gdf = gpd.GeoDataFrame(grid_df, geometry="geometry", crs="EPSG:4326") |
| 230 | + |
| 231 | + joined = gpd.sjoin(grid_gdf, region_gdf[["geometry", "shapeName"]], predicate="within", how="inner") |
| 232 | + |
| 233 | + matched_countries = sorted(joined["shapeName"].unique()) |
| 234 | + expected_countries = sorted(region_gdf["shapeName"].unique()) |
| 235 | + |
| 236 | + def snap_to_grid(x, res=0.25): |
| 237 | + return np.round(x / res) * res |
| 238 | + |
| 239 | + ndvi_data = {country: [] for country in expected_countries} |
| 240 | + tmp_country_data = {country: [] for country in expected_countries} |
| 241 | + |
| 242 | + for _, row in joined.iterrows(): |
| 243 | + lat_val = row["Latitude"] |
| 244 | + lon_val = row["Longitude"] |
| 245 | + country = row["shapeName"] |
| 246 | + |
| 247 | + i = np.abs(lat - lat_val).argmin() |
| 248 | + j = np.abs(lon - lon_val).argmin() |
| 249 | + ndvi_value = ndvi[i, j] |
| 250 | + |
| 251 | + if ndvi_value != 255 and ndvi_value > self.ndvi_threshold: |
| 252 | + lat_025 = snap_to_grid(lat_val, 0.25) |
| 253 | + lon_025 = snap_to_grid(lon_val, 0.25) |
| 254 | + lon_025 = lon_025 if lon_025 >= 0 else lon_025 + 360 |
| 255 | + tmp_country_data[country].append((lat_025, lon_025, ndvi_value)) |
| 256 | + |
| 257 | + for country, values in tmp_country_data.items(): |
| 258 | + if values: |
| 259 | + df = pd.DataFrame(values, columns=["lat", "lon", "ndvi"]) |
| 260 | + df = df.groupby(["lat", "lon"], as_index=False)["ndvi"].mean() |
| 261 | + ndvi_data[country] = list(df.itertuples(index=False, name=None)) |
166 | 262 |
|
167 | | - for country_iso, bbox in self.country_rectangles.items(): |
168 | | - min_lon, min_lat, max_lon, max_lat = bbox |
169 | | - |
170 | | - for lat_val in lat_points: |
171 | | - for lon_val in lon_points: |
172 | | - if min_lon > max_lon: |
173 | | - in_lon_range = lon_val >= min_lon or lon_val <= max_lon |
174 | | - else: |
175 | | - in_lon_range = min_lon <= lon_val <= max_lon |
176 | | - |
177 | | - if min_lat <= lat_val <= max_lat and in_lon_range: |
178 | | - i = np.abs(lat - lat_val).argmin() |
179 | | - j = np.abs(lon - lon_val).argmin() |
180 | | - ndvi_value = ndvi[i, j] |
181 | | - |
182 | | - if ( |
183 | | - ndvi_value != 255 |
184 | | - and ndvi_value > self.ndvi_threshold |
185 | | - ): |
186 | | - transformed_lon = ( |
187 | | - lon_val if lon_val >= 0 else lon_val + 360 |
188 | | - ) |
189 | | - country_name = get_country_name_from_iso( |
190 | | - country_iso |
191 | | - ) |
192 | | - ndvi_data[country_name].append( |
193 | | - (lat_val, transformed_lon, ndvi_value) |
194 | | - ) |
195 | | - |
196 | | - dataset.close() |
197 | | - return month_year, ndvi_data |
| 263 | + dataset.close() |
| 264 | + return month_year, ndvi_data |
198 | 265 |
|
199 | 266 | except Exception as e: |
200 | 267 | print(f"Error processing the file {nc_file_path}: {e}") |
@@ -313,12 +380,12 @@ def run_data_download( |
313 | 380 | land_dir = paths.LAND_DIR |
314 | 381 |
|
315 | 382 | if global_mode: |
316 | | - csv_file = f"{land_dir}/global_ndvi.csv" |
| 383 | + csv_file = f"{land_dir}/global_ndvi_data.csv" |
317 | 384 | elif region: |
318 | 385 | region_cleaned = region.replace(" ", "_") |
319 | | - csv_file = f"{land_dir}/{region_cleaned}_ndvi_test.csv" |
| 386 | + csv_file = f"{land_dir}/{region_cleaned}_ndvi_data.csv" |
320 | 387 | else: |
321 | | - csv_file = f"{land_dir}/default_ndvi.csv" |
| 388 | + csv_file = f"{land_dir}/default_ndvi_data.csv" |
322 | 389 |
|
323 | 390 | downloader = CopernicusLandDownloader( |
324 | 391 | links_url=links_url, |
|
0 commit comments