1313from .abc import Dataset
1414
# Maximum number of attempts made when downloading the dataset file.
NUM_RETRIES = 3

# Define rough bounding box coordinates for mainland France.
# Format: [min_longitude, min_latitude, max_longitude, max_latitude].
FRANCE_BBOX = [-5.5, 42.3, 9.6, 51.1]

# Biomass estimate for the year 2020.
BIOMASS_URL = "https://dap.ceda.ac.uk/neodc/esacci/biomass/data/agb/maps/v5.01/netcdf/ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv5.01.nc"
1823
1924
2025class EsaBiomassCciDataset (Dataset ):
2126 name = "esa-biomass-cci"
2227
@staticmethod
def download(download_path: Path, progress: bool = True) -> None:
    """Download the file at ``BIOMASS_URL`` into ``download_path``.

    The file is stored under its original name (the last component of the
    URL). Up to ``NUM_RETRIES`` attempts are made; the first successful
    attempt stops the loop. If every attempt fails, a warning is logged and
    the method returns normally without raising.

    Args:
        download_path: Directory the downloaded file is written into.
        progress: Forwarded unchanged to ``_download_netcdf``; presumably
            toggles a progress display (confirm against that helper).
    """
    output_path = download_path / Path(BIOMASS_URL).name
    for _ in range(NUM_RETRIES):
        if _download_netcdf(BIOMASS_URL, output_path, progress):
            break
    else:
        # Reached only when no attempt succeeded (including the case of
        # NUM_RETRIES == 0), so no separate success flag is needed.
        logging.warning(f"Failed to download {BIOMASS_URL}")
3938
4039 @staticmethod
4140 def open (download_path : Path ) -> xr .Dataset :
@@ -44,12 +43,28 @@ def open(download_path: Path) -> xr.Dataset:
4443 # Needed to make the dataset CF-compliant.
4544 ds .lon .attrs ["axis" ] = "X"
4645 ds .lat .attrs ["axis" ] = "Y"
46+ # We are constraining the dataset to mainland France to reduce its overall size.
47+ # The global snapshot would be around 20 GB, which is too large for our use case.
48+ # We chose France because it should have a fairly diverse set of biomass estimates
49+ # but the choice is overall somewhat arbitrary.
50+ ds = ds .sel (
51+ lon = slice (FRANCE_BBOX [0 ], FRANCE_BBOX [2 ]),
52+ lat = slice (FRANCE_BBOX [3 ], FRANCE_BBOX [1 ]),
53+ ).chunk (- 1 )
4754 return ds [["agb" ]]
4855
4956
5057if __name__ == "__main__" :
5158 ds = open_downloaded_canonicalized_dataset (EsaBiomassCciDataset )
52- open_downloaded_tiny_canonicalized_dataset (EsaBiomassCciDataset , slices = FRANCE_BBOX )
59+ num_lon , num_lat = ds .lon .size , ds .lat .size
60+ open_downloaded_tiny_canonicalized_dataset (
61+ EsaBiomassCciDataset ,
62+ # Use a smaller spatial subset for the tiny dataset.
63+ slices = {
64+ "X" : slice (num_lon // 2 , (num_lon // 2 ) + 500 ),
65+ "Y" : slice (num_lat // 2 , (num_lat // 2 ) + 500 ),
66+ },
67+ )
5368
5469 for v , da in ds .items ():
5570 print (f"- { v } : { da .dims } " )
0 commit comments