Skip to content

Commit 5775533

Browse files
committed
Add IFS humidity data set
1 parent 9595e3f commit 5775533

5 files changed

Lines changed: 109 additions & 23 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ To download all the data used for the benchmark run the following commands:
1919
uv run python -m climatebenchpress.data_loader.datasets.esa_biomass_cci
2020
uv run python -m climatebenchpress.data_loader.datasets.cams
2121
uv run python -m climatebenchpress.data_loader.datasets.ifs_uncompressed
22+
uv run python -m climatebenchpress.data_loader.datasets.ifs_humidity
2223
uv run python -m climatebenchpress.data_loader.datasets.nextgems
2324
uv run python -m climatebenchpress.data_loader.datasets.cmip6.access_ta
2425
uv run python -m climatebenchpress.data_loader.datasets.cmip6.access_tos

pyproject.toml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,9 @@ dependencies = [
88
"cf-xarray~=0.10.0",
99
"cftime~=1.6.0",
1010
"dask>=2024.12.0,<2025.4",
11-
<<<<<<< HEAD
1211
"earthkit-regrid~=0.5.0",
1312
"fsspec>=2024.10.0,<2025.4",
1413
"gribscan~=0.0.14",
15-
=======
16-
"earthkit-regrid>=0.5.0",
17-
"fsspec>=2024.10.0,<2025.4",
18-
"gribscan>=0.0.14",
19-
>>>>>>> 868ed98 (Add IFS uncompressed data)
2014
"healpy~=1.18.0",
2115
# These versions need to be pinned to be compatible with the NextGEMS
2216
# catalog at https://data.nextgems-h2020.eu/online.yaml.

src/climatebenchpress/data_loader/datasets/all.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44
from .cmip6.all import *
55
from .era5 import *
66
from .esa_biomass_cci import *
7+
from .ifs_humidity import *
78
from .ifs_uncompressed import *
89
from .nextgems import *
src/climatebenchpress/data_loader/datasets/ifs_humidity.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
__all__ = ["IFSHumidityDataset"]
2+
3+
import argparse
4+
from pathlib import Path
5+
6+
import xarray as xr
7+
8+
from .. import (
9+
monitor,
10+
open_downloaded_canonicalized_dataset,
11+
open_downloaded_tiny_canonicalized_dataset,
12+
)
13+
from .abc import Dataset
14+
from .ifs_uncompressed import load_hplp_data, regrid_to_regular
15+
16+
BASE_URL = "https://object-store.os-api.cci1.ecmwf.int/esiwacebucket"
17+
18+
19+
class IFSHumidityDataset(Dataset):
    """Dataset for the humidity field of the uncompressed IFS data.

    Contains data from the [hplp](https://apps.ecmwf.int/ifs-experiments/rd/hplp/)
    experiment from the Integrated Forecasting System (IFS) model. Crucially,
    this dataset contains uncompressed 64-bit floating point data.
    """

    name = "ifs-humidity"

    @staticmethod
    def download(download_path: Path, progress: bool = True):
        """Load the `q` variable, regrid it, and write it to a Zarr store.

        The result is written to `download_path / "ifs_humidity.zarr"`. When
        `download_path / "download.done"` already exists the method returns
        immediately; the marker is created only after the store has been
        written, so an interrupted run is retried on the next call.

        `download_path` is the directory that receives the Zarr store and the
        marker file. `progress` is forwarded to `monitor.progress_bar`.
        """
        donefile = download_path / "download.done"
        if donefile.exists():
            return

        # Model-level ("ml") fields at step 0 on the source grid.
        ds = load_hplp_data(leveltype="ml", gridtype="reduced_gg", step=0)
        # Double brackets keep an xr.Dataset holding only `q`.
        ds = ds[["q"]]
        # NOTE(review): "O400" is presumably the native grid of the hplp
        # experiment -- confirm against load_hplp_data.
        ds_regridded = regrid_to_regular(
            ds,
            in_grid={"grid": "O400"},
            out_grid={"grid": [0.25, 0.25]},
        )
        downloadfile = download_path / "ifs_humidity.zarr"
        with monitor.progress_bar(progress):
            # compute=False defers the write so that it runs inside the
            # progress-bar context when .compute() is called.
            ds_regridded.to_zarr(
                downloadfile, mode="w", encoding=dict(), compute=False
            ).compute()

        # Record completion so the early-return guard above can take effect;
        # without the marker every call would repeat the whole download.
        donefile.touch()

    @staticmethod
    def open(download_path: Path) -> xr.Dataset:
        """Open the stored humidity data and annotate its coordinates.

        Reads `download_path / "ifs_humidity.zarr"`, keeps only the first
        time step (the `time` dimension is retained with length one) and
        rechunks so that every dimension is a single chunk.
        """
        ds = xr.open_dataset(download_path / "ifs_humidity.zarr")
        ds = ds.isel(time=slice(0, 1)).chunk(-1)

        # Needed to make the dataset CF-compliant.
        ds.longitude.attrs["axis"] = "X"
        ds.latitude.attrs["axis"] = "Y"
        ds.level.attrs["axis"] = "Z"
        ds.time.attrs["standard_name"] = "time"
        return ds
59+
60+
61+
if __name__ == "__main__":
    # Command-line entry point: make sure the full and the tiny canonicalized
    # variants of the dataset exist under --basepath, then list the variables
    # of the full dataset together with their dimension names.
    cli = argparse.ArgumentParser()
    cli.add_argument("--basepath", type=Path, default=Path())
    basepath = cli.parse_args().basepath

    dataset = open_downloaded_canonicalized_dataset(
        IFSHumidityDataset, basepath=basepath
    )
    # Called for its side effects only; the returned tiny dataset is not used.
    open_downloaded_tiny_canonicalized_dataset(
        IFSHumidityDataset, basepath=basepath
    )

    for v, da in dataset.items():
        print(f"- {v}: {da.dims}")

src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -126,33 +126,49 @@ def regrid_to_regular(ds, in_grid, out_grid):
126126
"""
127127
out_data = {var: [] for var in ds.data_vars}
128128
for var in ds.data_vars:
129+
var_has_level = "level" in ds[var].dims
129130
for time in ds.time:
130-
r = earthkit.regrid.interpolate(
131-
ds[var].sel(time=time).values,
132-
in_grid=in_grid,
133-
out_grid=out_grid,
134-
method="linear",
135-
)
136-
out_data[var].append(r)
131+
if var_has_level:
132+
level_data = []
133+
for level in ds[var].level:
134+
r = earthkit.regrid.interpolate(
135+
ds[var].sel(time=time, level=level).values,
136+
in_grid=in_grid,
137+
out_grid=out_grid,
138+
method="linear",
139+
)
140+
level_data.append(r)
141+
out_data[var].append(level_data)
142+
else:
143+
r = earthkit.regrid.interpolate(
144+
ds[var].sel(time=time).values,
145+
in_grid=in_grid,
146+
out_grid=out_grid,
147+
method="linear",
148+
)
149+
out_data[var].append(r)
137150

138151
dx = out_grid["grid"][0]
139-
assert out_grid["grid"][0] == out_grid["grid"][1], (
140-
"Only grids with equal latitude and longitude spacing are supported."
141-
)
152+
assert (
153+
out_grid["grid"][0] == out_grid["grid"][1]
154+
), "Only grids with equal latitude and longitude spacing are supported."
142155
lats = np.linspace(90, -90, int(180 / dx) + 1)
143156
lons = np.linspace(0, 360 - dx, int(360 / dx))
144157
coords = {
145158
"time": ds.time,
146159
"latitude": lats,
147160
"longitude": lons,
148161
}
149-
out_ds = xr.Dataset(
150-
{
151-
var: (("time", "latitude", "longitude"), out_data[var])
152-
for var in ds.data_vars
153-
},
154-
coords=coords,
155-
)
162+
163+
data_vars = {}
164+
for var in ds.data_vars:
165+
if "level" in ds[var].dims:
166+
coords["level"] = ds[var].level
167+
data_vars[var] = (("time", "level", "latitude", "longitude"), out_data[var])
168+
else:
169+
data_vars[var] = (("time", "latitude", "longitude"), out_data[var])
170+
171+
out_ds = xr.Dataset(data_vars, coords=coords)
156172
return out_ds
157173

158174

0 commit comments

Comments
 (0)