Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ To download all the data used for the benchmark run the following commands:
uv run python -m climatebenchpress.data_loader.datasets.esa_biomass_cci
uv run python -m climatebenchpress.data_loader.datasets.cams
uv run python -m climatebenchpress.data_loader.datasets.ifs_uncompressed
uv run python -m climatebenchpress.data_loader.datasets.ifs_humidity
uv run python -m climatebenchpress.data_loader.datasets.nextgems
uv run python -m climatebenchpress.data_loader.datasets.cmip6.access_ta
uv run python -m climatebenchpress.data_loader.datasets.cmip6.access_tos
Expand Down
1 change: 1 addition & 0 deletions src/climatebenchpress/data_loader/datasets/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
from .cmip6.all import *
from .era5 import *
from .esa_biomass_cci import *
from .ifs_humidity import *
from .ifs_uncompressed import *
from .nextgems import *
74 changes: 74 additions & 0 deletions src/climatebenchpress/data_loader/datasets/ifs_humidity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
__all__ = ["IFSHumidityDataset"]

import argparse
from pathlib import Path

import xarray as xr

from .. import (
monitor,
open_downloaded_canonicalized_dataset,
open_downloaded_tiny_canonicalized_dataset,
)
from .abc import Dataset
from .ifs_uncompressed import load_hplp_data, regrid_to_regular

BASE_URL = "https://object-store.os-api.cci1.ecmwf.int/esiwacebucket"
Comment thread
treigerm marked this conversation as resolved.
Outdated


class IFSHumidityDataset(Dataset):
    """Dataset for the humidity field of the uncompressed IFS data.

    Contains data from the [hplp](https://apps.ecmwf.int/ifs-experiments/rd/hplp/)
    experiment from the Integrated Forecasting System (IFS) model. Crucially,
    this dataset contains uncompressed 64-bit floating point data.
    """

    name = "ifs-humidity"

    @staticmethod
    def download(download_path: Path, progress: bool = True):
        """Download and regrid the IFS specific humidity field.

        Loads the model-level ("ml") reduced Gaussian grid data at step 0,
        keeps only the specific humidity variable ``q``, regrids it from the
        O400 octahedral grid to a regular 0.25 x 0.25 degree lat/lon grid,
        and writes the result as a Zarr store under ``download_path``.

        A ``download.done`` marker file makes repeated calls a no-op once
        the download has completed successfully.
        """
        donefile = download_path / "download.done"
        if donefile.exists():
            return

        ds = load_hplp_data(leveltype="ml", gridtype="reduced_gg", step=0)
        # Keep only the specific humidity field for this dataset.
        ds = ds[["q"]]
        ds_regridded = regrid_to_regular(
            ds,
            in_grid={"grid": "O400"},
            out_grid={"grid": [0.25, 0.25]},
        )
        downloadfile = download_path / "ifs_humidity.zarr"
        with monitor.progress_bar(progress):
            # compute=False returns a delayed object so the progress bar can
            # track the actual write triggered by .compute().
            ds_regridded.to_zarr(
                downloadfile, mode="w", encoding=dict(), compute=False
            ).compute()

        # BUGFIX: create the marker checked above — without this the
        # expensive download/regrid/write re-runs on every invocation.
        donefile.touch()

    @staticmethod
    def open(download_path: Path) -> xr.Dataset:
        """Open the downloaded Zarr store as a CF-compliant dataset.

        Only the first time step is kept, and each variable is rechunked
        into a single chunk.
        """
        ds = xr.open_dataset(download_path / "ifs_humidity.zarr")
        ds = ds.isel(time=slice(0, 1)).chunk(-1)

        # Needed to make the dataset CF-compliant.
        ds.longitude.attrs["axis"] = "X"
        ds.latitude.attrs["axis"] = "Y"
        ds.level.attrs["axis"] = "Z"
        ds.time.attrs["standard_name"] = "time"
        return ds


if __name__ == "__main__":
    # CLI entry point: materialize the canonicalized dataset (and its tiny
    # variant), then list the variables and their dimensions.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--basepath", type=Path, default=Path())
    cli_args = arg_parser.parse_args()

    ds = open_downloaded_canonicalized_dataset(
        IFSHumidityDataset, basepath=cli_args.basepath
    )
    open_downloaded_tiny_canonicalized_dataset(
        IFSHumidityDataset, basepath=cli_args.basepath
    )

    for name, array in ds.items():
        print(f"- {name}: {array.dims}")
44 changes: 30 additions & 14 deletions src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,27 @@ def regrid_to_regular(ds, in_grid, out_grid):
"""
out_data = {var: [] for var in ds.data_vars}
for var in ds.data_vars:
var_has_level = "level" in ds[var].dims
for time in ds.time:
r = earthkit.regrid.interpolate(
ds[var].sel(time=time).values,
in_grid=in_grid,
out_grid=out_grid,
method="linear",
)
out_data[var].append(r)
if var_has_level:
level_data = []
for level in ds[var].level:
r = earthkit.regrid.interpolate(
ds[var].sel(time=time, level=level).values,
in_grid=in_grid,
out_grid=out_grid,
method="linear",
)
level_data.append(r)
out_data[var].append(level_data)
else:
r = earthkit.regrid.interpolate(
ds[var].sel(time=time).values,
in_grid=in_grid,
out_grid=out_grid,
method="linear",
)
out_data[var].append(r)

dx = out_grid["grid"][0]
assert (
Expand All @@ -146,13 +159,16 @@ def regrid_to_regular(ds, in_grid, out_grid):
"latitude": lats,
"longitude": lons,
}
out_ds = xr.Dataset(
{
var: (("time", "latitude", "longitude"), out_data[var])
for var in ds.data_vars
},
coords=coords,
)

data_vars = {}
for var in ds.data_vars:
if "level" in ds[var].dims:
coords["level"] = ds[var].level
data_vars[var] = (("time", "level", "latitude", "longitude"), out_data[var])
else:
data_vars[var] = (("time", "latitude", "longitude"), out_data[var])

out_ds = xr.Dataset(data_vars, coords=coords)
return out_ds


Expand Down