Skip to content

Commit 94fd3fe

Browse files
committed
PR review and test_populate_empty_dataset
1 parent b7f3c40 commit 94fd3fe

5 files changed

Lines changed: 90 additions & 17 deletions

File tree

src/mdio/converters/segy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def populate_non_dim_coordinates(
291291
return dataset, drop_vars_delayed
292292

293293

294-
def get_horizontal_coordinate_unit(segy_headers: list[Dimension]) -> LengthUnitModel | None:
294+
def _get_horizontal_coordinate_unit(segy_headers: list[Dimension]) -> LengthUnitModel | None:
295295
"""Get the coordinate unit from the SEG-Y headers."""
296296
name = TraceHeaderFieldsRev0.COORDINATE_UNIT.name.upper()
297297
unit_hdr = next((c for c in segy_headers if c.name.upper() == name), None)
@@ -506,7 +506,7 @@ def segy_to_mdio( # noqa PLR0913
506506
logger.warning("MDIO__IMPORT__RAW_HEADERS is experimental and expected to change or be removed.")
507507
mdio_template = _add_raw_headers_to_template(mdio_template)
508508

509-
horizontal_unit = get_horizontal_coordinate_unit(segy_dimensions)
509+
horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions)
510510
mdio_ds: Dataset = mdio_template.build_dataset(
511511
name=mdio_template.name,
512512
sizes=grid.shape,

src/mdio/creators/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""MDIO Data creation API."""
22

3-
from mdio.creators.mdio import create_empty_mdio
3+
from mdio.creators.mdio import create_empty
44

5-
__all__ = ["create_empty_mdio"]
5+
__all__ = ["create_empty"]

src/mdio/creators/mdio.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from mdio.api.io import to_mdio
1111
from mdio.builder.template_registry import TemplateRegistry
1212
from mdio.builder.xarray_builder import to_xarray_dataset
13-
from mdio.converters.segy import get_horizontal_coordinate_unit
1413
from mdio.converters.segy import populate_dim_coordinates
1514
from mdio.converters.type_converter import to_structured_type
1615
from mdio.core.grid import Grid
@@ -25,7 +24,7 @@
2524
from mdio.core.dimension import Dimension
2625

2726

28-
def create_empty_mdio( # noqa PLR0913
27+
def create_empty( # noqa PLR0913
2928
mdio_template_name: str,
3029
dimensions: list[Dimension],
3130
output_path: UPath | Path | str,
@@ -52,12 +51,11 @@ def create_empty_mdio( # noqa PLR0913
5251

5352
header_dtype = to_structured_type(get_segy_standard(1.0).trace.header.dtype) if create_headers else None
5453
grid = Grid(dims=dimensions)
55-
horizontal_unit = get_horizontal_coordinate_unit(grid.dims)
5654
mdio_template = TemplateRegistry().get(mdio_template_name)
5755
mdio_ds: Dataset = mdio_template.build_dataset(
5856
name=mdio_template_name,
5957
sizes=grid.shape,
60-
horizontal_coord_unit=horizontal_unit,
58+
horizontal_coord_unit=None,
6159
header_dtype=header_dtype,
6260
)
6361

@@ -69,9 +67,6 @@ def create_empty_mdio( # noqa PLR0913
6967
drop_vars_delayed = []
7068
dataset, drop_vars_delayed = populate_dim_coordinates(xr_dataset, grid, drop_vars_delayed=drop_vars_delayed)
7169

72-
# Set the trace mask to indicate all traces are live (since this is an empty dataset)
73-
dataset.trace_mask.data[:] = True
74-
7570
# Create the Zarr store with the correct structure but with empty arrays
7671
to_mdio(dataset, output_path=output_path, mode="w", compute=False)
7772

tests/conftest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ def segy_export_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path:
7979
def empty_mdio_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
8080
"""Make a temp file for empty MDIO testing."""
8181
if DEBUG_MODE:
82-
tmp_dir = Path("TMP/empty_mdio")
82+
tmp_dir = Path("tmp/empty_mdio")
8383
tmp_dir.mkdir(parents=True, exist_ok=True)
84-
return tmp_path_factory.mktemp(r"empty_mdio")
84+
else:
85+
tmp_dir = tmp_path_factory.mktemp(r"empty_mdio")
86+
return tmp_dir

tests/integration/test_create_empty_mdio.py renamed to tests/integration/test_create_empty.py

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,30 @@
22

33
from __future__ import annotations
44

5+
import math
6+
from turtle import speed
57
from typing import TYPE_CHECKING
68

79
import numpy as np
810
import pytest
911
from segy.standards import get_segy_standard
1012

13+
from mdio.builder.schemas.v1.units import LengthUnitEnum, LengthUnitModel, SpeedUnitEnum, SpeedUnitModel, TimeUnitEnum, TimeUnitModel
14+
1115
if TYPE_CHECKING:
1216
from pathlib import Path
1317

1418
from xarray import Dataset as xr_Dataset
1519

20+
from mdio.builder.schemas.v1.stats import CenteredBinHistogram, SummaryStatistics
21+
from tests.integration.test_segy_roundtrip_teapot import text_header_teapot_dome
1622
from tests.integration.testing_helpers import get_values
1723
from tests.integration.testing_helpers import validate_variable
1824

1925
from mdio import __version__
20-
from mdio.api.io import open_mdio
26+
from mdio.api.io import open_mdio, to_mdio
2127
from mdio.core import Dimension
22-
from mdio.creators.mdio import create_empty_mdio
28+
from mdio.creators.mdio import create_empty
2329

2430

2531
class TestCreateEmptyPostStack3DTimeMdio:
@@ -51,7 +57,7 @@ def _validate_empty_mdio_dataset(cls, ds: xr_Dataset, has_headers: bool) -> None
5157
# Validate the trace mask (should be all False for empty dataset, i.e. every trace is dead)
5258
validate_variable(ds, "trace_mask", (200, 300), ("inline", "crossline"), np.bool_, None, None)
5359
trace_mask = ds["trace_mask"].values
54-
assert np.all(trace_mask), "All traces should be marked as live in empty dataset"
60+
assert not np.any(trace_mask), "All traces should be marked as dead in empty dataset"
5561

5662
# Validate the amplitude data (should be empty)
5763
validate_variable(ds, "amplitude", (200, 300, 750), ("inline", "crossline", "time"), np.float32, None, None)
@@ -67,7 +73,7 @@ def _create_empty_mdio(cls, create_headers: bool, output_path: Path, overwrite:
6773
]
6874

6975
# Call create_empty
70-
create_empty_mdio(
76+
create_empty(
7177
mdio_template_name="PostStack3DTime",
7278
dimensions=dims,
7379
output_path=output_path,
@@ -166,3 +172,73 @@ def test_overwrite_behavior(self, empty_mdio_dir: Path) -> None:
166172
# Verify the garbage data was overwritten (should not exist)
167173
assert not garbage_file.exists(), "Garbage file should have been overwritten"
168174
assert not garbage_dir.exists(), "Garbage directory should have been overwritten"
175+
176+
177+
def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
    """Show how to populate an empty dataset and write it to a new store.

    The dataset opened from ``mdio_with_headers`` gets its default data
    variable, statistics, units, ``cdp_x``/``cdp_y`` coordinates, trace mask,
    trace headers (when present) and a custom attribute filled in, and is then
    written to ``populated_empty.mdio`` next to the input store.

    Args:
        mdio_with_headers: Path of the empty MDIO dataset to populate.
    """
    # Open an empty PostStack3DTime dataset with SEG-Y 1.0 headers
    # NOTES:
    # When this empty dataset was created from the 'PostStack3DTime' template and dimensions,
    # * 'inline', 'crossline', and 'time' dimension coordinate variables were created and pre-populated
    # * 'cdp_x', 'cdp_y' non-dimensional coordinate variables were created
    # * 'amplitude' variable was created (the name of this variable is specified in the template)
    #   HACK: in this example, we will use this variable to store the velocity data
    # * 'trace_mask' variable was created and pre-populated with 'False' fill values
    #   (all traces are marked as dead)
    # * 'headers' segy trace headers variable was created (if the dataset was created with create_headers=True)
    # * dataset attribute called 'attributes' was created
    ds = open_mdio(mdio_with_headers)

    # 1.A) Populate dataset's velocity
    var_name = ds.attrs["attributes"]["defaultVariableName"]
    velocity = ds[var_name]
    velocity[:5, :, :] = 1
    velocity[5:10, :, :] = 2
    velocity[50:100, :, :] = 3
    velocity[150:175, :, :] = -1

    # 1.B) Populate dataset's velocity statistics (optional)
    # masked_invalid hides NaN/inf samples, so only samples holding a real value are counted.
    valid_samples = np.ma.masked_invalid(velocity, copy=False)
    stats = SummaryStatistics(
        count=valid_samples.count(),
        min=valid_samples.min(),
        max=valid_samples.max(),
        sum=valid_samples.sum(dtype="float64"),
        sum_squares=(np.ma.power(valid_samples, 2).sum(dtype="float64")),
        histogram=CenteredBinHistogram(bin_centers=[], counts=[]),
    )
    velocity.attrs["statsV1"] = stats.model_dump_json()

    # 1.C) Set coordinate and data variable units (optional)
    # Units live in each variable's attrs; item assignment on the DataArray itself
    # (ds.time["unitsV1"] = ...) would not set an attribute.
    ds.time.attrs["unitsV1"] = TimeUnitModel(time=TimeUnitEnum.MILLISECOND).model_dump_json()

    ds.cdp_x.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump_json()
    ds.cdp_y.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump_json()

    velocity.attrs["unitsV1"] = SpeedUnitModel(speed=SpeedUnitEnum.FEET_PER_SECOND).model_dump_json()

    # 2) Populate the non-dimensional coordinate variables 'cdp_x' and 'cdp_y' (optional)
    origin = [270000, 3290000]  # survey x, y origin
    inline_azimuth_rad = math.radians(30.0)  # survey orientation, from the north to the east
    spacing = [50, 50]  # survey inline, crossline spacing
    inline_grid, xline_grid = np.meshgrid(ds.inline.values, ds.crossline.values, indexing="ij")
    sin_azimuth = math.sin(inline_azimuth_rad)
    cos_azimuth = math.cos(inline_azimuth_rad)
    ds.cdp_x[:] = origin[0] + inline_grid * spacing[0] * sin_azimuth + xline_grid * spacing[1] * cos_azimuth
    ds.cdp_y[:] = origin[1] + inline_grid * spacing[0] * cos_azimuth - xline_grid * spacing[1] * sin_azimuth

    # 3) Populate dataset's trace mask (optional)
    # A trace is live when its first sample holds a value (is not NaN).
    ds.trace_mask[:] = ~np.isnan(velocity[:, :, 0])

    # 4) Populate dataset's segy trace headers, if those were created (optional)
    # NOTE(review): string-indexing an xarray DataArray looks up a coordinate of that
    # name rather than a structured-dtype field, so these assignments may not reach
    # the header fields -- confirm against the layout of the 'headers' variable.
    if "headers" in ds.variables:
        ds.headers["cdp_x"][:] = ds.cdp_x
        ds.headers["cdp_y"][:] = ds.cdp_y

    # 5) Create dataset's custom attributes (optional)
    ds.attrs["attributes"]["createdBy"] = "John Doe"

    output_path = mdio_with_headers.parent / "populated_empty.mdio"
    to_mdio(ds, output_path=output_path, mode="w", compute=True)

    assert output_path.exists(), "Populated dataset should have been written"

0 commit comments

Comments
 (0)