Skip to content

Commit e1e3ce2

Browse files
committed
Add export to segy to test_populate_empty_dataset
1 parent bf2e41f commit e1e3ce2

2 files changed

Lines changed: 88 additions & 24 deletions

File tree

src/mdio/creators/mdio.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ def create_empty( # noqa PLR0913
6666
drop_vars_delayed = []
6767
dataset, drop_vars_delayed = populate_dim_coordinates(xr_dataset, grid, drop_vars_delayed=drop_vars_delayed)
6868

69+
if headers:
70+
# Since the headers were provided, the user wants to export to SEG-Y
71+
# Add a dummy segy_file_header variable used to export to SEG-Y
72+
dataset["segy_file_header"] = ((), "")
73+
6974
# Create the Zarr store with the correct structure but with empty arrays
7075
to_mdio(dataset, output_path=output_path, mode="w", compute=False)
7176

tests/integration/test_create_empty.py

Lines changed: 83 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import pytest
1010
from segy.schema import HeaderField
1111
from segy.schema import HeaderSpec
12+
from segy.schema import ScalarType
13+
from segy.standards import get_segy_standard
1214

1315
from mdio.builder.schemas.v1.units import LengthUnitEnum
1416
from mdio.builder.schemas.v1.units import LengthUnitModel
@@ -22,6 +24,7 @@
2224

2325
from xarray import Dataset as xr_Dataset
2426

27+
2528
from tests.integration.testing_helpers import get_values
2629
from tests.integration.testing_helpers import validate_variable
2730

@@ -30,6 +33,7 @@
3033
from mdio.api.io import to_mdio
3134
from mdio.builder.schemas.v1.stats import CenteredBinHistogram
3235
from mdio.builder.schemas.v1.stats import SummaryStatistics
36+
from mdio.converters.mdio import mdio_to_segy
3337
from mdio.core import Dimension
3438
from mdio.creators.mdio import create_empty
3539

@@ -38,15 +42,18 @@ class TestCreateEmptyPostStack3DTimeMdio:
3842
"""Tests for create_empty_mdio function."""
3943

4044
@classmethod
41-
def _get_header_spec(cls) -> HeaderSpec:
45+
def _get_customized_v10_trace_header_spec(cls) -> HeaderSpec:
4246
"""Get the header spec for the MDIO dataset."""
4347
trace_header_fields = [
44-
HeaderField(name="inline", byte=17, format="int32"),
45-
HeaderField(name="crossline", byte=13, format="int32"),
46-
HeaderField(name="cdp_x", byte=181, format="int32"),
47-
HeaderField(name="cdp_y", byte=185, format="int32"),
48+
HeaderField(name="inline", byte=17, format=ScalarType.INT32),
49+
HeaderField(name="crossline", byte=13, format=ScalarType.INT32),
50+
HeaderField(name="cdp_x", byte=181, format=ScalarType.INT32),
51+
HeaderField(name="cdp_y", byte=185, format=ScalarType.INT32),
52+
HeaderField(name="coordinate_scalar", byte=71, format=ScalarType.INT16),
4853
]
49-
return HeaderSpec(fields=trace_header_fields)
54+
hs: HeaderSpec = get_segy_standard(1.0).trace.header
55+
hs.customize(fields=trace_header_fields)
56+
return hs
5057

5158
@classmethod
5259
def _validate_empty_mdio_dataset(cls, ds: xr_Dataset, has_headers: bool) -> None:
@@ -66,10 +73,12 @@ def _validate_empty_mdio_dataset(cls, ds: xr_Dataset, has_headers: bool) -> None
6673
if has_headers:
6774
# Validate the headers (should be empty for empty dataset)
6875
# Infer the dtype from segy_spec and ignore endianness
69-
header_dtype = cls._get_header_spec().dtype.newbyteorder("native")
76+
header_dtype = cls._get_customized_v10_trace_header_spec().dtype.newbyteorder("native")
7077
validate_variable(ds, "headers", (200, 300), ("inline", "crossline"), header_dtype, None, None)
78+
validate_variable(ds, "segy_file_header", (), (), np.dtype("U1"), None, None)
7179
else:
7280
assert "headers" not in ds.variables
81+
assert "segy_file_header" not in ds.variables
7382

7483
# Validate the trace mask (should be all True for empty dataset)
7584
validate_variable(ds, "trace_mask", (200, 300), ("inline", "crossline"), np.bool_, None, None)
@@ -89,7 +98,9 @@ def _create_empty_mdio(cls, create_headers: bool, output_path: Path, overwrite:
8998
Dimension(name="time", coords=range(0, 3000, 4)), # 0-3 seconds 4ms sample rate
9099
]
91100

92-
headers = cls._get_header_spec() if create_headers else None
101+
# If later on, we want to export to SEG-Y, we need to provide the trace header spec.
102+
# The HeaderSpec can be either standard or customized.
103+
headers = cls._get_customized_v10_trace_header_spec() if create_headers else None
93104
create_empty(
94105
mdio_template_name="PostStack3DTime",
95106
dimensions=dims,
@@ -167,7 +178,7 @@ def test_overwrite_behavior(self, empty_mdio_dir: Path) -> None:
167178
garbage_file = empty_mdio / "garbage.txt"
168179
garbage_file.write_text("This is garbage data that should be overwritten")
169180
garbage_dir = empty_mdio / "garbage_dir"
170-
garbage_dir.mkdir()
181+
garbage_dir.mkdir(exist_ok=True)
171182
(garbage_dir / "nested_garbage.txt").write_text("More garbage")
172183

173184
# Verify the directory exists with garbage data
@@ -201,7 +212,9 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
201212
# HACK: in this example, we will use this variable to store the velocity data
202213
# * 'trace_mask' variable was created and pre-populated with 'False' fill values
203214
# (all traces are marked as dead)
204-
# * 'headers' segy trace headers variable was created (if the dataset was created with headers not None)
215+
# * 'headers' and 'segy_file_header' variables were created (if the dataset was created with
216+
# headers not None). The 'headers' variable structured datatype is defined by the HeaderSpec
217+
# that was used to create the empty MDIO
205218
# * dataset attribute called 'attributes' was created
206219
ds = open_mdio(mdio_with_headers)
207220

@@ -223,7 +236,7 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
223236
sum_squares=(np.ma.power(nonzero_samples, 2).sum(dtype="float64")),
224237
histogram=CenteredBinHistogram(bin_centers=[], counts=[]),
225238
)
226-
velocity.attrs["statsV1"] = stats.model_dump_json()
239+
velocity.attrs["statsV1"] = stats.model_dump(mode="json")
227240

228241
# 3) Populate the non-dimensional coordinate variables 'cdp_x' and 'cdp_y' (optional)
229242
origin = [270000, 3290000] # survey x, y origin
@@ -239,24 +252,70 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
239252
ds.trace_mask[:] = ~np.isnan(velocity[:, :, 0])
240253

241254
# 5) Set coordinate and data variable units (optional)
242-
ds.time["unitsV1"] = TimeUnitModel(time=TimeUnitEnum.MILLISECOND).model_dump_json()
255+
ds.time.attrs["unitsV1"] = TimeUnitModel(time=TimeUnitEnum.MILLISECOND).model_dump(mode="json")
243256

244-
ds.cdp_x.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump_json()
245-
ds.cdp_x.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump_json()
257+
ds.cdp_x.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump(mode="json")
258+
ds.cdp_x.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump(mode="json")
246259

247-
velocity.attrs["unitsV1"] = SpeedUnitModel(speed=SpeedUnitEnum.FEET_PER_SECOND).model_dump_json()
260+
velocity.attrs["unitsV1"] = SpeedUnitModel(speed=SpeedUnitEnum.FEET_PER_SECOND).model_dump(mode="json")
248261

249-
# 6) Populate dataset's segy trace headers, if those were created (optional)
262+
# 6) Populate dataset's segy trace headers, if those were created (required only if we want to export to SEG-Y)
250263
if "headers" in ds.variables:
251-
# numpy broadcasting (200, 1) array to (200, 300) array
252-
ds["headers"].values["inline"] = ds.inline.values[:, np.newaxis]
253-
# numpy broadcasting (1, 300) array to (200, 300) array
254-
ds["headers"].values["crossline"] = ds.crossline.values[np.newaxis, :]
255-
ds["headers"]["cdp_x"][:] = ds.cdp_x
256-
ds["headers"]["cdp_y"][:] = ds.cdp_y
264+
# Both the structured "headers" and the dummy "segy_file_header" variables are
265+
# required to enable SEG-Y to MDIO conversion
266+
267+
# Populate the structured trace "headers" variable
268+
ds["headers"].values["inline"] = inline_grid
269+
ds["headers"].values["crossline"] = xline_grid
270+
# coordinate_scalar:
271+
# Scalar to be applied to all coordinates specified in Standard Trace Header bytes
272+
# 73–88 and to bytes Trace Header 181–188 to give the real value. Scalar = 1,
273+
# ±10, ±100, ±1000, or ±10,000. If positive, scalar is used as a multiplier; if
274+
# negative, scalar is used as divisor. A value of zero is assumed to be a scalar
275+
# value of 1.
276+
ds["headers"].values["coordinate_scalar"][:] = np.int16(-100)
277+
ds["headers"].values["cdp_x"][:] = np.int32(ds.cdp_x * 100)
278+
ds["headers"].values["cdp_y"][:] = np.int32(ds.cdp_y * 100)
279+
280+
# Fill its metadata (.attrs) with 'textHeader' and 'binaryHeader'.
281+
ds["segy_file_header"].attrs.update(
282+
{
283+
"textHeader": "\n".join(
284+
[
285+
"C01 BYTES 13-16: CROSSLINE " + " " * 47,
286+
"C02 BYTES 17-20: INLINE " + " " * 47,
287+
"C03 BYTES 71-74: COORDINATE SCALAR " + " " * 47,
288+
"C04 BYTES 181-184: CDP X " + " " * 47,
289+
"C05 BYTES 185-188: CDP Y " + " " * 47,
290+
*(f"C{i:02d}" + " " * 77 for i in range(6, 41)),
291+
]
292+
),
293+
"binaryHeader": {
294+
"data_sample_format": 1,
295+
"sample_interval": int(ds.time[1] - ds.time[0]),
296+
"samples_per_trace": ds.time.size,
297+
"segy_revision_major": 0,
298+
"segy_revision_minor": 0,
299+
},
300+
}
301+
)
257302

258303
# 7) Create dataset's custom attributes (optional)
259304
ds.attrs["attributes"]["createdBy"] = "John Doe"
260305

261-
output_path = mdio_with_headers.parent / "populated_empty.mdio"
262-
to_mdio(ds, output_path=output_path, mode="w", compute=True)
306+
# 8) Export to MDIO
307+
output_path_mdio = mdio_with_headers.parent / "populated_empty.mdio"
308+
to_mdio(ds, output_path=output_path_mdio, mode="w", compute=True)
309+
310+
# 9) Convert the populated emptyMDIO to SEG-Y
311+
if "headers" in ds.variables:
312+
# Select the SEG-Y standard to use for the conversion
313+
custom_segy_spec = get_segy_standard(1.0)
314+
# Customize to use the same HeaderSpec that was used to create the empty MDIO
315+
custom_segy_spec.trace.header = self._get_customized_v10_trace_header_spec()
316+
# Convert the MDIO file to SEG-Y
317+
mdio_to_segy(
318+
segy_spec=custom_segy_spec,
319+
input_path=output_path_mdio,
320+
output_path=mdio_with_headers.parent / "populated_empty.sgy",
321+
)

0 commit comments

Comments
 (0)