99import pytest
1010from segy .schema import HeaderField
1111from segy .schema import HeaderSpec
12+ from segy .schema import ScalarType
13+ from segy .standards import get_segy_standard
1214
1315from mdio .builder .schemas .v1 .units import LengthUnitEnum
1416from mdio .builder .schemas .v1 .units import LengthUnitModel
2224
2325 from xarray import Dataset as xr_Dataset
2426
27+
2528from tests .integration .testing_helpers import get_values
2629from tests .integration .testing_helpers import validate_variable
2730
3033from mdio .api .io import to_mdio
3134from mdio .builder .schemas .v1 .stats import CenteredBinHistogram
3235from mdio .builder .schemas .v1 .stats import SummaryStatistics
36+ from mdio .converters .mdio import mdio_to_segy
3337from mdio .core import Dimension
3438from mdio .creators .mdio import create_empty
3539
@@ -38,15 +42,18 @@ class TestCreateEmptyPostStack3DTimeMdio:
3842 """Tests for create_empty_mdio function."""
3943
4044 @classmethod
41- def _get_header_spec (cls ) -> HeaderSpec :
45+ def _get_customized_v10_trace_header_spec (cls ) -> HeaderSpec :
4246 """Get the header spec for the MDIO dataset."""
4347 trace_header_fields = [
44- HeaderField (name = "inline" , byte = 17 , format = "int32" ),
45- HeaderField (name = "crossline" , byte = 13 , format = "int32" ),
46- HeaderField (name = "cdp_x" , byte = 181 , format = "int32" ),
47- HeaderField (name = "cdp_y" , byte = 185 , format = "int32" ),
48+ HeaderField (name = "inline" , byte = 17 , format = ScalarType .INT32 ),
49+ HeaderField (name = "crossline" , byte = 13 , format = ScalarType .INT32 ),
50+ HeaderField (name = "cdp_x" , byte = 181 , format = ScalarType .INT32 ),
51+ HeaderField (name = "cdp_y" , byte = 185 , format = ScalarType .INT32 ),
52+ HeaderField (name = "coordinate_scalar" , byte = 71 , format = ScalarType .INT16 ),
4853 ]
49- return HeaderSpec (fields = trace_header_fields )
54+ hs : HeaderSpec = get_segy_standard (1.0 ).trace .header
55+ hs .customize (fields = trace_header_fields )
56+ return hs
5057
5158 @classmethod
5259 def _validate_empty_mdio_dataset (cls , ds : xr_Dataset , has_headers : bool ) -> None :
@@ -66,10 +73,12 @@ def _validate_empty_mdio_dataset(cls, ds: xr_Dataset, has_headers: bool) -> None
6673 if has_headers :
6774 # Validate the headers (should be empty for empty dataset)
6875 # Infer the dtype from segy_spec and ignore endianness
69- header_dtype = cls ._get_header_spec ().dtype .newbyteorder ("native" )
76+ header_dtype = cls ._get_customized_v10_trace_header_spec ().dtype .newbyteorder ("native" )
7077 validate_variable (ds , "headers" , (200 , 300 ), ("inline" , "crossline" ), header_dtype , None , None )
78+ validate_variable (ds , "segy_file_header" , (), (), np .dtype ("U1" ), None , None )
7179 else :
7280 assert "headers" not in ds .variables
81+ assert "segy_file_header" not in ds .variables
7382
7483 # Validate the trace mask (should be all True for empty dataset)
7584 validate_variable (ds , "trace_mask" , (200 , 300 ), ("inline" , "crossline" ), np .bool_ , None , None )
@@ -89,7 +98,9 @@ def _create_empty_mdio(cls, create_headers: bool, output_path: Path, overwrite:
8998 Dimension (name = "time" , coords = range (0 , 3000 , 4 )), # 0-3 seconds 4ms sample rate
9099 ]
91100
92- headers = cls ._get_header_spec () if create_headers else None
101+ # If later on, we want to export to SEG-Y, we need to provide the trace header spec.
102+ # The HeaderSpec can be either standard or customized.
103+ headers = cls ._get_customized_v10_trace_header_spec () if create_headers else None
93104 create_empty (
94105 mdio_template_name = "PostStack3DTime" ,
95106 dimensions = dims ,
@@ -167,7 +178,7 @@ def test_overwrite_behavior(self, empty_mdio_dir: Path) -> None:
167178 garbage_file = empty_mdio / "garbage.txt"
168179 garbage_file .write_text ("This is garbage data that should be overwritten" )
169180 garbage_dir = empty_mdio / "garbage_dir"
170- garbage_dir .mkdir ()
181+ garbage_dir .mkdir (exist_ok = True )
171182 (garbage_dir / "nested_garbage.txt" ).write_text ("More garbage" )
172183
173184 # Verify the directory exists with garbage data
@@ -201,7 +212,9 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
201212 # HACK: in this example, we will use this variable to store the velocity data
202213 # * 'trace_mask' variable was created and pre-populated with 'False' fill values
203214 # (all traces are marked as dead)
204- # * 'headers' segy trace headers variable was created (if the dataset was created with headers not None)
215+ # * 'headers' and 'segy_file_header' variables were created (if the dataset was created with
216+ # headers not None). The 'headers' variable structured datatype is defined by the HeaderSpec
217+ # that was used to create the empty MDIO
205218 # * dataset attribute called 'attributes' was created
206219 ds = open_mdio (mdio_with_headers )
207220
@@ -223,7 +236,7 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
223236 sum_squares = (np .ma .power (nonzero_samples , 2 ).sum (dtype = "float64" )),
224237 histogram = CenteredBinHistogram (bin_centers = [], counts = []),
225238 )
226- velocity .attrs ["statsV1" ] = stats .model_dump_json ( )
239+ velocity .attrs ["statsV1" ] = stats .model_dump ( mode = "json" )
227240
228241 # 3) Populate the non-dimensional coordinate variables 'cdp_x' and 'cdp_y' (optional)
229242 origin = [270000 , 3290000 ] # survey x, y origin
@@ -239,24 +252,70 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
239252 ds .trace_mask [:] = ~ np .isnan (velocity [:, :, 0 ])
240253
241254 # 5) Set coordinate and data variable units (optional)
242- ds .time ["unitsV1" ] = TimeUnitModel (time = TimeUnitEnum .MILLISECOND ).model_dump_json ( )
255+ ds .time . attrs ["unitsV1" ] = TimeUnitModel (time = TimeUnitEnum .MILLISECOND ).model_dump ( mode = "json" )
243256
244- ds .cdp_x .attrs ["unitsV1" ] = LengthUnitModel (length = LengthUnitEnum .FOOT ).model_dump_json ( )
245- ds .cdp_x .attrs ["unitsV1" ] = LengthUnitModel (length = LengthUnitEnum .FOOT ).model_dump_json ( )
257+ ds .cdp_x .attrs ["unitsV1" ] = LengthUnitModel (length = LengthUnitEnum .FOOT ).model_dump ( mode = "json" )
258+ ds .cdp_x .attrs ["unitsV1" ] = LengthUnitModel (length = LengthUnitEnum .FOOT ).model_dump ( mode = "json" )
246259
247- velocity .attrs ["unitsV1" ] = SpeedUnitModel (speed = SpeedUnitEnum .FEET_PER_SECOND ).model_dump_json ( )
260+ velocity .attrs ["unitsV1" ] = SpeedUnitModel (speed = SpeedUnitEnum .FEET_PER_SECOND ).model_dump ( mode = "json" )
248261
249- # 6) Populate dataset's segy trace headers, if those were created (optional )
262+ # 6) Populate dataset's segy trace headers, if those were created (required only if we want to export to SEG-Y )
250263 if "headers" in ds .variables :
251- # numpy broadcasting (200, 1) array to (200, 300) array
252- ds ["headers" ].values ["inline" ] = ds .inline .values [:, np .newaxis ]
253- # numpy broadcasting (1, 300) array to (200, 300) array
254- ds ["headers" ].values ["crossline" ] = ds .crossline .values [np .newaxis , :]
255- ds ["headers" ]["cdp_x" ][:] = ds .cdp_x
256- ds ["headers" ]["cdp_y" ][:] = ds .cdp_y
264+ # Both the structured "headers" and the dummy "segy_file_header" variables are
265+ # required to enable SEG-Y to MDIO conversion
266+
267+ # Populate the structured trace "headers" variable
268+ ds ["headers" ].values ["inline" ] = inline_grid
269+ ds ["headers" ].values ["crossline" ] = xline_grid
270+ # coordinate_scalar:
271+ # Scalar to be applied to all coordinates specified in Standard Trace Header bytes
272+ # 73–88 and to bytes Trace Header 181–188 to give the real value. Scalar = 1,
273+ # ±10, ±100, ±1000, or ±10,000. If positive, scalar is used as a multiplier; if
274+ # negative, scalar is used as divisor. A value of zero is assumed to be a scalar
275+ # value of 1.
276+ ds ["headers" ].values ["coordinate_scalar" ][:] = np .int16 (- 100 )
277+ ds ["headers" ].values ["cdp_x" ][:] = np .int32 (ds .cdp_x * 100 )
278+ ds ["headers" ].values ["cdp_y" ][:] = np .int32 (ds .cdp_y * 100 )
279+
280+ # Fill its metadata (.attrs) with 'textHeader' and 'binaryHeader'.
281+ ds ["segy_file_header" ].attrs .update (
282+ {
283+ "textHeader" : "\n " .join (
284+ [
285+ "C01 BYTES 13-16: CROSSLINE " + " " * 47 ,
286+ "C02 BYTES 17-20: INLINE " + " " * 47 ,
287+ "C03 BYTES 71-74: COORDINATE SCALAR " + " " * 47 ,
288+ "C04 BYTES 181-184: CDP X " + " " * 47 ,
289+ "C05 BYTES 185-188: CDP Y " + " " * 47 ,
290+ * (f"C{ i :02d} " + " " * 77 for i in range (6 , 41 )),
291+ ]
292+ ),
293+ "binaryHeader" : {
294+ "data_sample_format" : 1 ,
295+ "sample_interval" : int (ds .time [1 ] - ds .time [0 ]),
296+ "samples_per_trace" : ds .time .size ,
297+ "segy_revision_major" : 0 ,
298+ "segy_revision_minor" : 0 ,
299+ },
300+ }
301+ )
257302
258303 # 7) Create dataset's custom attributes (optional)
259304 ds .attrs ["attributes" ]["createdBy" ] = "John Doe"
260305
261- output_path = mdio_with_headers .parent / "populated_empty.mdio"
262- to_mdio (ds , output_path = output_path , mode = "w" , compute = True )
306+ # 8) Export to MDIO
307+ output_path_mdio = mdio_with_headers .parent / "populated_empty.mdio"
308+ to_mdio (ds , output_path = output_path_mdio , mode = "w" , compute = True )
309+
310+ # 9) Convert the populated emptyMDIO to SEG-Y
311+ if "headers" in ds .variables :
312+ # Select the SEG-Y standard to use for the conversion
313+ custom_segy_spec = get_segy_standard (1.0 )
314+ # Customize to use the same HeaderSpec that was used to create the empty MDIO
315+ custom_segy_spec .trace .header = self ._get_customized_v10_trace_header_spec ()
316+ # Convert the MDIO file to SEG-Y
317+ mdio_to_segy (
318+ segy_spec = custom_segy_spec ,
319+ input_path = output_path_mdio ,
320+ output_path = mdio_with_headers .parent / "populated_empty.sgy" ,
321+ )
0 commit comments