Skip to content

Commit 22e1784

Browse files
markspecdmitriyrepinBrianMichelltasansal
authored
Add Seismic3DStreamerFieldRecords template (#675)
* Add template for efficient storage of pre-stack data. * Resolve linting issues in seismic_prestack.py * Add test to cover prestack template. * Linting update. * Fix docsting for SeismicPreStackTemplate. * Adjust PreStackGathers3DTime template * touch * Precomit * Fix unit tests * Fix import * Update attribute name * Rename source files * Rename template name * Update todo message * Alignment with current unit testing standards * Use more correct template name * pre-commit * Update doc string * rename streamer field data template * fix broken template tests * fix broken test * update docstring * add whole survey to docstring * modify for new dim names * fix geometry calculation by adding the new `shot_index` field * lint * update `shot_index` dtype to uint32 and add `calculated_dims` property * exclude `calculated_dimension_names` from `horizontal_coordinates` * update test cases and geometry calculation to reflect changes in cable and shot indexing * header -> grid * fix shot index calculation --------- Co-authored-by: Dmitriy Repin <drepin@hotmail.com> Co-authored-by: BrianMichell <brianm314@comcast.net> Co-authored-by: Altay Sansal <13684161+tasansal@users.noreply.github.com> Co-authored-by: Altay Sansal <tasansal@users.noreply.github.com>
1 parent 6d2d598 commit 22e1784

10 files changed

Lines changed: 358 additions & 54 deletions

File tree

src/mdio/builder/template_registry.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from mdio.builder.templates.seismic_3d_cdp import Seismic3DCdpGathersTemplate
2727
from mdio.builder.templates.seismic_3d_coca import Seismic3DCocaGathersTemplate
2828
from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate
29+
from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate
2930
from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate
3031

3132
if TYPE_CHECKING:
@@ -135,6 +136,7 @@ def _register_default_templates(self) -> None:
135136
# Field (shot) data
136137
self.register(Seismic2DStreamerShotGathersTemplate())
137138
self.register(Seismic3DStreamerShotGathersTemplate())
139+
self.register(Seismic3DStreamerFieldRecordsTemplate())
138140

139141
def get(self, template_name: str) -> AbstractDatasetTemplate:
140142
"""Get an instance of a template from the registry by its name.

src/mdio/builder/templates/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __init__(self, data_domain: SeismicDataDomain) -> None:
3939
raise ValueError(msg)
4040

4141
self._dim_names: tuple[str, ...] = ()
42+
self._calculated_dims: tuple[str, ...] = ()
4243
self._physical_coord_names: tuple[str, ...] = ()
4344
self._logical_coord_names: tuple[str, ...] = ()
4445
self._var_chunk_shape: tuple[int, ...] = ()
@@ -130,6 +131,11 @@ def dimension_names(self) -> tuple[str, ...]:
130131
"""Returns the names of the dimensions."""
131132
return copy.deepcopy(self._dim_names)
132133

134+
@property
135+
def calculated_dimension_names(self) -> tuple[str, ...]:
136+
"""Returns the names of the dimensions."""
137+
return copy.deepcopy(self._calculated_dims)
138+
133139
@property
134140
def physical_coordinate_names(self) -> tuple[str, ...]:
135141
"""Returns the names of the physical (world) coordinates."""
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""Seismic3DStreamerFieldRecordsTemplate MDIO v1 dataset templates."""
2+
3+
from typing import Any
4+
5+
from mdio.builder.schemas.dtype import ScalarType
6+
from mdio.builder.schemas.v1.variable import CoordinateMetadata
7+
from mdio.builder.templates.base import AbstractDatasetTemplate
8+
from mdio.builder.templates.types import SeismicDataDomain
9+
10+
11+
class Seismic3DStreamerFieldRecordsTemplate(AbstractDatasetTemplate):
12+
"""Seismic 3D streamer shot field records template.
13+
14+
A generalized template for streamer field records that are optimized for:
15+
- Common-shot access
16+
- Common-channel access
17+
18+
It can also store all the shot-lines of a survey in one MDIO if needed.
19+
20+
Args:
21+
data_domain: The domain of the dataset.
22+
"""
23+
24+
def __init__(self, data_domain: SeismicDataDomain = "time"):
25+
super().__init__(data_domain=data_domain)
26+
27+
self._spatial_dim_names = ("sail_line", "gun", "shot_index", "cable", "channel")
28+
self._calculated_dims = ("shot_index",)
29+
self._dim_names = (*self._spatial_dim_names, self._data_domain)
30+
self._physical_coord_names = ("source_coord_x", "source_coord_y", "group_coord_x", "group_coord_y")
31+
self._logical_coord_names = ("shot_point", "orig_field_record_num") # ffid
32+
self._var_chunk_shape = (1, 1, 16, 1, 32, 1024)
33+
34+
@property
35+
def _name(self) -> str:
36+
return "StreamerFieldRecords3D"
37+
38+
def _load_dataset_attributes(self) -> dict[str, Any]:
39+
return {"surveyDimensionality": "3D", "ensembleType": "common_source_by_gun"}
40+
41+
def _add_coordinates(self) -> None:
42+
# Add dimension coordinates
43+
# EXCLUDE: `shot_index` since its 0-N
44+
self._builder.add_coordinate(
45+
"sail_line",
46+
dimensions=("sail_line",),
47+
data_type=ScalarType.UINT32,
48+
)
49+
self._builder.add_coordinate(
50+
"gun",
51+
dimensions=("gun",),
52+
data_type=ScalarType.UINT8,
53+
)
54+
self._builder.add_coordinate(
55+
"cable",
56+
dimensions=("cable",),
57+
data_type=ScalarType.UINT8,
58+
)
59+
self._builder.add_coordinate(
60+
"channel",
61+
dimensions=("channel",),
62+
data_type=ScalarType.UINT16,
63+
)
64+
self._builder.add_coordinate(
65+
self._data_domain,
66+
dimensions=(self._data_domain,),
67+
data_type=ScalarType.INT32,
68+
)
69+
70+
# Add non-dimension coordinates
71+
self._builder.add_coordinate(
72+
"orig_field_record_num",
73+
dimensions=("sail_line", "gun", "shot_index"),
74+
data_type=ScalarType.UINT32,
75+
)
76+
self._builder.add_coordinate(
77+
"shot_point",
78+
dimensions=("sail_line", "gun", "shot_index"),
79+
data_type=ScalarType.UINT32,
80+
)
81+
self._builder.add_coordinate(
82+
"source_coord_x",
83+
dimensions=("sail_line", "gun", "shot_index"),
84+
data_type=ScalarType.FLOAT64,
85+
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")),
86+
)
87+
self._builder.add_coordinate(
88+
"source_coord_y",
89+
dimensions=("sail_line", "gun", "shot_index"),
90+
data_type=ScalarType.FLOAT64,
91+
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")),
92+
)
93+
self._builder.add_coordinate(
94+
"group_coord_x",
95+
dimensions=("sail_line", "gun", "shot_index", "cable", "channel"),
96+
data_type=ScalarType.FLOAT64,
97+
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")),
98+
)
99+
self._builder.add_coordinate(
100+
"group_coord_y",
101+
dimensions=("sail_line", "gun", "shot_index", "cable", "channel"),
102+
data_type=ScalarType.FLOAT64,
103+
metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")),
104+
)

src/mdio/converters/segy.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,10 @@ def _populate_coordinates(
345345
"""
346346
drop_vars_delayed = []
347347
# Populate the dimension coordinate variables (1-D arrays)
348-
dataset, vars_to_drop_later = populate_dim_coordinates(dataset, grid, drop_vars_delayed=drop_vars_delayed)
348+
dataset, drop_vars_delayed = populate_dim_coordinates(dataset, grid, drop_vars_delayed=drop_vars_delayed)
349349

350350
# Populate the non-dimension coordinate variables (N-dim arrays)
351-
dataset, vars_to_drop_later = populate_non_dim_coordinates(
351+
dataset, drop_vars_delayed = populate_non_dim_coordinates(
352352
dataset,
353353
grid,
354354
coordinates=coords,
@@ -488,6 +488,7 @@ def _validate_spec_in_template(segy_spec: SegySpec, mdio_template: AbstractDatas
488488
header_fields = {field.name for field in segy_spec.trace.header.fields}
489489

490490
required_fields = set(mdio_template.spatial_dimension_names) | set(mdio_template.coordinate_names)
491+
required_fields = required_fields - set(mdio_template.calculated_dimension_names) # remove to be calculated
491492
required_fields = required_fields | {"coordinate_scalar"} # ensure coordinate scalar is always present
492493
missing_fields = required_fields - header_fields
493494

@@ -592,6 +593,9 @@ def segy_to_mdio( # noqa PLR0913
592593
to_mdio(xr_dataset, output_path=output_path, mode="w", compute=False)
593594

594595
# This will write the non-dimension coordinates and trace mask
596+
# We also remove dimensions that don't have associated coordinates
597+
unindexed_dims = [d for d in xr_dataset.dims if d not in xr_dataset.coords]
598+
[drop_vars_delayed.remove(d) for d in unindexed_dims]
595599
meta_ds = xr_dataset[drop_vars_delayed + ["trace_mask"]]
596600
to_mdio(meta_ds, output_path=output_path, mode="r+", compute=True)
597601

src/mdio/segy/geometry.py

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def analyze_streamer_headers(
149149
return unique_cables, cable_chan_min, cable_chan_max, geom_type
150150

151151

152-
def analyze_shotlines_for_guns(
152+
def analyze_saillines_for_guns(
153153
index_headers: HeaderArray,
154154
) -> tuple[NDArray, dict[str, list], ShotGunGeometryType]:
155155
"""Check input headers for SEG-Y input to help determine geometry of shots and guns.
@@ -161,27 +161,27 @@ def analyze_shotlines_for_guns(
161161
index_headers: numpy array with index headers
162162
163163
Returns:
164-
tuple of unique_shot_lines, unique_guns_in_shot_line, geom_type
164+
tuple of unique_sail_lines, unique_guns_in_sail_line, geom_type
165165
"""
166166
# Find unique cable ids
167-
unique_shot_lines = np.sort(np.unique(index_headers["shot_line"]))
167+
unique_sail_lines = np.sort(np.unique(index_headers["sail_line"]))
168168
unique_guns = np.sort(np.unique(index_headers["gun"]))
169-
logger.info("unique_shot_lines: %s", unique_shot_lines)
169+
logger.info("unique_sail_lines: %s", unique_sail_lines)
170170
logger.info("unique_guns: %s", unique_guns)
171171

172172
# Find channel min and max values for each cable
173-
unique_guns_in_shot_line = {}
173+
unique_guns_in_sail_line = {}
174174

175175
geom_type = ShotGunGeometryType.B
176176
# Check shot numbers are still unique if div/num_guns
177-
for shot_line in unique_shot_lines:
178-
shot_line_mask = index_headers["shot_line"] == shot_line
179-
shot_current_sl = index_headers["shot_point"][shot_line_mask]
180-
gun_current_sl = index_headers["gun"][shot_line_mask]
177+
for sail_line in unique_sail_lines:
178+
sail_line_mask = index_headers["sail_line"] == sail_line
179+
shot_current_sl = index_headers["shot_point"][sail_line_mask]
180+
gun_current_sl = index_headers["gun"][sail_line_mask]
181181

182182
unique_guns_sl = np.sort(np.unique(gun_current_sl))
183183
num_guns_sl = unique_guns_sl.shape[0]
184-
unique_guns_in_shot_line[str(shot_line)] = list(unique_guns_sl)
184+
unique_guns_in_sail_line[str(sail_line)] = list(unique_guns_sl)
185185

186186
for gun in unique_guns_sl:
187187
gun_mask = gun_current_sl == gun
@@ -190,10 +190,10 @@ def analyze_shotlines_for_guns(
190190
mod_shots = np.floor(shots_current_sl_gun / num_guns_sl)
191191
if len(np.unique(mod_shots)) != num_shots_sl:
192192
msg = "Shot line %s has %s when using div by %s %s has %s unique mod shots."
193-
logger.info(msg, shot_line, num_shots_sl, num_guns_sl, np.unique(mod_shots))
193+
logger.info(msg, sail_line, num_shots_sl, num_guns_sl, np.unique(mod_shots))
194194
geom_type = ShotGunGeometryType.A
195-
return unique_shot_lines, unique_guns_in_shot_line, geom_type
196-
return unique_shot_lines, unique_guns_in_shot_line, geom_type
195+
return unique_sail_lines, unique_guns_in_sail_line, geom_type
196+
return unique_sail_lines, unique_guns_in_sail_line, geom_type
197197

198198

199199
def create_counter(
@@ -459,7 +459,7 @@ def transform(
459459
class AutoShotWrap(GridOverrideCommand):
460460
"""Automatically determine ShotGun acquisition type."""
461461

462-
required_keys = {"shot_line", "gun", "shot_point", "cable", "channel"}
462+
required_keys = {"sail_line", "gun", "shot_point", "cable", "channel"}
463463
required_parameters = None
464464

465465
def validate(self, index_headers: HeaderArray, grid_overrides: dict[str, bool | int]) -> None:
@@ -475,24 +475,28 @@ def transform(
475475
"""Perform the grid transform."""
476476
self.validate(index_headers, grid_overrides)
477477

478-
result = analyze_shotlines_for_guns(index_headers)
479-
unique_shot_lines, unique_guns_in_shot_line, geom_type = result
478+
result = analyze_saillines_for_guns(index_headers)
479+
unique_sail_lines, unique_guns_in_sail_line, geom_type = result
480480
logger.info("Ingesting dataset as shot type: %s", geom_type.name)
481481

482482
max_num_guns = 1
483-
for shot_line in unique_shot_lines:
484-
logger.info("shot_line: %s has guns: %s", shot_line, unique_guns_in_shot_line[str(shot_line)])
485-
num_guns = len(unique_guns_in_shot_line[str(shot_line)])
483+
for sail_line in unique_sail_lines:
484+
logger.info("sail_line: %s has guns: %s", sail_line, unique_guns_in_sail_line[str(sail_line)])
485+
num_guns = len(unique_guns_in_sail_line[str(sail_line)])
486486
max_num_guns = max(num_guns, max_num_guns)
487487

488488
# This might be slow and potentially could be improved with a rewrite
489489
# to prevent so many lookups
490490
if geom_type == ShotGunGeometryType.B:
491-
for shot_line in unique_shot_lines:
492-
shot_line_idxs = np.where(index_headers["shot_line"][:] == shot_line)
493-
index_headers["shot_point"][shot_line_idxs] = np.floor(
494-
index_headers["shot_point"][shot_line_idxs] / max_num_guns
491+
shot_index = np.empty(len(index_headers), dtype="uint32")
492+
index_headers = rfn.append_fields(index_headers.base, "shot_index", shot_index)
493+
for sail_line in unique_sail_lines:
494+
sail_line_idxs = np.where(index_headers["sail_line"][:] == sail_line)
495+
index_headers["shot_index"][sail_line_idxs] = np.floor(
496+
index_headers["shot_point"][sail_line_idxs] / max_num_guns
495497
)
498+
# Make shot index zero-based PER sail line
499+
index_headers["shot_index"][sail_line_idxs] -= index_headers["shot_index"][sail_line_idxs].min()
496500
return index_headers
497501

498502

src/mdio/segy/utilities.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,21 @@ def get_grid_plan( # noqa: C901, PLR0913
5151
5252
Returns:
5353
All index dimensions and chunksize or dimensions and chunksize together with header values.
54+
55+
Raises:
56+
ValueError: If computed fields are not found after grid overrides.
5457
"""
5558
if grid_overrides is None:
5659
grid_overrides = {}
5760

5861
# Keep only dimension and non-dimension coordinates excluding the vertical axis
5962
horizontal_dimensions = template.spatial_dimension_names
6063
horizontal_coordinates = horizontal_dimensions + template.coordinate_names
64+
65+
# Remove any to be computed fields
66+
computed_fields = set(template.calculated_dimension_names)
67+
horizontal_coordinates = tuple(set(horizontal_coordinates) - computed_fields)
68+
6169
headers_subset = parse_headers(
6270
segy_file_kwargs=segy_file_kwargs,
6371
num_traces=segy_file_info.num_traces,
@@ -73,6 +81,13 @@ def get_grid_plan( # noqa: C901, PLR0913
7381
grid_overrides=grid_overrides,
7482
)
7583

84+
if len(computed_fields) > 0 and not computed_fields.issubset(headers_subset.dtype.names):
85+
err = (
86+
f"Required computed fields {sorted(computed_fields)} for template {template.name} "
87+
f"not found after grid overrides. Please ensure correct overrides are applied."
88+
)
89+
raise ValueError(err)
90+
7691
dimensions = []
7792
for dim_name in horizontal_dimensions:
7893
dim_unique = np.unique(headers_subset[dim_name])

tests/integration/conftest.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
def get_segy_mock_4d_spec() -> SegySpec:
2323
"""Create a mock 4D SEG-Y specification."""
2424
trace_header_fields = [
25-
HeaderField(name="field_rec_no", byte=9, format="int32"),
25+
HeaderField(name="orig_field_record_num", byte=9, format="int32"),
2626
HeaderField(name="channel", byte=13, format="int32"),
2727
HeaderField(name="shot_point", byte=17, format="int32"),
2828
HeaderField(name="offset", byte=37, format="int32"),
2929
HeaderField(name="samples_per_trace", byte=115, format="int16"),
3030
HeaderField(name="sample_interval", byte=117, format="int16"),
31-
HeaderField(name="shot_line", byte=133, format="int16"),
31+
HeaderField(name="sail_line", byte=133, format="int16"),
3232
HeaderField(name="cable", byte=137, format="int16"),
3333
HeaderField(name="gun", byte=171, format="int16"),
3434
HeaderField(name="coordinate_scalar", byte=71, format="int16"),
@@ -111,15 +111,15 @@ def create_segy_mock_4d( # noqa: PLR0913
111111
gun = gun_headers[trc_idx]
112112
cable = cable_headers[trc_idx]
113113
channel = channel_headers[trc_idx]
114-
shot_line = 1
114+
sail_line = 1
115115
offset = 0
116116

117117
if index_receivers is False:
118-
channel, gun, shot_line = 0, 0, 0
118+
channel, gun, sail_line = 0, 0, 0
119119

120120
# Assign dimension coordinate fields with calculated mock data
121-
header_fields = ["field_rec_no", "channel", "shot_point", "offset", "shot_line", "cable", "gun"]
122-
headers[header_fields][trc_idx] = (shot, channel, shot, offset, shot_line, cable, gun)
121+
header_fields = ["orig_field_record_num", "channel", "shot_point", "offset", "sail_line", "cable", "gun"]
122+
headers[header_fields][trc_idx] = (shot, channel, shot, offset, sail_line, cable, gun)
123123

124124
# Assign coordinate fields with mock data
125125
x = start_x + step_x * trc_shot_idx
@@ -144,7 +144,7 @@ def segy_mock_4d_shots(fake_segy_tmp: Path) -> dict[StreamerShotGeometryType, Pa
144144
num_samples = 25
145145
shots = [2, 3, 5, 6, 7, 8, 9]
146146
guns = [1, 2]
147-
cables = [0, 101, 201, 301]
147+
cables = [0, 3, 5, 7]
148148
receivers_per_cable = [1, 5, 7, 5]
149149

150150
segy_paths = {}

0 commit comments

Comments
 (0)