Skip to content

Commit 7be45a7

Browse files
authored
Fix grid override selection bug (#808)
* Fix geometry detection in analyze_lines_for_guns to handle Type A cases correctly. Ensure all shot lines are processed and unique_guns_per_line is populated to prevent KeyError. Add tests for multiline Type A geometry scenarios. * Performance enhancements for geometry calculation
1 parent 735d7f7 commit 7be45a7

3 files changed

Lines changed: 195 additions & 7 deletions

File tree

src/mdio/segy/geometry.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,10 @@ def analyze_lines_for_guns(
185185
num_guns = unique_guns_in_line.shape[0]
186186
unique_guns_per_line[str(line_val)] = list(unique_guns_in_line)
187187

188+
# Skip geometry detection if we already know it's Type A
189+
if geom_type == ShotGunGeometryType.A:
190+
continue
191+
188192
for gun in unique_guns_in_line:
189193
gun_mask = gun_current == gun
190194
shots_for_gun = shot_current[gun_mask]
@@ -194,7 +198,7 @@ def analyze_lines_for_guns(
194198
msg = "%s %s has %s shots; div by %s guns gives %s unique mod shots."
195199
logger.info(msg, line_field, line_val, num_shots, num_guns, len(np.unique(mod_shots)))
196200
geom_type = ShotGunGeometryType.A
197-
return unique_lines, unique_guns_per_line, geom_type
201+
break # No need to check more guns for this line
198202

199203
return unique_lines, unique_guns_per_line, geom_type
200204

@@ -611,18 +615,27 @@ def transform(
611615
logger.info("%s: %s has guns: %s", line_field, line_val, guns)
612616
max_num_guns = max(len(guns), max_num_guns)
613617

614-
# Only calculate shot_index when shot points are interleaved across guns (Type B)
615-
if geom_type == ShotGunGeometryType.B:
616-
shot_index = np.empty(len(index_headers), dtype="uint32")
617-
# Use .base if available (view of another array), otherwise use the array directly
618-
base_array = index_headers.base if index_headers.base is not None else index_headers
619-
index_headers = rfn.append_fields(base_array, "shot_index", shot_index)
618+
# Always calculate shot_index - the OBN template requires it
619+
shot_index = np.empty(len(index_headers), dtype="uint32")
620+
# Use .base if available (view of another array), otherwise use the array directly
621+
base_array = index_headers.base if index_headers.base is not None else index_headers
622+
index_headers = rfn.append_fields(base_array, "shot_index", shot_index)
620623

624+
if geom_type == ShotGunGeometryType.B:
625+
# Type B: shot points are interleaved across guns, divide to get dense index
621626
for line_val in unique_lines:
622627
line_idxs = np.where(index_headers[line_field][:] == line_val)
623628
index_headers["shot_index"][line_idxs] = np.floor(index_headers["shot_point"][line_idxs] / max_num_guns)
624629
# Make shot index zero-based PER line
625630
index_headers["shot_index"][line_idxs] -= index_headers["shot_index"][line_idxs].min()
631+
else:
632+
# Type A: shot points are already unique per gun, create 0-based index from unique values
633+
for line_val in unique_lines:
634+
line_idxs = np.where(index_headers[line_field][:] == line_val)
635+
shot_points = index_headers["shot_point"][line_idxs]
636+
# np.unique returns sorted values; searchsorted maps each shot_point to its 0-based index
637+
unique_shots = np.unique(shot_points)
638+
index_headers["shot_index"][line_idxs] = np.searchsorted(unique_shots, shot_points)
626639

627640
return index_headers
628641

tests/integration/conftest.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,3 +342,76 @@ def segy_mock_obn_no_component(fake_segy_tmp: Path) -> Path:
342342
components=None, # No component header
343343
filename_suffix="no_component",
344344
)
345+
346+
347+
@pytest.fixture(scope="module")
348+
def segy_mock_obn_multiline_type_a(fake_segy_tmp: Path) -> Path:
349+
"""Generate mock OBN SEG-Y file with multiple shot lines and Type A geometry.
350+
351+
This fixture tests the scenario where:
352+
- Multiple shot lines exist (3 lines)
353+
- Shot points are NOT interleaved across guns (Type A geometry)
354+
- Each gun has the same dense shot point values
355+
356+
This specifically tests the fix for the bug where an early return in
357+
analyze_lines_for_guns() would skip populating unique_guns_per_line for
358+
lines after the first Type A detection, causing KeyError when later
359+
code tried to access those lines.
360+
"""
361+
num_samples = 25
362+
receivers = [101, 102]
363+
shot_lines = [1, 2, 3] # Multiple lines to test all are processed
364+
guns = [1, 2]
365+
components = [1] # Single component for simplicity
366+
367+
# Non-interleaved (Type A): both guns have the same shot point values
368+
# This triggers Type A detection because floor(shot_point / num_guns)
369+
# produces duplicates when shot points are dense per gun
370+
shot_points_per_gun = {
371+
1: [1, 2, 3], # gun 1: dense shot points
372+
2: [1, 2, 3], # gun 2: same dense shot points (Type A)
373+
}
374+
375+
return create_segy_mock_obn(
376+
fake_segy_tmp,
377+
num_samples=num_samples,
378+
receivers=receivers,
379+
shot_lines=shot_lines,
380+
guns=guns,
381+
shot_points_per_gun=shot_points_per_gun,
382+
components=components,
383+
filename_suffix="multiline_type_a",
384+
)
385+
386+
387+
@pytest.fixture(scope="module")
388+
def segy_mock_obn_multiline_type_a_sparse(fake_segy_tmp: Path) -> Path:
389+
"""Generate mock OBN SEG-Y file with Type A geometry and sparse shot points.
390+
391+
Variant of segy_mock_obn_multiline_type_a using non-contiguous shot point
392+
values per gun. Exercises the vectorized Type A shot_index mapping
393+
(np.searchsorted over np.unique) on sparse values to ensure indices are
394+
assigned positionally within the sorted unique set, not by value.
395+
"""
396+
num_samples = 25
397+
receivers = [101, 102]
398+
shot_lines = [1, 2]
399+
guns = [1, 2]
400+
components = [1]
401+
402+
# Sparse, non-contiguous shot points; same set per gun keeps geometry Type A
403+
shot_points_per_gun = {
404+
1: [10, 50, 100],
405+
2: [10, 50, 100],
406+
}
407+
408+
return create_segy_mock_obn(
409+
fake_segy_tmp,
410+
num_samples=num_samples,
411+
receivers=receivers,
412+
shot_lines=shot_lines,
413+
guns=guns,
414+
shot_points_per_gun=shot_points_per_gun,
415+
components=components,
416+
filename_suffix="multiline_type_a_sparse",
417+
)

tests/integration/test_import_obn_grid_overrides.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import TYPE_CHECKING
77

88
import dask
9+
import numpy as np
910
import pytest
1011
import xarray.testing as xrt
1112
from tests.integration.conftest import get_segy_mock_obn_spec
@@ -158,3 +159,104 @@ def test_import_obn_without_calculate_shot_index_raises(
158159
error_message = str(exc_info.value)
159160
assert "shot_index" in error_message
160161
assert "ObnReceiverGathers3D" in error_message
162+
163+
164+
class TestImportObnMultilineTypeA:
165+
"""Test OBN SEG-Y import with multiple shot lines and Type A geometry.
166+
167+
This test class verifies the fix for a bug where analyze_lines_for_guns()
168+
would return early upon detecting Type A geometry, leaving the
169+
unique_guns_per_line dictionary incomplete. This caused KeyError when
170+
CalculateShotIndex.transform() tried to access shot lines that weren't
171+
in the dictionary.
172+
173+
Regression test for: KeyError when ingesting OBN data with multiple shot
174+
lines where Type A geometry is detected on an earlier line.
175+
"""
176+
177+
def test_import_obn_multiline_type_a_all_lines_processed(
178+
self,
179+
segy_mock_obn_multiline_type_a: Path,
180+
zarr_tmp: Path,
181+
) -> None:
182+
"""Test that all shot lines are processed with Type A geometry.
183+
184+
This test verifies that:
185+
1. CalculateShotIndex works with Type A geometry (non-interleaved shots)
186+
2. All shot lines are included in the output, not just the first one
187+
3. shot_index is correctly calculated for Type A (0-based from unique values)
188+
"""
189+
segy_spec = get_segy_mock_obn_spec(include_component=True)
190+
grid_override = {"CalculateShotIndex": True}
191+
192+
segy_to_mdio(
193+
segy_spec=segy_spec,
194+
mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"),
195+
input_path=segy_mock_obn_multiline_type_a,
196+
output_path=zarr_tmp,
197+
overwrite=True,
198+
grid_overrides=grid_override,
199+
)
200+
201+
ds = open_mdio(zarr_tmp)
202+
203+
# Verify ALL shot lines are present (the bug would cause lines to be missing)
204+
expected_shot_lines = [1, 2, 3]
205+
xrt.assert_duckarray_equal(ds["shot_line"], expected_shot_lines)
206+
207+
# Verify guns are present
208+
expected_guns = [1, 2]
209+
xrt.assert_duckarray_equal(ds["gun"], expected_guns)
210+
211+
# Verify shot_index is calculated correctly for Type A geometry
212+
# Type A: shot points [1, 2, 3] are already unique per gun
213+
# shot_index should be 0-based indices: [0, 1, 2]
214+
expected_shot_index = [0, 1, 2]
215+
xrt.assert_duckarray_equal(ds["shot_index"], expected_shot_index)
216+
217+
# Verify other dimensions
218+
expected_receivers = [101, 102]
219+
xrt.assert_duckarray_equal(ds["receiver"], expected_receivers)
220+
221+
expected_components = [1]
222+
xrt.assert_duckarray_equal(ds["component"], expected_components)
223+
224+
# Verify shot_point is preserved as a coordinate
225+
assert "shot_point" in ds.coords
226+
assert ds["shot_point"].dims == ("shot_line", "gun", "shot_index")
227+
228+
def test_import_obn_multiline_type_a_sparse_shot_points(
229+
self,
230+
segy_mock_obn_multiline_type_a_sparse: Path,
231+
zarr_tmp: Path,
232+
) -> None:
233+
"""Test Type A shot_index mapping with sparse, non-contiguous shot points.
234+
235+
Guards the vectorized Type A path (np.searchsorted over np.unique) by
236+
using shot points that are not 0/1-based or contiguous. shot_index must
237+
be a dense 0-based sequence over the sorted unique shot points, and the
238+
original shot_point values must be preserved as a coordinate.
239+
"""
240+
segy_spec = get_segy_mock_obn_spec(include_component=True)
241+
grid_override = {"CalculateShotIndex": True}
242+
243+
segy_to_mdio(
244+
segy_spec=segy_spec,
245+
mdio_template=TemplateRegistry().get("ObnReceiverGathers3D"),
246+
input_path=segy_mock_obn_multiline_type_a_sparse,
247+
output_path=zarr_tmp,
248+
overwrite=True,
249+
grid_overrides=grid_override,
250+
)
251+
252+
ds = open_mdio(zarr_tmp)
253+
254+
# Sparse shot points [10, 50, 100] map to dense 0-based indices [0, 1, 2]
255+
expected_shot_index = [0, 1, 2]
256+
xrt.assert_duckarray_equal(ds["shot_index"], expected_shot_index)
257+
258+
# Original shot_point values preserved as coordinate, not remapped
259+
assert "shot_point" in ds.coords
260+
assert ds["shot_point"].dims == ("shot_line", "gun", "shot_index")
261+
unique_shot_points = np.unique(ds["shot_point"].values)
262+
xrt.assert_duckarray_equal(unique_shot_points, [10, 50, 100])

0 commit comments

Comments
 (0)