Add mark_overflow_packets to hi_goodtimes (IMAP-Science-Operations-Center#2703)

tmplummer · web-flow · commit 2b98ca766728 · 2026-02-18T08:53:12.000-07:00
* Add mark_overflow_packets to hi_goodtimes

* Vectorize overflow_packet algorithm

* Test that all spin-bins are culled
diff --git a/imap_processing/hi/hi_goodtimes.py b/imap_processing/hi/hi_goodtimes.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 import xarray as xr
 
 from imap_processing.hi.utils import parse_sensor_number
@@ -680,3 +681,130 @@ def mark_drf_times(
     logger.info(
         f"Dropped times during {len(transition_indices)} DRF restabilization period(s)"
     )
+
+
+def mark_overflow_packets(
+    goodtimes_ds: xr.Dataset,
+    l1b_de: xr.Dataset,
+    config_df: pd.DataFrame,
+    cull_code: int = CullCode.LOOSE,
+) -> None:
+    """
+    Remove times when DE packets overflow with qualified events.
+
+    Filters out 8-spin periods where a Direct Event packet contains the maximum
+    number of events (664) and the final event qualifies for a calibration product.
+    When a packet is full and ends with a qualified event, additional events may
+    have been lost, making the count data incomplete.
+
+    Algorithm Document Reference:
+        Section 2.3.2.2: Good Times Exclusions due to High Count Rate
+
+    Background:
+        Each DE packet can hold a maximum of 664 direct events. When a packet fills
+        completely, any additional events that occur are lost. If the final event
+        in a full packet has a coincidence type that is part of a defined calibration
+        product, the packet is considered to have potentially lost science-quality
+        events, and the entire 8-spin period should be excluded from analysis.
+
+    Parameters
+    ----------
+    goodtimes_ds : xarray.Dataset
+        Goodtimes dataset to update with cull flags (modified in place).
+    l1b_de : xarray.Dataset
+        L1B Direct Event data containing:
+        - ccsds_index: Non-negative integer packet index of each event
+        - coincidence_type: Coincidence type bitmap for each event
+        - event_met: MET timestamp for each event
+    config_df : pandas.DataFrame
+        Calibration product configuration DataFrame with coincidence_type_values
+        column containing tuples of valid coincidence type integers for each
+        calibration product. Use CalibrationProductConfig.from_csv() to load.
+    cull_code : int, optional
+        Cull code to use for marking bad times (default: CullCode.LOOSE).
+
+    Notes
+    -----
+    Modifies goodtimes_ds in place: the event_met of each qualifying packet's
+    final event is passed to goodtimes_ds.goodtimes.mark_bad_times(). Returns
+    early, leaving goodtimes_ds untouched, when l1b_de has no events or no
+    packet holds exactly 664 events. A final event is "qualified" when its
+    coincidence_type appears in any row of config_df["coincidence_type_values"].
+    Full packets are logged at INFO when there are 10 or fewer, else at DEBUG.
+    """
+    logger.info("Running mark_overflow_packets culling")
+
+    ccsds_indices = l1b_de["ccsds_index"].values
+    coincidence_types = l1b_de["coincidence_type"].values
+    event_mets = l1b_de["event_met"].values
+
+    if len(ccsds_indices) == 0:
+        logger.info("No events in L1B DE data")
+        return
+
+    # Maximum number of DEs a single packet can hold (see docstring Background)
+    max_des_per_packet = 664
+
+    # Count events per packet: packet_event_counts[i] is the number of events
+    # with ccsds_index == i (np.bincount requires non-negative integers)
+    packet_event_counts = np.bincount(ccsds_indices)
+
+    # Find packets that are full (have exactly 664 events)
+    full_packet_indices = np.nonzero(packet_event_counts == max_des_per_packet)[0]
+
+    if len(full_packet_indices) == 0:
+        logger.info("No full packets found")
+        return
+
+    # Keep INFO logs readable: per-packet lines drop to DEBUG above 10 full packets
+    log_per_packet = logger.info if len(full_packet_indices) <= 10 else logger.debug
+
+    # Union of the valid coincidence types across every calibration product row
+    all_valid_coin_types = set()
+    for coin_types in config_df["coincidence_type_values"]:
+        all_valid_coin_types.update(coin_types)
+
+    # Find the last event index for each packet (vectorized)
+    # For each full packet we need the array index of its final event, taken
+    # here to be the largest array index i where ccsds_indices[i] == P.
+    # NOTE(review): this treats array order as packet order (events of a
+    # packet stored in the order they were recorded); confirm against the
+    # L1B DE producer.
+    #
+    # np.maximum.at performs an unbuffered, in-place maximum:
+    # - last_event_per_packet starts at -1 for every packet index
+    # - slot ccsds_indices[i] is raised to event_indices[i] when that is larger
+    # - repeated packet indices are all applied, so after the call
+    #   last_event_per_packet[P] is the index of the last event of packet P
+    max_packet_idx = int(np.max(ccsds_indices))
+    last_event_per_packet = np.full(max_packet_idx + 1, -1, dtype=np.intp)
+    event_indices = np.arange(len(ccsds_indices))
+    np.maximum.at(last_event_per_packet, ccsds_indices, event_indices)
+
+    # Final event indices for full packets (never -1: a full packet has events)
+    final_event_indices = last_event_per_packet[full_packet_indices]
+
+    # Get coincidence types for final events
+    final_coin_types = coincidence_types[final_event_indices]
+
+    # Log each full packet at the level chosen above
+    for i, packet_idx in enumerate(full_packet_indices):
+        log_per_packet(
+            f"Packet {packet_idx} is full with final event "
+            f"(coincidence_type={final_coin_types[i]})"
+        )
+
+    # Check which final events are qualified (in a calibration product)
+    qualified_mask = np.isin(final_coin_types, list(all_valid_coin_types))
+
+    # MET of the final event of each qualified full packet
+    mets_to_cull = event_mets[final_event_indices[qualified_mask]]
+
+    # Mark times as bad; no bins passed, so mark_bad_times' default bins apply
+    if len(mets_to_cull) > 0:
+        goodtimes_ds.goodtimes.mark_bad_times(met=mets_to_cull, cull=cull_code)
+
+    logger.info(
+        f"Found {len(full_packet_indices)} full packet(s), "
+        f"dropped {len(mets_to_cull)} 8-spin period(s) due to overflow packets"
+    )
diff --git a/imap_processing/tests/hi/test_hi_goodtimes.py b/imap_processing/tests/hi/test_hi_goodtimes.py
@@ -1,6 +1,7 @@
 """Test coverage for imap_processing.hi.hi_goodtimes.py"""
 
 import numpy as np
+import pandas as pd
 import pytest
 import xarray as xr
 
@@ -10,6 +11,7 @@
     create_goodtimes_dataset,
     mark_drf_times,
     mark_incomplete_spin_sets,
+    mark_overflow_packets,
 )
 
 
@@ -1218,3 +1220,217 @@ def test_mark_drf_times_transition_at_end(self):
         n_culled = np.sum(gt["cull_flags"].values[:, 0] == CullCode.LOOSE)
         assert n_culled > 0  # Some should be culled
         assert n_culled <= 31  # But not all (only last ~30 minutes)
+
+
+class TestMarkOverflowPackets:
+    """Test suite for mark_overflow_packets function."""
+
+    @pytest.fixture
+    def mock_config_df(self):
+        """Create a mock calibration product configuration DataFrame."""
+        # Minimal two-row config; mark_overflow_packets only reads the
+        # coincidence_type_values column added below (ABC1C2=15, ABC1=14, AB=12)
+        data = {
+            "coincidence_type_list": [("ABC1C2", "ABC1"), ("AB",)],
+            "tof_ab_low": [0, 0],
+            "tof_ab_high": [100, 100],
+            "tof_ac1_low": [0, 0],
+            "tof_ac1_high": [100, 100],
+            "tof_bc1_low": [-50, -50],
+            "tof_bc1_high": [50, 50],
+            "tof_c1c2_low": [0, 0],
+            "tof_c1c2_high": [100, 100],
+        }
+        df = pd.DataFrame(
+            data,
+            index=pd.MultiIndex.from_tuples(
+                [(1, 1), (2, 1)], names=["calibration_prod", "esa_energy_step"]
+            ),
+        )
+        # Add coincidence_type_values column (converted from strings to ints)
+        # Qualified types are therefore {15, 14, 12}; tests use 3 as unqualified
+        df["coincidence_type_values"] = [(15, 14), (12,)]
+        return df
+
+    @pytest.fixture
+    def mock_goodtimes(self):
+        """Create a mock goodtimes dataset: 10 METs (1000-1090, step 10) x 90 bins."""
+        met_values = np.arange(1000.0, 1100.0, 10.0)
+        return xr.Dataset(
+            {
+                "cull_flags": xr.DataArray(
+                    np.zeros((len(met_values), 90), dtype=np.uint8),
+                    dims=["met", "spin_bin"],
+                ),
+                "esa_step": xr.DataArray(
+                    np.ones(len(met_values), dtype=np.uint8), dims=["met"]
+                ),
+            },
+            coords={"met": met_values, "spin_bin": np.arange(90)},
+            attrs={"sensor": "Hi45", "pointing": 1},
+        )
+
+    def test_no_full_packets(self, mock_goodtimes, mock_config_df):
+        """Test that no culling occurs when no packets are full."""
+        # Create L1B DE with a single packet (index 0) holding only 100 events
+        n_events = 100
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
+                "coincidence_type": (
+                    ["event_met"],
+                    np.full(n_events, 15, dtype=np.uint8),
+                ),
+            },
+            coords={"event_met": np.linspace(1000.0, 1010.0, n_events)},
+        )
+
+        mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
+
+        # No times should be culled
+        assert np.all(mock_goodtimes["cull_flags"].values == 0)
+
+    def test_full_packet_with_qualified_event(self, mock_goodtimes, mock_config_df):
+        """Test that full packet with qualified final event is culled."""
+        # Create L1B DE with one packet having exactly 664 events
+        n_events = 664
+        event_mets = np.linspace(1005.0, 1006.0, n_events)
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
+                # Every event, including the final one, is type 15 (ABC1C2): qualified
+                "coincidence_type": (
+                    ["event_met"],
+                    np.full(n_events, 15, dtype=np.uint8),
+                ),
+            },
+            coords={"event_met": event_mets},
+        )
+
+        mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
+
+        # Final event MET is 1006.0, which should map to goodtimes MET 1000
+        # All 90 spin bins of the MET 1000 row culled (sum == 90 assumes LOOSE == 1)
+        assert mock_goodtimes["cull_flags"].values[0, :].sum() == 90
+
+    def test_full_packet_with_unqualified_event(self, mock_goodtimes, mock_config_df):
+        """Test that full packet with unqualified final event is NOT culled."""
+        # Create L1B DE with one packet having exactly 664 events
+        n_events = 664
+        event_mets = np.linspace(1005.0, 1006.0, n_events)
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
+                # Every event, including the final one, is type 3 (in no cal product)
+                "coincidence_type": (
+                    ["event_met"],
+                    np.full(n_events, 3, dtype=np.uint8),
+                ),
+            },
+            coords={"event_met": event_mets},
+        )
+
+        mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
+
+        # No times should be culled since final event is unqualified
+        assert np.all(mock_goodtimes["cull_flags"].values == 0)
+
+    def test_multiple_full_packets(self, mock_goodtimes, mock_config_df):
+        """Test handling of multiple full packets."""
+        # Create L1B DE with two packets, each having 664 events
+        n_events_per_packet = 664
+        n_packets = 2
+
+        ccsds_indices = np.concatenate(
+            [np.full(n_events_per_packet, i, dtype=np.uint16) for i in range(n_packets)]
+        )
+        # Packet 0: final event qualified (15)
+        # Packet 1: final event unqualified (3)
+        coincidence_types = np.concatenate(
+            [
+                np.concatenate(
+                    [np.full(n_events_per_packet - 1, 3, dtype=np.uint8), [15]]
+                ),
+                np.full(n_events_per_packet, 3, dtype=np.uint8),
+            ]
+        )
+        event_mets = np.concatenate(
+            [
+                np.linspace(1005.0, 1006.0, n_events_per_packet),  # Packet 0
+                np.linspace(1015.0, 1016.0, n_events_per_packet),  # Packet 1
+            ]
+        )
+
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], ccsds_indices),
+                "coincidence_type": (["event_met"], coincidence_types),
+            },
+            coords={"event_met": event_mets},
+        )
+
+        mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
+
+        # Only packet 0's MET should be culled (MET 1000)
+        # Packet 1 has unqualified final event, so MET 1010 should not be culled
+        assert np.sum(mock_goodtimes["cull_flags"].values[0, :] > 0) == 90  # All bins
+        assert np.all(mock_goodtimes["cull_flags"].values[1, :] == 0)  # MET 1010
+
+    def test_empty_de_data(self, mock_goodtimes, mock_config_df):
+        """Test handling of empty L1B DE data."""
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], np.array([], dtype=np.uint16)),
+                "coincidence_type": (["event_met"], np.array([], dtype=np.uint8)),
+            },
+            coords={"event_met": np.array([])},
+        )
+
+        # Should not raise, just return without culling
+        mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
+        assert np.all(mock_goodtimes["cull_flags"].values == 0)
+
+    def test_custom_cull_code(self, mock_goodtimes, mock_config_df):
+        """Test using a custom cull code."""
+        n_events = 664
+        event_mets = np.linspace(1005.0, 1006.0, n_events)
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
+                "coincidence_type": (
+                    ["event_met"],
+                    np.concatenate([np.full(n_events - 1, 3, dtype=np.uint8), [15]]),
+                ),
+            },
+            coords={"event_met": event_mets},
+        )
+
+        custom_cull = 5
+        mark_overflow_packets(
+            mock_goodtimes, l1b_de, mock_config_df, cull_code=custom_cull
+        )
+
+        # Check that the custom cull code (not the LOOSE default) was written
+        assert np.any(mock_goodtimes["cull_flags"].values == custom_cull)
+
+    def test_final_event_is_last_in_list(self, mock_goodtimes, mock_config_df):
+        """Test that the final event is the last one in the list for the packet."""
+        n_events = 664
+        event_mets = np.linspace(1005.0, 1006.0, n_events)
+
+        # All events have unqualified type except the last one in the list
+        coincidence_types = np.full(n_events, 3, dtype=np.uint8)
+        coincidence_types[-1] = 12  # Last event is qualified
+
+        l1b_de = xr.Dataset(
+            {
+                "ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
+                "coincidence_type": (["event_met"], coincidence_types),
+            },
+            coords={"event_met": event_mets},
+        )
+
+        mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
+
+        # Should be culled because the final event (last in list) is qualified
+        assert np.sum(mock_goodtimes["cull_flags"].values > 0) > 0