Skip to content

Commit 2b98ca7

Browse files
authored
Add mark_overflow_packets to hi_goodtimes (IMAP-Science-Operations-Center#2703)
* Add mark_overflow_packets to hi_goodtimes * Vectorize overflow_packet algorithm * Test that all spin-bins are culled
1 parent c24d41a commit 2b98ca7

2 files changed

Lines changed: 344 additions & 0 deletions

File tree

imap_processing/hi/hi_goodtimes.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77

88
import numpy as np
9+
import pandas as pd
910
import xarray as xr
1011

1112
from imap_processing.hi.utils import parse_sensor_number
@@ -680,3 +681,130 @@ def mark_drf_times(
680681
logger.info(
681682
f"Dropped times during {len(transition_indices)} DRF restabilization period(s)"
682683
)
684+
685+
686+
def mark_overflow_packets(
687+
goodtimes_ds: xr.Dataset,
688+
l1b_de: xr.Dataset,
689+
config_df: pd.DataFrame,
690+
cull_code: int = CullCode.LOOSE,
691+
) -> None:
692+
"""
693+
Remove times when DE packets overflow with qualified events.
694+
695+
Filters out 8-spin periods where a Direct Event packet contains the maximum
696+
number of events (664) and the final event qualifies for a calibration product.
697+
When a packet is full and ends with a qualified event, additional events may
698+
have been lost, making the count data incomplete.
699+
700+
Algorithm Document Reference:
701+
Section 2.3.2.2: Good Times Exclusions due to High Count Rate
702+
703+
Background:
704+
Each DE packet can hold a maximum of 664 direct events. When a packet fills
705+
completely, any additional events that occur are lost. If the final event
706+
in a full packet has a coincidence type that is part of a defined calibration
707+
product, the packet is considered to have potentially lost science-quality
708+
events, and the entire 8-spin period should be excluded from analysis.
709+
710+
Parameters
711+
----------
712+
goodtimes_ds : xarray.Dataset
713+
Goodtimes dataset to update with cull flags.
714+
l1b_de : xarray.Dataset
715+
L1B Direct Event data containing:
716+
- ccsds_index: Index mapping each event to its packet
717+
- coincidence_type: Coincidence type bitmap for each event
718+
- event_met: MET timestamp for each event
719+
config_df : pandas.DataFrame
720+
Calibration product configuration DataFrame with coincidence_type_values
721+
column containing tuples of valid coincidence type integers for each
722+
calibration product. Use CalibrationProductConfig.from_csv() to load.
723+
cull_code : int, optional
724+
Cull code to use for marking bad times (default: CullCode.LOOSE).
725+
726+
Notes
727+
-----
728+
This function modifies goodtimes_ds in place by calling mark_bad_times()
729+
for MET timestamps with overflow packets containing qualified final events.
730+
731+
The check for qualified events uses the coincidence_type_values from the
732+
calibration product configuration, which defines which coincidence types
733+
are considered valid for science analysis.
734+
"""
735+
logger.info("Running mark_overflow_packets culling")
736+
737+
ccsds_indices = l1b_de["ccsds_index"].values
738+
coincidence_types = l1b_de["coincidence_type"].values
739+
event_mets = l1b_de["event_met"].values
740+
741+
if len(ccsds_indices) == 0:
742+
logger.info("No events in L1B DE data")
743+
return
744+
745+
# Maximum number of DEs per packet
746+
max_des_per_packet = 664
747+
748+
# Count events per packet using bincount
749+
# bincount[i] = number of events with ccsds_index == i
750+
packet_event_counts = np.bincount(ccsds_indices)
751+
752+
# Find packets that are full (have exactly 664 events)
753+
full_packet_indices = np.nonzero(packet_event_counts == max_des_per_packet)[0]
754+
755+
if len(full_packet_indices) == 0:
756+
logger.info("No full packets found")
757+
return
758+
759+
# Use DEBUG level for per-packet logging if more than 10 full packets
760+
log_per_packet = logger.info if len(full_packet_indices) <= 10 else logger.debug
761+
762+
# Build set of all valid coincidence types from calibration products
763+
all_valid_coin_types = set()
764+
for coin_types in config_df["coincidence_type_values"]:
765+
all_valid_coin_types.update(coin_types)
766+
767+
# Find the last event index for each packet (vectorized)
768+
# We need to find, for each full packet, the index of its final event.
769+
# Since events within a packet appear consecutively in the array, the
770+
# "last" event for packet P is the event with the largest array index
771+
# where ccsds_indices == P.
772+
#
773+
# We use np.maximum.at to efficiently compute this:
774+
# - last_event_per_packet[P] will hold the max event index for packet P
775+
# - np.maximum.at updates last_event_per_packet[ccsds_indices[i]] with
776+
# event_indices[i] if it's larger than the current value
777+
# - After processing all events, last_event_per_packet[P] contains the
778+
# index of the last event belonging to packet P
779+
max_packet_idx = int(np.max(ccsds_indices))
780+
last_event_per_packet = np.full(max_packet_idx + 1, -1, dtype=np.intp)
781+
event_indices = np.arange(len(ccsds_indices))
782+
np.maximum.at(last_event_per_packet, ccsds_indices, event_indices)
783+
784+
# Get the final event indices for full packets
785+
final_event_indices = last_event_per_packet[full_packet_indices]
786+
787+
# Get coincidence types for final events
788+
final_coin_types = coincidence_types[final_event_indices]
789+
790+
# Log each full packet
791+
for i, packet_idx in enumerate(full_packet_indices):
792+
log_per_packet(
793+
f"Packet {packet_idx} is full with final event "
794+
f"(coincidence_type={final_coin_types[i]})"
795+
)
796+
797+
# Check which final events are qualified (in a calibration product)
798+
qualified_mask = np.isin(final_coin_types, list(all_valid_coin_types))
799+
800+
# Get METs for qualified packets
801+
mets_to_cull = event_mets[final_event_indices[qualified_mask]]
802+
803+
# Mark all identified times as bad (all spin bins)
804+
if len(mets_to_cull) > 0:
805+
goodtimes_ds.goodtimes.mark_bad_times(met=mets_to_cull, cull=cull_code)
806+
807+
logger.info(
808+
f"Found {len(full_packet_indices)} full packet(s), "
809+
f"dropped {len(mets_to_cull)} 8-spin period(s) due to overflow packets"
810+
)

imap_processing/tests/hi/test_hi_goodtimes.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Test coverage for imap_processing.hi.hi_goodtimes.py"""
22

33
import numpy as np
4+
import pandas as pd
45
import pytest
56
import xarray as xr
67

@@ -10,6 +11,7 @@
1011
create_goodtimes_dataset,
1112
mark_drf_times,
1213
mark_incomplete_spin_sets,
14+
mark_overflow_packets,
1315
)
1416

1517

@@ -1218,3 +1220,217 @@ def test_mark_drf_times_transition_at_end(self):
12181220
n_culled = np.sum(gt["cull_flags"].values[:, 0] == CullCode.LOOSE)
12191221
assert n_culled > 0 # Some should be culled
12201222
assert n_culled <= 31 # But not all (only last ~30 minutes)
1223+
1224+
1225+
class TestMarkOverflowPackets:
1226+
"""Test suite for mark_overflow_packets function."""
1227+
1228+
@pytest.fixture
1229+
def mock_config_df(self):
1230+
"""Create a mock calibration product configuration DataFrame."""
1231+
# Create a minimal config with coincidence types
1232+
# ABC1C2 = 15, ABC1 = 14, AB = 12
1233+
data = {
1234+
"coincidence_type_list": [("ABC1C2", "ABC1"), ("AB",)],
1235+
"tof_ab_low": [0, 0],
1236+
"tof_ab_high": [100, 100],
1237+
"tof_ac1_low": [0, 0],
1238+
"tof_ac1_high": [100, 100],
1239+
"tof_bc1_low": [-50, -50],
1240+
"tof_bc1_high": [50, 50],
1241+
"tof_c1c2_low": [0, 0],
1242+
"tof_c1c2_high": [100, 100],
1243+
}
1244+
df = pd.DataFrame(
1245+
data,
1246+
index=pd.MultiIndex.from_tuples(
1247+
[(1, 1), (2, 1)], names=["calibration_prod", "esa_energy_step"]
1248+
),
1249+
)
1250+
# Add coincidence_type_values column (converted from strings to ints)
1251+
# ABC1C2=15, ABC1=14, AB=12
1252+
df["coincidence_type_values"] = [(15, 14), (12,)]
1253+
return df
1254+
1255+
@pytest.fixture
1256+
def mock_goodtimes(self):
1257+
"""Create a mock goodtimes dataset."""
1258+
met_values = np.arange(1000.0, 1100.0, 10.0)
1259+
return xr.Dataset(
1260+
{
1261+
"cull_flags": xr.DataArray(
1262+
np.zeros((len(met_values), 90), dtype=np.uint8),
1263+
dims=["met", "spin_bin"],
1264+
),
1265+
"esa_step": xr.DataArray(
1266+
np.ones(len(met_values), dtype=np.uint8), dims=["met"]
1267+
),
1268+
},
1269+
coords={"met": met_values, "spin_bin": np.arange(90)},
1270+
attrs={"sensor": "Hi45", "pointing": 1},
1271+
)
1272+
1273+
def test_no_full_packets(self, mock_goodtimes, mock_config_df):
1274+
"""Test that no culling occurs when no packets are full."""
1275+
# Create L1B DE with packets having < 664 events
1276+
n_events = 100
1277+
l1b_de = xr.Dataset(
1278+
{
1279+
"ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
1280+
"coincidence_type": (
1281+
["event_met"],
1282+
np.full(n_events, 15, dtype=np.uint8),
1283+
),
1284+
},
1285+
coords={"event_met": np.linspace(1000.0, 1010.0, n_events)},
1286+
)
1287+
1288+
mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
1289+
1290+
# No times should be culled
1291+
assert np.all(mock_goodtimes["cull_flags"].values == 0)
1292+
1293+
def test_full_packet_with_qualified_event(self, mock_goodtimes, mock_config_df):
1294+
"""Test that full packet with qualified final event is culled."""
1295+
# Create L1B DE with one packet having exactly 664 events
1296+
n_events = 664
1297+
event_mets = np.linspace(1005.0, 1006.0, n_events)
1298+
l1b_de = xr.Dataset(
1299+
{
1300+
"ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
1301+
# Final event has coincidence_type=15 (ABC1C2), which is qualified
1302+
"coincidence_type": (
1303+
["event_met"],
1304+
np.full(n_events, 15, dtype=np.uint8),
1305+
),
1306+
},
1307+
coords={"event_met": event_mets},
1308+
)
1309+
1310+
mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
1311+
1312+
# MET ~1006 should be culled (maps to goodtimes MET 1000)
1313+
# The MET 1000 bin should have all spin bins culled
1314+
assert mock_goodtimes["cull_flags"].values[0, :].sum() == 90
1315+
1316+
def test_full_packet_with_unqualified_event(self, mock_goodtimes, mock_config_df):
1317+
"""Test that full packet with unqualified final event is NOT culled."""
1318+
# Create L1B DE with one packet having exactly 664 events
1319+
n_events = 664
1320+
event_mets = np.linspace(1005.0, 1006.0, n_events)
1321+
l1b_de = xr.Dataset(
1322+
{
1323+
"ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
1324+
# Final event has coincidence_type=3 (not in any cal product)
1325+
"coincidence_type": (
1326+
["event_met"],
1327+
np.full(n_events, 3, dtype=np.uint8),
1328+
),
1329+
},
1330+
coords={"event_met": event_mets},
1331+
)
1332+
1333+
mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
1334+
1335+
# No times should be culled since final event is unqualified
1336+
assert np.all(mock_goodtimes["cull_flags"].values == 0)
1337+
1338+
def test_multiple_full_packets(self, mock_goodtimes, mock_config_df):
1339+
"""Test handling of multiple full packets."""
1340+
# Create L1B DE with two packets, each having 664 events
1341+
n_events_per_packet = 664
1342+
n_packets = 2
1343+
1344+
ccsds_indices = np.concatenate(
1345+
[np.full(n_events_per_packet, i, dtype=np.uint16) for i in range(n_packets)]
1346+
)
1347+
# Packet 0: final event qualified (15)
1348+
# Packet 1: final event unqualified (3)
1349+
coincidence_types = np.concatenate(
1350+
[
1351+
np.concatenate(
1352+
[np.full(n_events_per_packet - 1, 3, dtype=np.uint8), [15]]
1353+
),
1354+
np.full(n_events_per_packet, 3, dtype=np.uint8),
1355+
]
1356+
)
1357+
event_mets = np.concatenate(
1358+
[
1359+
np.linspace(1005.0, 1006.0, n_events_per_packet), # Packet 0
1360+
np.linspace(1015.0, 1016.0, n_events_per_packet), # Packet 1
1361+
]
1362+
)
1363+
1364+
l1b_de = xr.Dataset(
1365+
{
1366+
"ccsds_index": (["event_met"], ccsds_indices),
1367+
"coincidence_type": (["event_met"], coincidence_types),
1368+
},
1369+
coords={"event_met": event_mets},
1370+
)
1371+
1372+
mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
1373+
1374+
# Only packet 0's MET should be culled (MET 1000)
1375+
# Packet 1 has unqualified final event, so MET 1010 should not be culled
1376+
assert np.sum(mock_goodtimes["cull_flags"].values[0, :] > 0) == 90 # All bins
1377+
assert np.all(mock_goodtimes["cull_flags"].values[1, :] == 0) # MET 1010
1378+
1379+
def test_empty_de_data(self, mock_goodtimes, mock_config_df):
1380+
"""Test handling of empty L1B DE data."""
1381+
l1b_de = xr.Dataset(
1382+
{
1383+
"ccsds_index": (["event_met"], np.array([], dtype=np.uint16)),
1384+
"coincidence_type": (["event_met"], np.array([], dtype=np.uint8)),
1385+
},
1386+
coords={"event_met": np.array([])},
1387+
)
1388+
1389+
# Should not raise, just return without culling
1390+
mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
1391+
assert np.all(mock_goodtimes["cull_flags"].values == 0)
1392+
1393+
def test_custom_cull_code(self, mock_goodtimes, mock_config_df):
1394+
"""Test using a custom cull code."""
1395+
n_events = 664
1396+
event_mets = np.linspace(1005.0, 1006.0, n_events)
1397+
l1b_de = xr.Dataset(
1398+
{
1399+
"ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
1400+
"coincidence_type": (
1401+
["event_met"],
1402+
np.concatenate([np.full(n_events - 1, 3, dtype=np.uint8), [15]]),
1403+
),
1404+
},
1405+
coords={"event_met": event_mets},
1406+
)
1407+
1408+
custom_cull = 5
1409+
mark_overflow_packets(
1410+
mock_goodtimes, l1b_de, mock_config_df, cull_code=custom_cull
1411+
)
1412+
1413+
# Check that the custom cull code was used
1414+
assert np.any(mock_goodtimes["cull_flags"].values == custom_cull)
1415+
1416+
def test_final_event_is_last_in_list(self, mock_goodtimes, mock_config_df):
1417+
"""Test that the final event is the last one in the list for the packet."""
1418+
n_events = 664
1419+
event_mets = np.linspace(1005.0, 1006.0, n_events)
1420+
1421+
# All events have unqualified type except the last one in the list
1422+
coincidence_types = np.full(n_events, 3, dtype=np.uint8)
1423+
coincidence_types[-1] = 12 # Last event is qualified
1424+
1425+
l1b_de = xr.Dataset(
1426+
{
1427+
"ccsds_index": (["event_met"], np.zeros(n_events, dtype=np.uint16)),
1428+
"coincidence_type": (["event_met"], coincidence_types),
1429+
},
1430+
coords={"event_met": event_mets},
1431+
)
1432+
1433+
mark_overflow_packets(mock_goodtimes, l1b_de, mock_config_df)
1434+
1435+
# Should be culled because the final event (last in list) is qualified
1436+
assert np.sum(mock_goodtimes["cull_flags"].values > 0) > 0

0 commit comments

Comments
 (0)