Commit be17eb7

Add Hi Goodtimes statistical filter 0 check (IMAP-Science-Operations-Center#2716)
* Add Hi Goodtimes statistical filter 0 check
* Copilot feedback changes
1 parent 017eb15 commit be17eb7

2 files changed: 753 additions & 1 deletion

imap_processing/hi/hi_goodtimes.py: 271 additions & 1 deletion
@@ -9,7 +9,7 @@
 import pandas as pd
 import xarray as xr
 
-from imap_processing.hi.utils import parse_sensor_number
+from imap_processing.hi.utils import CoincidenceBitmap, parse_sensor_number
 
 logger = logging.getLogger(__name__)
 
@@ -808,3 +808,273 @@ def mark_overflow_packets(
         f"Found {len(full_packet_indices)} full packet(s), "
         f"dropped {len(mets_to_cull)} 8-spin period(s) due to overflow packets"
     )
+
+
+def _get_sweep_indices(esa_step: np.ndarray) -> np.ndarray:
+    """
+    Assign sweep indices to each MET based on ESA step transitions.
+
+    A new sweep starts when ESA step transitions from high to low
+    (e.g., 9 -> 1), detected using np.diff().
+
+    Parameters
+    ----------
+    esa_step : numpy.ndarray
+        ESA step values for each MET (epoch dimension).
+
+    Returns
+    -------
+    sweep_indices : numpy.ndarray
+        Sweep index for each MET. First sweep is index 0.
+    """
+    if len(esa_step) == 0:
+        return np.array([], dtype=np.int32)
+
+    # Find sweep boundaries where ESA step transitions from high to low
+    esa_diff = np.diff(esa_step.astype(np.int32))
+    # Negative diff indicates high-to-low transition (e.g., 9 -> 1 = -8)
+    sweep_boundaries = esa_diff < 0
+
+    # Create sweep indices using cumsum on boundaries
+    # Prepend False so first MET is in sweep 0
+    sweep_indices = (
+        np.concatenate([[False], sweep_boundaries]).cumsum().astype(np.int32)
+    )
+
+    return sweep_indices
+
+
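Aside, before the diff continues (this example is not part of the commit): the diff/cumsum trick in `_get_sweep_indices` can be sanity-checked on a toy array. A minimal sketch, assuming only numpy and a hypothetical 3-step ESA cycle:

```python
import numpy as np

# Hypothetical ESA step sequence: two complete 1..3 sweeps, then a partial one
esa_step = np.array([1, 2, 3, 1, 2, 3, 1, 2], dtype=np.int32)

# A negative first difference marks a high-to-low transition (3 -> 1 = -2)
sweep_boundaries = np.diff(esa_step) < 0

# Prepend False so the first sample lands in sweep 0, then cumulative-sum:
# each boundary crossed bumps the sweep index by one
sweep_indices = np.concatenate([[False], sweep_boundaries]).cumsum()

print(sweep_indices)  # [0 0 0 1 1 1 2 2]
```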
+def _add_sweep_indices(l1b_de: xr.Dataset) -> xr.Dataset:
+    """
+    Add esa_sweep coordinate to the dataset based on ESA step transitions.
+
+    Parameters
+    ----------
+    l1b_de : xarray.Dataset
+        L1B Direct Event dataset.
+
+    Returns
+    -------
+    xarray.Dataset
+        Dataset with esa_sweep coordinate added on epoch dimension.
+    """
+    sweep_indices = _get_sweep_indices(l1b_de["esa_step"].values)
+    return l1b_de.assign_coords(esa_sweep=("epoch", sweep_indices))
+
+
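Another aside before the diff continues (not commit code): attaching the sweep index as a coordinate, rather than as a data variable, is what lets downstream code select and reshape by sweep. A sketch on a hypothetical stand-in for an L1B DE dataset:

```python
import numpy as np
import xarray as xr

# Hypothetical epoch-dimensioned dataset standing in for an L1B DE product
ds = xr.Dataset(
    {"esa_step": ("epoch", np.array([1, 2, 3, 1, 2, 3]))},
    coords={"epoch": np.arange(6)},
)

# Attach a per-epoch sweep index as a coordinate, as _add_sweep_indices does
ds = ds.assign_coords(esa_sweep=("epoch", np.array([0, 0, 0, 1, 1, 1])))

# The coordinate can now drive selection, e.g. all samples in sweep 1
sweep1 = ds.isel(epoch=(ds["esa_sweep"] == 1).values)
print(sweep1["esa_step"].values)  # [1 2 3]
```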
+def _compute_normalized_counts_per_sweep(
+    l1b_de: xr.Dataset,
+    tof_ab_limit_ns: int,
+) -> xr.Dataset:
+    """
+    Compute normalized AB coincidence counts per ESA sweep and reshape dataset.
+
+    This function:
+    1. Computes normalized AB coincidence counts per sweep
+    2. Removes all data associated with the event_met coordinate
+    3. Reshapes the dataset so esa_sweep becomes a dimension (removing epoch)
+    4. Returns the updated dataset with all epoch-based variables
+
+    Parameters
+    ----------
+    l1b_de : xarray.Dataset
+        L1B Direct Event dataset with esa_sweep coordinate on epoch dimension.
+    tof_ab_limit_ns : int
+        Maximum absolute value of tof_ab in nanoseconds.
+
+    Returns
+    -------
+    xarray.Dataset
+        Reshaped dataset with esa_sweep as a dimension containing:
+        - normalized_count: normalized AB coincidence counts per sweep
+        - All other variables from the input dataset (first value per sweep)
+    """
+    if "esa_sweep" not in l1b_de.coords:
+        raise ValueError("Dataset must have esa_sweep coordinate")
+
+    # Filter to valid AB coincidences
+    tof_ab = l1b_de["tof_ab"]
+    coincidence_type = l1b_de["coincidence_type"]
+    ccsds_index = l1b_de["ccsds_index"]
+
+    ab_coincidence_type = CoincidenceBitmap.detector_hit_str_to_int("AB")
+    is_valid_ab = (coincidence_type == ab_coincidence_type) & (
+        np.abs(tof_ab) <= tof_ab_limit_ns
+    )
+
+    # Map events to sweeps via ccsds_index -> esa_sweep
+    event_epoch_idx = ccsds_index.values
+    event_sweep_idx = l1b_de["esa_sweep"].values[event_epoch_idx]
+
+    # Count valid AB events per sweep
+    n_sweeps = int(l1b_de["esa_sweep"].max().values) + 1
+    counts_per_sweep = np.zeros(n_sweeps, dtype=np.int64)
+    np.add.at(counts_per_sweep, event_sweep_idx[is_valid_ab.values], 1)
+
+    # Normalize by number of unique ESA steps
+    n_unique_esa_steps = len(np.unique(l1b_de["esa_step"].values))
+    normalized_counts = counts_per_sweep / n_unique_esa_steps
+
+    # Remove all variables that depend on event_met dimension
+    ds = l1b_de.drop_dims("event_met", errors="ignore")
+
+    # Set esa_sweep and esa_step as a multi-index on epoch dimension
+    ds = ds.set_index(epoch=["esa_sweep", "esa_step"])
+
+    # Drop duplicates, keeping first occurrence of each (esa_sweep, esa_step) pair
+    # This handles cases where multiple packets have the same esa_sweep and esa_step
+    ds = ds.drop_duplicates(dim="epoch", keep="first")
+
+    # Unstack to make esa_sweep and esa_step into separate dimensions
+    # This creates a 2D array with dimensions (esa_sweep, esa_step)
+    ds_reshaped = ds.unstack("epoch")
+
+    # Add normalized_count as a new variable
+    # It only has esa_sweep dimension (no esa_step variation within a sweep)
+    ds_reshaped["normalized_count"] = xr.DataArray(
+        normalized_counts,
+        dims=["esa_sweep"],
+        coords={"esa_sweep": np.arange(n_sweeps)},
+    )
+
+    return ds_reshaped
+
+
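Two pieces of `_compute_normalized_counts_per_sweep` are worth illustrating before the diff continues (made-up data, not commit code): np.add.at, which accumulates repeated indices where a fancy-indexed `+= 1` would not, and the set_index/unstack pattern that turns the flat epoch dimension into an (esa_sweep, esa_step) grid:

```python
import numpy as np
import xarray as xr

# Scatter-add counting: repeated indices accumulate correctly
event_sweep_idx = np.array([0, 0, 1, 1, 1, 2])  # hypothetical sweep of each event
counts = np.zeros(3, dtype=np.int64)
np.add.at(counts, event_sweep_idx, 1)
print(counts)  # [2 3 1]

# Reshape (epoch,) -> (esa_sweep, esa_step) via a multi-index, then unstack
ds = xr.Dataset(
    {"ccsds_met": ("epoch", np.arange(6.0))},
    coords={
        "esa_sweep": ("epoch", np.array([0, 0, 0, 1, 1, 1])),
        "esa_step": ("epoch", np.array([1, 2, 3, 1, 2, 3])),
    },
)
ds = ds.set_index(epoch=["esa_sweep", "esa_step"]).unstack("epoch")
print(ds["ccsds_met"].dims, ds["ccsds_met"].shape)  # ('esa_sweep', 'esa_step') (2, 3)
```

Any (esa_sweep, esa_step) pair absent from the data becomes NaN after unstacking, which is why the culling loop below extracts MET ranges with np.nanmin/np.nanmax.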
+def mark_statistical_filter_0(
+    goodtimes_ds: xr.Dataset,
+    l1b_de_datasets: list[xr.Dataset],
+    current_index: int,
+    threshold_factor: float = 1.5,
+    tof_ab_limit_ns: int = 15,
+    cull_code: int = CullCode.LOOSE,
+    min_pointings: int = 4,
+) -> None:
+    """
+    Apply Statistical Filter 0 to detect drastic penetrating background changes.
+
+    Statistical Filter 0 from Algorithm Document Section 2.3.2.3 detects when
+    the penetrating background rate has changed drastically, compromising
+    background subtraction accuracy. For each ESA sweep across all input
+    Pointings, it computes the normalized AB coincidence count (total count
+    divided by number of ESA steps). It then marks ESA sweeps in the current
+    Pointing where the normalized count exceeds 150% of the median.
+
+    Parameters
+    ----------
+    goodtimes_ds : xarray.Dataset
+        Goodtimes dataset for the current Pointing to update.
+    l1b_de_datasets : list[xarray.Dataset]
+        List of L1B DE datasets for surrounding Pointings. Typically includes
+        current plus preceding and following Pointings
+        (e.g., [P-3, P-2, P-1, P(current), P+1, P+2, P+3]).
+    current_index : int
+        Index of the current Pointing in l1b_de_datasets.
+    threshold_factor : float, optional
+        Multiplier for median comparison. Default is 1.5 (150% of median).
+    tof_ab_limit_ns : int, optional
+        Maximum |tof_ab| in nanoseconds for AB coincidences. Default is 15.
+    cull_code : int, optional
+        Cull code to use for marking bad times. Default is CullCode.LOOSE.
+    min_pointings : int, optional
+        Minimum number of Pointings required. Default is 4.
+
+    Raises
+    ------
+    ValueError
+        If current_index is out of range or if fewer than min_pointings
+        datasets are provided.
+
+    Notes
+    -----
+    This function modifies goodtimes_ds in place. Only ESA sweeps in the
+    current Pointing where the normalized count exceeds `threshold_factor *
+    median` are marked as bad. Other sweeps remain unaffected.
+
+    Algorithm:
+    1. For each complete ESA sweep across all Pointings, count AB coincidences
+       where |tof_ab| <= 15ns and divide by number of ESA steps
+    2. Calculate median of all normalized sweep counts
+    3. For each sweep in current Pointing, mark all METs in that sweep as bad
+       if normalized count > threshold_factor * median
+    """
+    logger.info("Running mark_statistical_filter_0 culling")
+
+    # Validate current_index is in range
+    if current_index < 0 or current_index >= len(l1b_de_datasets):
+        raise ValueError(
+            f"current_index {current_index} out of range for list of "
+            f"length {len(l1b_de_datasets)}"
+        )
+
+    # Validate that we have the minimum number of datasets
+    if len(l1b_de_datasets) < min_pointings:
+        raise ValueError(
+            f"At least {min_pointings} valid Pointings required, "
+            f"got {len(l1b_de_datasets)}"
+        )
+
+    # Add esa_sweep coordinate, reshape, and compute normalized_count for each dataset
+    all_normalized_counts: list[np.ndarray] = []
+    reshaped_datasets: dict[int, xr.Dataset] = {}
+
+    for i, l1b_de in enumerate(l1b_de_datasets):
+        # Add esa_sweep coordinate
+        l1b_de_with_sweep = _add_sweep_indices(l1b_de)
+
+        # Compute normalized counts and reshape dataset. This removes epoch
+        # dimension, adds esa_sweep dimension, and includes normalized_count.
+        reshaped_ds = _compute_normalized_counts_per_sweep(
+            l1b_de_with_sweep, tof_ab_limit_ns
+        )
+
+        # Store reshaped dataset and normalized counts
+        reshaped_datasets[i] = reshaped_ds
+        all_normalized_counts.append(reshaped_ds["normalized_count"].values)
+
+        offset = i - current_index
+        logger.debug(
+            f"Pointing {offset:+d}: "
+            f"{len(reshaped_ds['normalized_count'])} complete ESA sweeps"
+        )
+
+    current_ds = reshaped_datasets[current_index]
+
+    # Calculate median from all sweep counts
+    all_counts = np.concatenate(all_normalized_counts)
+    median_count = float(np.median(all_counts))
+    threshold = median_count * threshold_factor
+
+    logger.info(
+        f"Statistical Filter 0: median={median_count:.2f}, "
+        f"threshold={threshold:.2f} ({len(all_counts)} sweeps)"
+    )
+
+    # Find and mark bad sweeps in current dataset
+    bad_sweep_mask = current_ds["normalized_count"] > threshold
+    n_bad_sweeps = int(bad_sweep_mask.sum())
+
+    # Get MET time ranges for bad sweeps using xarray boolean indexing
+    # Select only the bad sweeps using the mask
+    bad_sweeps_ds = current_ds.isel(esa_sweep=bad_sweep_mask)
+
+    # For each bad sweep, mark the time range from first to last ccsds_met
+    for sweep_idx in range(len(bad_sweeps_ds["esa_sweep"])):
+        # Get all ccsds_met values for this sweep across all esa_steps
+        sweep_mets = bad_sweeps_ds["ccsds_met"].isel(esa_sweep=sweep_idx).values
+
+        # Get min and max MET values, ignoring NaNs
+        met_start: float = float(np.nanmin(sweep_mets))
+        met_end: float = float(np.nanmax(sweep_mets))
+
+        # Mark the entire time range for this sweep as bad
+        goodtimes_ds.goodtimes.mark_bad_times(
+            met=(met_start, met_end), bins=None, cull=cull_code
+        )
+
+    if n_bad_sweeps > 0:
+        logger.info(
+            f"Statistical Filter 0: Marked {n_bad_sweeps}/"
+            f"{len(current_ds['normalized_count'])} ESA sweeps as bad"
+        )
+    else:
+        logger.info("No bad ESA sweeps identified by Statistical Filter 0")
