Skip to content

Commit 7ee85a0

Browse files
authored
MNT/FIX: Generalize Lo resweep handling (#2531)
This is needed by other data products too, so we can pull out the ancillary file handling to get the resweep values into a helper function. Change from for-loops and setting individual elements to numpy vectorization for resweeping. Fix exposure time calculation that was incorrectly assigning to many ESA levels during a sweep. We should only have 7 total values in the exposure factor calculation of esa_sweeps.
1 parent 19f1d25 commit 7ee85a0

2 files changed

Lines changed: 103 additions & 101 deletions

File tree

imap_processing/lo/l1b/lo_l1b.py

Lines changed: 95 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,112 +1450,26 @@ def resweep_histogram_data(
14501450
3D array of exposure factors (epoch, azimuth, esa_step) indicating how many
14511451
ESA steps were reswept during resweeping.
14521452
"""
1453-
# The sweep table contains the mapping of dates to the LUT table which shows how
1454-
# the ESA steps should be reswept.
1455-
sweep_df = lo_ancillary.read_ancillary_file(
1456-
next(str(s) for s in anc_dependencies if "sweep-table" in str(s))
1457-
)
1458-
lut_df = lo_ancillary.read_ancillary_file(
1459-
next(str(s) for s in anc_dependencies if "esa-mode-lut" in str(s))
1460-
)
1461-
1462-
# Get the time information to compare the epochs to the sweep table dates
1463-
sweep_dates = sweep_df["Date"].astype(str)
14641453
epochs = l1b_histrates["epoch"].values
1465-
epoch_utc = et_to_utc(ttj2000ns_to_et(epochs))
1454+
energy_mapping = _get_esa_level_indices(epochs, anc_dependencies=anc_dependencies)
14661455

14671456
# initialize the reswept counts arrays
14681457
h_counts_reswept = np.zeros_like(l1b_histrates["h_counts"].values)
14691458
o_counts_reswept = np.zeros_like(l1b_histrates["o_counts"].values)
1459+
exposure_factor = np.zeros_like(h_counts_reswept, dtype=int)
14701460

1471-
# Get the number of azimuth bins from the l1b_histrates dataset
1472-
num_azimuth = l1b_histrates.sizes["spin_bin_6"]
1473-
# initialize exposure factor to 1 as this will be used to scale (multiply)
1474-
# the exposure time later
1475-
exposure_factor = np.full(
1476-
(len(epochs), l1b_histrates.sizes["esa_step"], num_azimuth), 1, dtype=int
1461+
# Place potentially multiple esa_steps into the same energy level bin
1462+
np.add.at(
1463+
h_counts_reswept,
1464+
(slice(None), energy_mapping, slice(None)),
1465+
l1b_histrates["h_counts"].values,
14771466
)
1478-
1479-
for epoch_idx, epoch in enumerate(epoch_utc):
1480-
# Get only the date portion of the epoch string for comparison with the
1481-
# sweep table
1482-
epoch_date_only = epoch.split("T")[0]
1483-
1484-
# if the epoch dat is not in the sweep table, raise an error
1485-
if epoch_date_only not in sweep_dates.values:
1486-
raise ValueError(
1487-
f"No sweep table entry found for date {epoch} at epoch idx {epoch_idx}"
1488-
)
1489-
1490-
# Get the matching sweep table entry for the epoch date and its LUT table index
1491-
matching_sweep = sweep_df[sweep_dates == epoch_date_only]
1492-
unique_lut_tables = matching_sweep["LUT_table"].unique()
1493-
1494-
# There should only be one unique LUT table for each date
1495-
if len(unique_lut_tables) != 1:
1496-
raise ValueError(
1497-
f"Expected exactly 1 unique LUT_table value for date {epoch_date_only},"
1498-
f" but found {len(unique_lut_tables)}: {unique_lut_tables}"
1499-
)
1500-
1501-
# Get the LUT entries for the identified LUT table index
1502-
lut_table_idx = unique_lut_tables[0]
1503-
lut_entries = lut_df[lut_df["Tbl_Idx"] == lut_table_idx].copy()
1504-
1505-
# If there are no LUT entries for the identified LUT table, log a warning
1506-
# and skip resweeping for this epoch
1507-
if len(lut_entries) == 0:
1508-
logger.warning(f"No LUT entries found for table index {lut_table_idx}")
1509-
h_counts_reswept[epoch_idx] = l1b_histrates["h_counts"].values[epoch_idx]
1510-
o_counts_reswept[epoch_idx] = l1b_histrates["o_counts"].values[epoch_idx]
1511-
continue
1512-
1513-
# Sort the LUT entries by E-Step_Idx to ensure correct mapping order
1514-
lut_entries = lut_entries.sort_values("E-Step_Idx")
1515-
1516-
# Create a mapping of original ESA step index to true ESA step
1517-
energy_step_mapping = {}
1518-
# Loop through the LUT entries and populate the mapping
1519-
for _, row in lut_entries.iterrows():
1520-
# Original ESA step index is 1-based, convert to 0-based
1521-
esa_idx = int(row["E-Step_Idx"]) - 1
1522-
# True ESA step is 1-based
1523-
true_esa_step = int(row["E-Step_lvl"])
1524-
# Populate the mapping
1525-
energy_step_mapping[esa_idx] = true_esa_step
1526-
1527-
# TODO: Change all instances of azimuth to spin bin
1528-
# Resweep the counts for each spin bin using the energy step mapping
1529-
for az_idx in range(num_azimuth):
1530-
h_original = l1b_histrates["h_counts"].values[epoch_idx, :, az_idx]
1531-
o_original = l1b_histrates["o_counts"].values[epoch_idx, :, az_idx]
1532-
1533-
# Loop through the original ESA step indices and map to the true ESA steps
1534-
for orig_idx, true_esa_step in energy_step_mapping.items():
1535-
# Check that the original index and true ESA step are within bounds
1536-
if orig_idx < len(h_original) and 1 <= true_esa_step <= 7:
1537-
# Resweep the counts into the true ESA step
1538-
# (convert to 0-based index)
1539-
reswept_idx = true_esa_step - 1
1540-
h_counts_reswept[epoch_idx, reswept_idx, az_idx] += h_original[
1541-
orig_idx
1542-
]
1543-
o_counts_reswept[epoch_idx, reswept_idx, az_idx] += o_original[
1544-
orig_idx
1545-
]
1546-
# If a reswept was needed for this index, increment the exposure
1547-
# factor to so the exposure time can be scaled accordingly
1548-
if orig_idx != reswept_idx:
1549-
exposure_factor[epoch_idx, reswept_idx, az_idx] += 1
1550-
1551-
else:
1552-
logger.warning(
1553-
f"Original ESA index {orig_idx} or "
1554-
f"true ESA step {true_esa_step}"
1555-
f" out of bounds at epoch idx {epoch_idx}, "
1556-
f"spin bin idx {az_idx}"
1557-
)
1558-
1467+
np.add.at(
1468+
o_counts_reswept,
1469+
(slice(None), energy_mapping, slice(None)),
1470+
l1b_histrates["o_counts"].values,
1471+
)
1472+
np.add.at(exposure_factor, (slice(None), energy_mapping, slice(None)), 1)
15591473
l1b_histrates["h_counts"].values = h_counts_reswept
15601474
l1b_histrates["o_counts"].values = o_counts_reswept
15611475
l1b_histrates.attrs["energy_step_correction"] = (
@@ -1650,3 +1564,85 @@ def calculate_histogram_rates(
16501564
)
16511565

16521566
return l1b_histrates
1567+
1568+
1569+
def _get_esa_level_indices(epochs: np.ndarray, anc_dependencies: list) -> np.ndarray:
1570+
"""
1571+
Get the ESA level indices (reswept indices) for the given epochs.
1572+
1573+
This will always return a 7-element array mapping the original ESA step
1574+
indices (0-6) to the true ESA levels after resweeping. i.e. we could have
1575+
taken two measurements in a row at the same energy level, so the mapping
1576+
would be [0, 0, 1, 1, 2, 2, 3] potentially. The nominal stepping is
1577+
[0, 1, 2, 3, 4, 5, 6].
1578+
1579+
Parameters
1580+
----------
1581+
epochs : np.ndarray
1582+
Array of epochs in TTJ2000ns format.
1583+
anc_dependencies : list
1584+
List of ancillary file paths.
1585+
1586+
Returns
1587+
-------
1588+
esa_level_indices : np.ndarray
1589+
Array of ESA level indices for each epoch.
1590+
"""
1591+
# The sweep table contains the mapping of dates to the LUT table which shows how
1592+
# the ESA steps should be reswept.
1593+
sweep_df = lo_ancillary.read_ancillary_file(
1594+
next(str(s) for s in anc_dependencies if "sweep-table" in str(s))
1595+
)
1596+
lut_df = lo_ancillary.read_ancillary_file(
1597+
next(str(s) for s in anc_dependencies if "esa-mode-lut" in str(s))
1598+
)
1599+
1600+
# Get the time information to compare the epochs to the sweep table dates
1601+
sweep_dates = sweep_df["Date"].astype(str)
1602+
# Get only the date portion of the epoch string for comparison with the sweep table
1603+
# NOTE: We only use the first epoch here since the LUT mapping should be
1604+
# constant through the entire dataset
1605+
epoch_date_only = et_to_utc(ttj2000ns_to_et(epochs[0])).split("T")[0]
1606+
1607+
# Get the matching sweep table entry for the epoch date and its LUT table index
1608+
matching_sweep = sweep_df[sweep_dates == epoch_date_only]
1609+
# if the epoch date is not in the sweep table, raise an error
1610+
if len(matching_sweep) == 0:
1611+
raise ValueError(f"No sweep table entry found for date {epoch_date_only}")
1612+
1613+
unique_lut_tables = matching_sweep["LUT_table"].unique()
1614+
1615+
# There should only be one unique LUT table for each date
1616+
if len(unique_lut_tables) != 1:
1617+
raise ValueError(
1618+
f"Expected exactly 1 unique LUT_table value for date {epoch_date_only},"
1619+
f" but found {len(unique_lut_tables)}: {unique_lut_tables}"
1620+
)
1621+
1622+
# Get the LUT entries for the identified LUT index
1623+
lut_table_idx = unique_lut_tables[0]
1624+
lut_entries = lut_df[lut_df["Tbl_Idx"] == lut_table_idx].copy()
1625+
1626+
# If there are no LUT entries for the identified LUT table, log a warning
1627+
# and return the default mapping
1628+
if len(lut_entries) == 0:
1629+
logger.warning(f"No LUT entries found for table index {lut_table_idx}")
1630+
return np.arange(7)
1631+
1632+
# Sort the LUT entries by E-Step_Idx to ensure correct mapping order
1633+
lut_entries = lut_entries.sort_values("E-Step_Idx")
1634+
1635+
# TODO: It seems like this is also given to us in the main sweep table
1636+
# Can we just take the last 7 entries of the sweep table for that
1637+
# date and use those values instead of this extra work with the
1638+
# separate LUT ancillary file?
1639+
energy_step_mapping = np.zeros(7, dtype=int)
1640+
# Loop through the LUT entries and populate the mapping
1641+
for _, row in lut_entries.iterrows():
1642+
# Original ESA step index is 1-based, convert to 0-based
1643+
esa_idx = int(row["E-Step_Idx"]) - 1
1644+
true_esa_step = int(row["E-Step_lvl"]) - 1
1645+
# Populate the mapping
1646+
energy_step_mapping[esa_idx] = true_esa_step
1647+
1648+
return energy_step_mapping

imap_processing/tests/lo/test_lo_l1b.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -811,8 +811,12 @@ def test_resweep_histogram_success(anc_dependencies):
811811
"spin_bin_6": np.arange(60),
812812
},
813813
)
814+
# Exposure factor is how many times we saw each ESA step during the sweep
815+
# The first ESA level was repeated twice during the sweep and the second
816+
# ESA level was skipped entirely. [1, 1, 3, 4, 5, 6, 7]
814817
exposure_factor_expected = np.full((2, 7, 60), 1)
815818
exposure_factor_expected[:, 0, :] = 2
819+
exposure_factor_expected[:, 1, :] = 0
816820

817821
l1b_histrate.h_counts[0, 0, 0] = 5
818822
l1b_histrate.h_counts[0, 1, 0] = 10
@@ -835,6 +839,9 @@ def test_resweep_histogram_success(anc_dependencies):
835839
assert l1b_histrates.o_counts[1, 2, 0] == 4
836840

837841
assert np.array_equal(exposure_factor, exposure_factor_expected)
842+
# Sanity check on making sure we get 7 ESA levels for each sweep
843+
# regardless of which bin they are in
844+
np.testing.assert_equal(np.sum(exposure_factor, axis=1), 7)
838845

839846

840847
def test_resweep_histogram_no_date(anc_dependencies):
@@ -860,8 +867,7 @@ def test_resweep_histogram_no_date(anc_dependencies):
860867

861868
with pytest.raises(
862869
ValueError,
863-
match="No sweep table entry found for date "
864-
"2025-04-25T02:00:00.000 at epoch idx 0",
870+
match="No sweep table entry found for date 2025-04-25",
865871
):
866872
resweep_histogram_data(l1b_histrate, anc_dependencies)
867873

0 commit comments

Comments
 (0)