Skip to content

Commit 6ce6ce3

Browse files
authored
GLOWS L2 - Fix calibration bug (#2933)
* Return structure of calibration xarray dataset to what is needed for the combined_dataset method in the GlowsAncillaryCombiner class
* Fix method that gets the calibration factor to correctly parse the structure of the calibration data and fix related tests
* Address PR comments - fix filtering to find time values before or equal to the mid-epoch time and clean up comments
* Remove datetime64 data type for start_time_utc data var in calibration dataset since the values are strings in the real inputs
* Remove sorting of time_block dimension
1 parent 924a306 commit 6ce6ce3

5 files changed

Lines changed: 83 additions & 36 deletions

File tree

imap_processing/ancillary/ancillary_dataset_combiner.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -431,17 +431,13 @@ def convert_file_to_dataset(self, filepath: str | Path) -> xr.Dataset: # noqa:
431431
lines = [line.strip() for line in f if not line.startswith("#")]
432432
identifiers = [line.split(" ", 1)[0] for line in lines]
433433
values = [float(line.split(" ", 1)[1]) for line in lines]
434-
ds = xr.Dataset(
434+
return xr.Dataset(
435435
{
436-
"cps_per_r": (["start_time_utc"], values), # floats
437-
},
438-
coords={
439-
"start_time_utc": np.array(identifiers, dtype="datetime64[s]")
440-
}, # (e.g. '2025-07-01T00:00:00')
436+
"start_time_utc": (["time_block"], identifiers),
437+
"cps_per_r": (["time_block"], values),
438+
}
441439
)
442440

443-
return ds.sortby("start_time_utc")
444-
445441
elif filename.endswith(".json"):
446442
# Handle pipeline settings JSON file using the generic read_json method
447443
return self.convert_json_to_dataset(filepath)

imap_processing/glows/l2/glows_l2_data.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -574,16 +574,39 @@ def get_calibration_factor(
574574
epoch_values : np.ndarray
575575
Array of epoch values from the L1B dataset, in TT J2000 nanoseconds.
576576
calibration_dataset : xr.Dataset
577-
Dataset containing calibration data.
577+
Dataset containing calibration data with the following structure:
578+
Coords: epoch (datetime64[s])
579+
Dims: epoch, cps_per_r_dim_0, start_time_utc_dim_0
580+
Data vars: "cps_per_r" and "start_time_utc" are 2D (epoch, *_dim_0)
581+
582+
Note: epoch and start_time_utc do not necessarily match in size or
583+
values
584+
- epoch contains timestamps in the calibration data up to a defined
585+
day buffer and start_time_utc are the timestamps for all the
586+
calibration data entries.
587+
- epoch is used for selecting the time block, and start_time_utc is
588+
used for selecting the calibration value within that block.
578589
579590
Returns
580591
-------
581592
float
582593
The calibration factor needed to compute flux in Rayleigh units.
583594
"""
584-
# Use the midpoint epoch for the day
595+
# Use the midpoint epoch for the observation day
585596
mid_idx = len(epoch_values) // 2
586597
mid_epoch_utc = et_to_datetime64(ttj2000ns_to_et(epoch_values[mid_idx].item()))
587-
return calibration_dataset.sel(start_time_utc=mid_epoch_utc, method="pad")[
588-
"cps_per_r"
589-
].data.item()
598+
599+
# Select calibration data before or equal to mid_epoch_utc using "pad" to find
600+
# the nearest preceding entry in the calibration dataset's epoch
601+
# coordinate which is in UTC datetime64 format.
602+
cal_at_epoch = calibration_dataset.sel(epoch=mid_epoch_utc, method="pad")
603+
604+
# start_time_utc is a data variable with its own index dimension.
605+
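# Use searchsorted (which requires start_times to be sorted ascending) to find
# the last entry whose start_time_utc <= mid_epoch_utc.
# NOTE(review): if every start_time_utc is later than mid_epoch_utc the index
# becomes -1, which isel resolves to the last entry - confirm this cannot occur.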
606+
start_times = np.array(
607+
cal_at_epoch["start_time_utc"].values, dtype="datetime64[ns]"
608+
)
609+
nearest_idx = np.searchsorted(start_times, mid_epoch_utc, side="right") - 1
610+
611+
# Select the calibration value at the nearest index.
612+
return float(cal_at_epoch["cps_per_r"].isel(cps_per_r_dim_0=nearest_idx))

imap_processing/tests/ancillary/test_ancillary_dataset_combiner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ def test_glows_l2_calibration_combiner(tmp_path):
324324
combiner = GlowsAncillaryCombiner([], "20251115")
325325
dataset = combiner.convert_file_to_dataset(file_path)
326326

327-
assert "start_time_utc" in dataset.coords
327+
assert "start_time_utc" in dataset.data_vars
328328
assert (
329329
np.diff(dataset.start_time_utc.values.astype("datetime64")) >= np.timedelta64(0)
330330
).all()

imap_processing/tests/glows/conftest.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,11 +239,35 @@ def mock_conversion_table_dict():
239239
@pytest.fixture
240240
def mock_calibration_dataset():
241241
"""Create a mock CalibrationDataset object for testing."""
242+
243+
# Both cps_per_r and start_time_utc are 2D: (epoch, *_dim_0).
242244
return xr.Dataset(
243-
{"cps_per_r": xr.DataArray([0.849, 1.020], dims=["start_time_utc"])},
245+
{
246+
"cps_per_r": xr.DataArray(
247+
[[0.849, 1.020, 1.500], [0.849, 1.020, 1.500]],
248+
dims=["epoch", "cps_per_r_dim_0"],
249+
),
250+
"start_time_utc": xr.DataArray(
251+
np.array(
252+
[
253+
[
254+
"2011-09-19T09:58:04",
255+
"2011-09-20T18:12:48",
256+
"2011-09-21T18:15:50",
257+
],
258+
[
259+
"2011-09-19T09:58:04",
260+
"2011-09-20T18:12:48",
261+
"2011-09-21T18:15:50",
262+
],
263+
],
264+
),
265+
dims=["epoch", "start_time_utc_dim_0"],
266+
),
267+
},
244268
coords={
245-
"start_time_utc": np.array(
246-
["2011-09-19T09:58:04", "2011-09-20T18:12:48"], dtype="datetime64[s]"
269+
"epoch": np.array(
270+
["2011-09-19T00:00:00", "2011-09-20T00:00:00"], dtype="datetime64[s]"
247271
)
248272
},
249273
)

imap_processing/tests/glows/test_glows_l2_data.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -92,38 +92,42 @@ def l1b_dataset():
9292

9393

9494
def test_get_calibration_factor(mock_calibration_dataset):
95-
"""Test selecting correct calibration factor."""
95+
"""Test selecting correct calibration factor.
9696
97-
# Mock calibration data:
98-
# timestamps: ["2011-09-19T09:58:04", "2011-09-20T18:12:48"]
99-
# values: [0.849, 1.020]
97+
Mock calibration data:
98+
start_time_utc (dims epoch × start_time_utc_dim_0, same per epoch):
99+
["2011-09-19T09:58:04", "2011-09-20T18:12:48", "2011-09-21T18:15:50"]
100+
cps_per_r (dims epoch × cps_per_r_dim_0, same per epoch):
101+
index 0 → 0.849, index 1 → 1.020, index 2 → 1.500
102+
"""
103+
# Case 1: The mid-epoch ('2011-09-22T10:30:55.015') falls after the
104+
# start_time_utc entries, so the last entry (index 2) is selected → 1.500.
105+
106+
# ["2011-09-22T07:45:55.015", "2011-09-22T10:30:55.015", "2011-09-22T13:15:55.015"]
107+
later_epoch = np.array([369949621199000000, 369959521199000000, 369969421199000000])
108+
assert HistogramL2.get_calibration_factor(
109+
later_epoch, mock_calibration_dataset
110+
) == pytest.approx(1.500)
100111

101-
# Case 1: The mid-epoch is after calibration timestamps,
102-
# so the last value is selected (1.020).
112+
# Case 2: The mid-epoch ('2011-09-21T00:52:15.000') falls between the 2nd and
113+
# 3rd start_time_utc entries, so the 2nd entry (index 1) is selected → 1.020.
103114

104115
# ['2011-09-21T00:50:15.000', '2011-09-21T00:52:15.000', '2011-09-21T00:54:15.000']
105-
later_epoch = np.array([369838281184000000, 369838401184000000, 369838521184000000])
116+
between_epoch = np.array(
117+
[369838281184000000, 369838401184000000, 369838521184000000]
118+
)
106119
assert HistogramL2.get_calibration_factor(
107-
later_epoch, mock_calibration_dataset
120+
between_epoch, mock_calibration_dataset
108121
) == pytest.approx(1.020)
109122

110-
# Case 2: The mid-epoch is before all calibration timestamps,
111-
# so a KeyError is raised with the "pad" filter method.
123+
# Case 3: The mid-epoch is before all epoch coordinate values (and all start_time_utc entries),
124+
# so a KeyError is raised by xarray's "pad" selection method.
112125

113126
# ['2011-09-18T19:59:08.816', '2011-09-18T20:01:08.816', '2011-09-18T20:03:08.816']
114127
early_epoch = np.array([369648015000000000, 369648135000000000, 369648255000000000])
115128
with pytest.raises(KeyError):
116129
HistogramL2.get_calibration_factor(early_epoch, mock_calibration_dataset)
117130

118-
# Case 3: The mid-epoch is between the calibration times,
119-
# so the first value is selected (0.849).
120-
121-
# '2011-09-20T16:30:15.000'
122-
between_epoch = np.array([369808281184000000])
123-
assert HistogramL2.get_calibration_factor(
124-
between_epoch, mock_calibration_dataset
125-
) == pytest.approx(0.849)
126-
127131

128132
@pytest.mark.external_kernel
129133
def test_ecliptic_coords_computation(furnish_kernels):

0 commit comments

Comments
 (0)