Skip to content

Commit 3d3ef90

Browse files
authored
Modify build_cdf_dataset to automatically drop variables that don't have attributes defined. (IMAP-Science-Operations-Center#2326)
1 parent 1449239 commit 3d3ef90

2 files changed

Lines changed: 58 additions & 30 deletions

File tree

imap_processing/ena_maps/ena_maps.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,12 +1265,13 @@ def to_dataset(self) -> xr.Dataset:
12651265
coords={**self.non_spatial_coords, **self.spatial_coords},
12661266
)
12671267

1268-
def build_cdf_dataset(
1268+
def build_cdf_dataset( # noqa: PLR0912
12691269
self,
12701270
instrument: str,
12711271
level: str,
12721272
descriptor: str,
12731273
sensor: str | None = None,
1274+
drop_vars_with_no_attributes: bool = True,
12741275
) -> xr.Dataset:
12751276
"""
12761277
Format the data into a xarray.Dataset and add required CDF variables.
@@ -1285,6 +1286,12 @@ def build_cdf_dataset(
12851286
Descriptor for filename.
12861287
sensor : str, optional
12871288
Sensor number "45" or "90".
1289+
drop_vars_with_no_attributes : bool, optional
1290+
Default behavior is to drop any dataset variables that don't have
1291+
attributes defined in the CDF attribute manager. This ensures that
1292+
the output CDF doesn't have any of the intermediate variables left
1293+
over from computations. Sometimes, it is useful to output the
1294+
intermediate variables. To do so, set this to False.
12881295
12891296
Returns
12901297
-------
@@ -1388,13 +1395,18 @@ def build_cdf_dataset(
13881395
variable_name=name,
13891396
check_schema=check_schema,
13901397
)
1391-
except KeyError as e:
1392-
raise KeyError(
1393-
f"Attributes for variable {name} not found in "
1394-
f"loaded variable attributes."
1395-
) from e
1396-
1397-
cdf_ds[name].attrs.update(var_attrs)
1398+
cdf_ds[name].attrs.update(var_attrs)
1399+
except KeyError:
1400+
if drop_vars_with_no_attributes:
1401+
logger.debug(
1402+
f"Dropping variable '{name}' that has no attributes defined."
1403+
)
1404+
cdf_ds = cdf_ds.drop_vars(name)
1405+
else:
1406+
logger.debug(
1407+
f"Variable '{name}' has no attributes defined. It will "
1408+
f"be included in the output dataset with no attributes."
1409+
)
13981410

13991411
# Manually adjust epoch attributes
14001412
cdf_ds["epoch"].attrs.update(

imap_processing/tests/ena_maps/test_ena_maps.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -885,12 +885,20 @@ def mock_data_for_build_cdf_dataset(self):
885885
name="ena_intesity",
886886
dims=[k for k in coord_sizes.keys()][:-1],
887887
)
888-
# Add one variable that is expected to get removed
889-
mock_dataset["foo_var"] = xr.DataArray(
888+
# Add one variable that is expected to get removed because it has a
889+
# dimension that is not in the list of `coord_names`
890+
mock_dataset["extra_dimension_var"] = xr.DataArray(
890891
np.ones(tuple(s for s in coord_sizes.values())),
891-
name="foo_var",
892+
name="extra_dimension_var",
892893
dims=[k for k in coord_sizes.keys()],
893894
)
895+
# Add a variable that is expected to get removed because it has no
896+
# attributes defined
897+
mock_dataset["no_attr_var"] = xr.DataArray(
898+
np.ones(tuple(s for s in coord_sizes.values())[:-1]),
899+
name="no_attr_var",
900+
dims=[k for k in coord_sizes.keys()][:-1],
901+
)
894902
# Add required energy delta variables
895903
for side in ["minus", "plus"]:
896904
mock_dataset[f"{CoordNames.ENERGY_L2.value}_delta_{side}"] = xr.DataArray(
@@ -910,11 +918,12 @@ def test_build_cdf_dataset(self, mock_to_dataset, mock_data_for_build_cdf_datase
910918
skymap.min_epoch = 10
911919
skymap.max_epoch = 15
912920
cdf_dataset = skymap.build_cdf_dataset(
913-
"hi", "l2", "foo_descriptor", sensor="45"
921+
"hi", "l2", "foo_descriptor", sensor="45", drop_vars_with_no_attributes=True
914922
)
915923

916-
# Check that expected var gets removed
917-
assert "foo_var" not in cdf_dataset
924+
# Check that expected vars get removed
925+
assert "extra_dimension_var" not in cdf_dataset
926+
assert "no_attr_var" not in cdf_dataset
918927
# Check the epoch values
919928
assert CoordNames.TIME.value in cdf_dataset
920929
assert cdf_dataset[CoordNames.TIME.value].values[0] == skymap.min_epoch
@@ -961,30 +970,15 @@ def test_build_cdf_dataset_key_error(
961970
):
962971
"""Test build_cdf_dataset raising a KeyError."""
963972
mock_dataset = mock_data_for_build_cdf_dataset
964-
# Add ena intensity variable
965-
mock_dataset["no_attrs_var"] = xr.DataArray(
966-
np.ones(
967-
tuple(s for s in mock_data_for_build_cdf_dataset.coords.sizes.values())[
968-
:-1
969-
]
970-
),
971-
name="no_attrs_var",
972-
dims=[k for k in mock_data_for_build_cdf_dataset.coords.sizes.keys()][:-1],
973-
)
974973
mock_to_dataset.return_value = mock_dataset
975974

976975
skymap = ena_maps.RectangularSkyMap(6, geometry.SpiceFrame.ECLIPJ2000)
977976
skymap.min_epoch = 10
978977
skymap.max_epoch = 15
979-
# Test that variables with no attributes defined raise KeyError
980-
with pytest.raises(
981-
KeyError, match="Attributes for variable no_attrs_var not found"
982-
):
983-
_ = skymap.build_cdf_dataset("hi", "l2", "foo_descriptor", sensor="45")
984978

985979
# Test that a missing energy delta variable raises KeyError
986980
# Test for missing energy_delta_plus
987-
mock_dataset = mock_dataset.drop(["no_attrs_var", "energy_delta_plus"])
981+
mock_dataset = mock_dataset.drop(["energy_delta_plus"])
988982
mock_to_dataset.return_value = mock_dataset
989983
with pytest.raises(
990984
KeyError,
@@ -1000,6 +994,28 @@ def test_build_cdf_dataset_key_error(
1000994
):
1001995
_ = skymap.build_cdf_dataset("hi", "l2", "foo_descriptor", sensor="45")
1002996

997+
@mock.patch("imap_processing.ena_maps.ena_maps.RectangularSkyMap.to_dataset")
998+
def test_keep_vars_with_no_attributes(
999+
self, mock_to_dataset, mock_data_for_build_cdf_dataset
1000+
):
1001+
"""Test that variables with no attributes are kept when desired."""
1002+
# Set up the mock
1003+
mock_to_dataset.return_value = mock_data_for_build_cdf_dataset
1004+
1005+
skymap = ena_maps.RectangularSkyMap(6, geometry.SpiceFrame.ECLIPJ2000)
1006+
skymap.min_epoch = 10
1007+
skymap.max_epoch = 15
1008+
cdf_dataset = skymap.build_cdf_dataset(
1009+
"hi",
1010+
"l2",
1011+
"foo_descriptor",
1012+
sensor="45",
1013+
drop_vars_with_no_attributes=False,
1014+
)
1015+
1016+
# Check that expected var was not removed
1017+
assert "no_attr_var" in cdf_dataset
1018+
10031019

10041020
class TestHealpixSkyMap:
10051021
@pytest.fixture(autouse=True)

0 commit comments

Comments
 (0)