Populate map principal data CATDESC from descriptor (IMAP-Science-Operations-Center#2712)

jtniehof · web-flow · commit 6dd63c051003 · 2026-02-24T09:37:40.000-07:00
* ENA: Add to_catdesc to create CATDESC from map descriptor

* ENA: populate principal data CATDESC from descriptor

* ENA: Change CATDESC based on feedback from Dan

 * Put species before the quantity
 * Spell out Combined (and space out from instrument name)
 * Special-case ISN [species] Rate

* Address simple PR comments for map CATDESC

* ENA: Use descriptor to find principal data variable; populate CATDESC for Ultra

* Remove redundant test for Ultra L2 rectangular map from descriptor
diff --git a/imap_processing/ena_maps/ena_maps.py b/imap_processing/ena_maps/ena_maps.py
@@ -16,7 +16,7 @@
 
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
 from imap_processing.cdf.utils import load_cdf
-from imap_processing.ena_maps.utils import map_utils, spatial_utils
+from imap_processing.ena_maps.utils import map_utils, naming, spatial_utils
 
 # The coordinate names can vary between L1C and L2 data (e.g. azimuth vs longitude),
 # so we define an enum to handle the coordinate names.
@@ -1421,6 +1421,12 @@ def build_cdf_dataset(  # noqa: PLR0912
             {"DELTA_PLUS_VAR": "epoch_delta", "BIN_LOCATION": 0}
         )
 
+        # And CATDESC for principal data
+        md = naming.MapDescriptor.from_string(descriptor)
+        principal_data = md.principal_data_var
+        if principal_data in cdf_ds:
+            cdf_ds[principal_data].attrs["CATDESC"] = md.to_catdesc()
+
         return cdf_ds
 
     def to_properties_dict(self) -> dict:
diff --git a/imap_processing/ena_maps/utils/naming.py b/imap_processing/ena_maps/utils/naming.py
@@ -173,6 +173,87 @@ def to_string(self) -> str:
             ]
         )
 
+    def to_catdesc(self) -> str:
+        """
+        Convert the MapDescriptor instance to a human-readable CATDESC string.
+
+        Returns
+        -------
+        str
+            Information in descriptor converted to SPDF CATDESC attribute. This
+            is normally used for plot titles and should be under about 80 characters.
+
+        Raises
+        ------
+        ValueError
+            If the principal data, resolution, or duration is not in a parseable form.
+        """
+        instrument = self.instrument.name.split("_")[0]
+        if instrument not in ("IDEX", "GLOWS"):  # acronyms stay fully capitalized
+            instrument = instrument.title()
+        sensor = " Combined" if self.sensor == "combined" else self.sensor
+        species = "UV" if self.species == "uv" else self.species.title()
+        data_match = re.match(
+            r"^(drt|ena|int|isn|spx)(?:(?<=spx)\d+)?([^-_\s]*)$", self.principal_data
+        )
+        if data_match is None:
+            raise ValueError(f"Cannot parse principal data {self.principal_data!r}")
+        kind, extras = data_match.groups()  # extras is "" when nothing follows
+        quantity = {
+            "drt": "Rate",
+            "ena": "Inten",
+            "int": "Inten",
+            "isn": "Rate",
+            "spx": "Spectral",
+        }[kind]
+        species = f"ISN {species}" if kind == "isn" else species
+        coord = self.coordinate_system.upper()
+        frame = {"hf": "Helio", "hk": "Helio Kin", "sf": "SC"}[self.frame_descriptor]
+        survival = "Surv Corr" if self.survival_corrected == "sp" else "No Surv Corr"
+        spin_phase = self.spin_phase.title()
+        spin_phase = "Full Spin" if spin_phase == "Full" else spin_phase
+        res = re.match(r"^(\d+)deg|nside(\d+)", self.resolution_str)
+        if res is None:
+            raise ValueError(f"Cannot parse resolution {self.resolution_str!r}")
+        resolution = f"{res.group(1)} deg" if res.group(1) else f"NSide {res.group(2)}"
+        if isinstance(self.duration, int):
+            duration = f"{self.duration} Day"
+        else:
+            dur_match = re.match(r"^(\d+)(.*)$", self.duration)
+            if dur_match is None:
+                raise ValueError(f"Cannot parse duration {self.duration!r}")
+            duration = f"{dur_match.group(1)} {dur_match.group(2).title()}"
+            if duration.endswith("Mo"):
+                duration += "n"  # months are abbreviated "Mon"
+        catdesc = (
+            f"IMAP {instrument}{sensor} {species} {quantity}, {coord} "
+            f"{frame} Frame, {survival}, {spin_phase}, {resolution}, {duration}"
+        )
+        for tag, note in (("nbs", "No sputter/bootstrap"), ("nbkgnd", "No bkgnd sub")):
+            if extras.startswith(tag):
+                return f"{catdesc}, {note}"
+        return catdesc
+
+    @property
+    def principal_data_var(self) -> str:
+        """
+        Name of the dataset variable that holds this map's principal data.
+
+        Returns
+        -------
+        principal_data_var : str
+            Variable name selected by the three-letter principal data prefix.
+        """
+        code = self.principal_data
+        var_names = {
+            "drt": "dust_rate",
+            "ena": "ena_intensity",
+            "int": "glows_rate",
+            "isn": "isn_rate_bg_subtracted",
+            "spx": "ena_spectral_index",
+        }
+        return "isn_rate" if code.startswith("isnnbkgnd") else var_names[code[:3]]
+
     # Methods for parsing and building parts of the map descriptor string
     @staticmethod
     def get_instrument_descriptor(
diff --git a/imap_processing/tests/ena_maps/test_ena_maps.py b/imap_processing/tests/ena_maps/test_ena_maps.py
@@ -921,7 +921,11 @@ def test_build_cdf_dataset(self, mock_to_dataset, mock_data_for_build_cdf_datase
         skymap.min_epoch = 10
         skymap.max_epoch = 15
         cdf_dataset = skymap.build_cdf_dataset(
-            "hi", "l2", "foo_descriptor", sensor="45", drop_vars_with_no_attributes=True
+            "hi",
+            "l2",
+            "h45-ena-h-sf-nsp-ram-hae-6deg-6mo",
+            sensor="45",
+            drop_vars_with_no_attributes=True,
         )
 
         # Check that expected vars gets removed
@@ -967,6 +971,12 @@ def test_build_cdf_dataset(self, mock_to_dataset, mock_data_for_build_cdf_datase
                     f"attr '{attr}' should not be in variable attributes for '{var}'"
                 )
 
+        # Check CATDESC made from descriptor
+        assert (
+            cdf_dataset["ena_intensity"].attrs["CATDESC"]
+            == "IMAP Hi45 H Inten, HAE SC Frame, No Surv Corr, Ram, 6 deg, 6 Mon"
+        )
+
     @mock.patch("imap_processing.ena_maps.ena_maps.RectangularSkyMap.to_dataset")
     def test_build_cdf_dataset_external_dataset(
         self, mock_to_dataset, mock_data_for_build_cdf_dataset
@@ -979,12 +989,16 @@ def test_build_cdf_dataset_external_dataset(
         skymap.min_epoch = 10
         skymap.max_epoch = 15
         cdf_dataset_standard = skymap.build_cdf_dataset(
-            "hi", "l2", "foo_descriptor", sensor="45", drop_vars_with_no_attributes=True
+            "hi",
+            "l2",
+            "h45-ena-h-sf-nsp-ram-hae-6deg-6mo",
+            sensor="45",
+            drop_vars_with_no_attributes=True,
         )
         cdf_dataset_external = skymap.build_cdf_dataset(
             "hi",
             "l2",
-            "foo_descriptor",
+            "h45-ena-h-sf-nsp-ram-hae-6deg-6mo",
             sensor="45",
             drop_vars_with_no_attributes=True,
             external_map_dataset=mock_data_for_build_cdf_dataset,
@@ -1019,7 +1033,9 @@ def test_build_cdf_dataset_key_error(
             KeyError,
             match="Required variable 'energy_delta_minus' not found in cdf Dataset.",
         ):
-            _ = skymap.build_cdf_dataset("hi", "l2", "foo_descriptor", sensor="45")
+            _ = skymap.build_cdf_dataset(
+                "hi", "l2", "h45-ena-h-sf-nsp-ram-hae-6deg-6mo", sensor="45"
+            )
 
     @mock.patch("imap_processing.ena_maps.ena_maps.RectangularSkyMap.to_dataset")
     def test_keep_vars_with_no_attributes(
@@ -1035,7 +1051,7 @@ def test_keep_vars_with_no_attributes(
         cdf_dataset = skymap.build_cdf_dataset(
             "hi",
             "l2",
-            "foo_descriptor",
+            "h45-ena-h-sf-nsp-ram-hae-6deg-6mo",
             sensor="45",
             drop_vars_with_no_attributes=False,
         )
diff --git a/imap_processing/tests/ena_maps/test_naming.py b/imap_processing/tests/ena_maps/test_naming.py
@@ -304,3 +304,92 @@ def test_to_string(self):
         )
         descriptor_str_ultra_combined = md_ultra_combined.to_string()
         assert descriptor_str_ultra_combined == "ulc-ena-h-sf-nsp-full-hae-nside32-1yr"
+
+    @pytest.mark.parametrize(
+        "descriptor_str, expected_catdesc",
+        [
+            (
+                "h45-spx-h-hf-sp-ram-hae-4deg-3mo",
+                "IMAP Hi45 H Spectral, HAE Helio Frame, Surv Corr, Ram, 4 deg, 3 Mon",
+            ),
+            (
+                "h45-spx0305-h-hf-sp-ram-hae-4deg-3mo",
+                "IMAP Hi45 H Spectral, HAE Helio Frame, Surv Corr, Ram, 4 deg, 3 Mon",
+            ),
+            (
+                "hic-ena-h-hf-sp-ram-hae-4deg-3mo",
+                "IMAP Hi Combined H Inten, HAE Helio Frame, Surv Corr, Ram,"
+                " 4 deg, 3 Mon",
+            ),
+            (
+                "u45-ena-h-hf-sp-ram-hae-4deg-3mo",
+                "IMAP Ultra45 H Inten, HAE Helio Frame, Surv Corr, Ram, 4 deg, 3 Mon",
+            ),
+            (
+                "u45-ena-h-hf-sp-full-hae-4deg-3mo",
+                "IMAP Ultra45 H Inten, HAE Helio Frame, Surv Corr, Full Spin,"
+                " 4 deg, 3 Mon",
+            ),
+            (
+                "u45-ena-h-hf-sp-ram-hae-nside128-3mo",
+                "IMAP Ultra45 H Inten, HAE Helio Frame, Surv Corr, Ram, NSide 128,"
+                " 3 Mon",
+            ),
+            (
+                "u45-enaCUSTOM-h-hf-sp-ram-hae-4deg-3mo",
+                "IMAP Ultra45 H Inten, HAE Helio Frame, Surv Corr, Ram, 4 deg, 3 Mon",
+            ),
+            (
+                "l090-enanbs-h-sf-nsp-ram-hae-6deg-1yr",
+                "IMAP Lo90 H Inten, HAE SC Frame, No Surv Corr, Ram, 6 deg, 1 Yr,"
+                " No sputter/bootstrap",
+            ),
+            (
+                "t090-ena-o-sf-nsp-ram-hae-6deg-1yr",
+                "IMAP Lo90 O Inten, HAE SC Frame, No Surv Corr, Ram, 6 deg, 1 Yr",
+            ),
+            (
+                "l090-ena-h-hf-nsp-ram-gcs-6deg-1yr",
+                "IMAP Lo90 H Inten, GCS Helio Frame, No Surv Corr, Ram, 6 deg, 1 Yr",
+            ),
+            (
+                "l090-isn-h-sf-nsp-ram-hae-6deg-1yr",
+                "IMAP Lo90 ISN H Rate, HAE SC Frame, No Surv Corr, Ram, 6 deg, 1 Yr",
+            ),
+            (
+                "l090-isnnbkgnd-h-sf-nsp-ram-hae-6deg-1yr",
+                "IMAP Lo90 ISN H Rate, HAE SC Frame, No Surv Corr, Ram, 6 deg, 1 Yr,"
+                " No bkgnd sub",
+            ),
+            (
+                "glx-int-uv-sf-nsp-full-hae-6deg-1yr",
+                "IMAP GLOWS UV Inten, HAE SC Frame, No Surv Corr, Full Spin, 6 deg,"
+                " 1 Yr",
+            ),
+            (
+                "idx-drt-dust-sf-nsp-full-hae-6deg-1yr",
+                "IMAP IDEX Dust Rate, HAE SC Frame, No Surv Corr, Full Spin, 6 deg,"
+                " 1 Yr",
+            ),
+        ],
+    )
+    def test_to_catdesc(self, descriptor_str, expected_catdesc):
+        """Build the CATDESC for a parsed descriptor string and compare it."""
+        # Parse from the string form, since that is the primary use case
+        descriptor = MapDescriptor.from_string(descriptor_str)
+        assert descriptor.to_catdesc() == expected_catdesc
+
+    @pytest.mark.parametrize(
+        "descriptor_str, expected_principal_data_var",
+        [
+            ("hic-ena-h-hf-sp-ram-hae-4deg-3mo", "ena_intensity"),
+            ("h45-spx0305-h-hf-sp-ram-hae-4deg-3mo", "ena_spectral_index"),
+            ("idx-drt-dust-sf-nsp-full-hae-6deg-1yr", "dust_rate"),
+            ("glx-int-uv-sf-nsp-full-hae-6deg-1yr", "glows_rate"),
+            ("l090-isnnbkgnd-h-sf-nsp-ram-hae-6deg-1yr", "isn_rate"),
+            ("l090-isn-h-sf-nsp-ram-hae-6deg-1yr", "isn_rate_bg_subtracted"),
+        ],
+    )
+    def test_principal_data_var(self, descriptor_str, expected_principal_data_var):
+        var_name = MapDescriptor.from_string(descriptor_str).principal_data_var
+        assert var_name == expected_principal_data_var  # name of the CDF variable
diff --git a/imap_processing/tests/ultra/unit/test_ultra_l2.py b/imap_processing/tests/ultra/unit/test_ultra_l2.py
@@ -712,6 +712,12 @@ def test_ultra_l2_descriptor_rectmap(self, mock_data_dict, furnish_kernels):
 
         assert output_map.attrs["Spice_reference_frame"] == "IMAP_HAE"
         assert output_map.attrs["Spacing_degrees"] == "6.0"
+        # Variable Metadata spot checks
+        assert (
+            output_map["ena_intensity"].attrs["CATDESC"]
+            == "IMAP Ultra90 H Inten, HAE Helio Frame, No Surv Corr, Full Spin,"
+            " 6 deg, 6 Mon"
+        )
         write_cdf(output_map)
 
     @pytest.mark.usefixtures("_setup_spice_kernels_list")
diff --git a/imap_processing/ultra/l2/ultra_l2.py b/imap_processing/ultra/l2/ultra_l2.py
@@ -850,4 +850,11 @@ def ultra_l2(
     map_dataset["obs_date"] = map_dataset["obs_date"].astype(np.int64)
     map_dataset["obs_date_range"] = map_dataset["obs_date_range"].astype(np.int64)
 
+    # Adjust CATDESC per descriptor
+    if descriptor is not None:
+        md = MapDescriptor.from_string(descriptor)
+        principal_data = md.principal_data_var
+        if principal_data in map_dataset:
+            map_dataset[principal_data].attrs["CATDESC"] = md.to_catdesc()
+
     return [map_dataset]