
Commit e6dcf0c

Fix #246: BinaryParameter truncation in xarray dataset creation (#247)
- Add conditional logic in `create_dataset` to convert `BinaryParameter` objects to `bytes` objects when creating numpy arrays of byte strings
- Add a focused regression test in test_xarr.py:181
- Modify workflow run conditions for tests
- Bump version to 6.1.2
1 parent 84d2a76 commit e6dcf0c

10 files changed, with 79 additions and 35 deletions.
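The truncation fixed by this commit relates to how NumPy handles fixed-width byte-string (`"S"`) dtypes, which silently clip any value longer than the dtype's itemsize. A minimal sketch of that general NumPy behavior (the byte values here are illustrative, not the library's own data):

```python
import numpy as np

# NumPy fixed-width byte-string dtypes silently truncate values longer than the itemsize
clipped = np.asarray([b"ABCDEFGH"], dtype="S4")
print(clipped.tolist())  # [b'ABCD'] -- half the payload is silently dropped

# With an itemsize that matches the data, the full value survives
intact = np.asarray([b"ABCDEFGH"], dtype="S8")
print(intact.tolist())  # [b'ABCDEFGH']
```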


.github/workflows/_build.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -46,7 +46,7 @@ jobs:
 
       # Save ("upload") the distribution artifacts for use by downstream Actions jobs
       - name: Upload distribution artifacts
-        uses: actions/upload-artifact@v6 # This allows us to persist the dist directory after the job has completed
+        uses: actions/upload-artifact@v7 # This allows us to persist the dist directory after the job has completed
        with:
          name: python-package-distributions
          path: dist/
@@ -98,7 +98,7 @@ jobs:
 
       # This makes the artifacts available for downstream jobs
       - name: Upload Conda build artifact
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v7
        with:
          name: conda-package
          path: ${{ env.CONDA_BLD_PATH }}/**/space_packet_parser-*
```

.github/workflows/ci.yml

Lines changed: 1 addition & 13 deletions

```diff
@@ -22,12 +22,6 @@ jobs:
 
   # Run unit and integration tests
   run-tests:
-    # Don't run for label additions
-    if: |
-      github.event_name != 'pull_request' ||
-      github.event.action == 'opened' ||
-      github.event.action == 'synchronize' ||
-      github.event.action == 'reopened'
    name: Test
    runs-on: ${{ matrix.os }}
    permissions:
@@ -62,18 +56,12 @@ jobs:
        run: |
          pytest --color=yes --cov --cov-report=xml
 
-      - uses: codecov/codecov-action@v5
+      - uses: codecov/codecov-action@v6
        with:
          use_oidc: true
 
   # Run the example scripts and ensure there are no errors
   run-examples:
-    # Don't run for label additions
-    if: |
-      github.event_name != 'pull_request' ||
-      github.event.action == 'opened' ||
-      github.event.action == 'synchronize' ||
-      github.event.action == 'reopened'
    name: Run Examples
    runs-on: ubuntu-latest
    permissions:
```

.github/workflows/release.yml

Lines changed: 20 additions & 11 deletions

```diff
@@ -34,7 +34,7 @@ jobs:
    steps:
      # This downloads the build artifacts from the build job
      - name: Download distribution artifacts
-        uses: actions/download-artifact@v7
+        uses: actions/download-artifact@v8
        with:
          name: python-package-distributions
          path: dist/
@@ -59,7 +59,7 @@ jobs:
    steps:
      # This downloads the build artifacts from the build job
      - name: Download distribution artifacts
-        uses: actions/download-artifact@v7
+        uses: actions/download-artifact@v8
        with:
          name: python-package-distributions
          path: dist/
@@ -80,7 +80,7 @@ jobs:
 
    steps:
      - name: Download Conda artifact
-        uses: actions/download-artifact@v7
+        uses: actions/download-artifact@v8
        with:
          name: conda-package
          path: conda-package/
@@ -146,12 +146,21 @@ jobs:
          GITHUB_TOKEN: ${{ github.token }}
        # Uses the GitHub CLI to generate the Release and auto-generate the release notes. Also generates
        # the Release title based on the annotation on the git tag.
-        run: >-
+        run: |
          RELEASE_NAME=$(basename "${{ github.ref_name }}")
-          gh release create
-          '${{ github.ref_name }}'
-          --repo '${{ github.repository }}'
-          --title "$RELEASE_NAME"
-          ${{ env.PRE_RELEASE_OPTION }}
-          --generate-notes
-          --notes-start-tag '${{ env.LATEST_RELEASE_TAG }}'
+          ARGS=(
+            "${{ github.ref_name }}"
+            --repo "${{ github.repository }}"
+            --title "$RELEASE_NAME"
+          )
+
+          if [ "${{ env.PRE_RELEASE_OPTION }}" = "--prerelease" ]; then
+            ARGS+=(--prerelease)
+          fi
+
+          ARGS+=(
+            --generate-notes
+            --notes-start-tag "${{ env.LATEST_RELEASE_TAG }}"
+          )
+
+          gh release create "${ARGS[@]}"
```
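The rewritten release step uses a standard Bash idiom: accumulating CLI flags in an array so an option can be appended conditionally while every element stays safely quoted when expanded with `"${ARGS[@]}"`. A standalone sketch of the same pattern, with placeholder values (`v1.2.3` and the local `PRE_RELEASE_OPTION` variable are illustrative, not the workflow's real inputs):

```shell
#!/usr/bin/env bash
set -euo pipefail

# Stand-in for the workflow's env.PRE_RELEASE_OPTION (illustrative only)
PRE_RELEASE_OPTION="--prerelease"

# Build the argument list incrementally; arrays preserve quoting per element
ARGS=(
  "v1.2.3"
  --title "Release v1.2.3"
)

# Append the prerelease flag only when requested
if [ "$PRE_RELEASE_OPTION" = "--prerelease" ]; then
  ARGS+=(--prerelease)
fi

ARGS+=(--generate-notes)

# Expanding with "${ARGS[@]}" keeps each element as a single word,
# even elements containing spaces like "Release v1.2.3"
echo gh release create "${ARGS[@]}"
```

Compared with the old `run: >-` folded-scalar form, this avoids word-splitting surprises when `PRE_RELEASE_OPTION` is empty.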

.gitignore

Lines changed: 1 addition & 0 deletions

```diff
@@ -26,6 +26,7 @@ build
 dist
 space_packet_parser/_version.py
 uv.lock
+node_modules
 
 # Packages #
 ############
```

CITATION.cff

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,7 +1,7 @@
 cff-version: 1.2.0
 title: 'space_packet_parser'
 type: software
-version: '6.1.1'
+version: '6.1.2'
 description: A CCSDS telemetry packet decoding library based on the XTCE packet format description standard.
 license: BSD-3-Clause
 abstract: The Space Packet Parser Python library is a generalized, configurable packet decoding library for CCSDS telemetry
```

docs/source/changelog.md

Lines changed: 4 additions & 0 deletions

```diff
@@ -7,6 +7,10 @@ list and release milestones.
 
 Release notes for the `space_packet_parser` library
 
+### v6.1.2
+
+- BUGFIX: Prevent BinaryParameter truncation in `create_dataset`. [#246](https://github.com/lasp/space_packet_parser/issues/246)
+
 ### v6.1.1
 
 - BUGFIX: Support lxml 5.2.1. [#236](https://github.com/lasp/space_packet_parser/issues/236)
```

meta.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 package:
   name: "space_packet_parser"
-  version: "6.1.1"
+  version: "6.1.2"
 
 source:
   path: .
```

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "space_packet_parser"
-version = "6.1.1"
+version = "6.1.2"
 description = "A CCSDS telemetry packet decoding library based on the XTCE packet format description standard."
 license = { text = "BSD-3-Clause" }
 readme = "README.md"
```

space_packet_parser/xarr.py

Lines changed: 11 additions & 6 deletions

```diff
@@ -255,12 +255,17 @@ def _process_generator(generator):
    dataset_by_apid = {}
 
    for apid, data in data_dict.items():
-        ds = xr.Dataset(
-            data_vars={
-                key: (["packet"], np.asarray(list_of_values, dtype=datatype_mapping[apid][key]))
-                for key, list_of_values in data.items()
-            }
-        )
+        data_vars: dict[str, tuple[list[str], np.ndarray]] = {}  # {var_name: ([dims, ...], data_array)}
+        for key, list_of_values in data.items():
+            dtype = np.dtype(datatype_mapping[apid][key])
+            if dtype.kind == "S":
+                # Special case for byte strings. np.asarray doesn't process BinaryParameter objects correctly to
+                # byte strings, so we need to convert them to bytes first before creating the array.
+                # See: https://github.com/lasp/space_packet_parser/issues/246
+                list_of_values = [bytes(val) for val in list_of_values]
+            data_vars[key] = (["packet"], np.asarray(list_of_values, dtype=dtype))
+
+        ds = xr.Dataset(data_vars=data_vars)
 
        dataset_by_apid[apid] = ds
```
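The fix hinges on converting each value to `bytes` before `np.asarray` builds the fixed-width array. The sketch below reproduces that pattern with a hypothetical `FakeBinary` stand-in for the library's `BinaryParameter` class; the only assumption is that the real class supports `bytes(...)` via `__bytes__`:

```python
import numpy as np

class FakeBinary:
    """Hypothetical stand-in for a BinaryParameter-like wrapper around raw bytes."""
    def __init__(self, raw: bytes):
        self.raw = raw

    def __bytes__(self) -> bytes:
        return self.raw

values = [FakeBinary(b"ABCDEFGH"), FakeBinary(b"12345678")]
dtype = np.dtype("S8")

# Mirror the patched create_dataset logic: for byte-string dtypes,
# convert each object to bytes before building the numpy array
if dtype.kind == "S":
    values = [bytes(v) for v in values]

arr = np.asarray(values, dtype=dtype)
print(arr.dtype)     # |S8
print(arr.tolist())  # [b'ABCDEFGH', b'12345678']
```

Without the explicit `bytes()` conversion, `np.asarray` has no reliable way to coerce arbitrary wrapper objects into fixed-width byte strings, which is the failure mode reported in issue #246.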

tests/unit/test_xarr.py

Lines changed: 37 additions & 0 deletions

```diff
@@ -178,6 +178,43 @@ def test_create_dataset_with_custom_generator(tmp_path, fixed_length_packet_defi
    assert list(dataset["INT32_FIELD"].values) == [12345, 67890, -99999]
 
 
+def test_create_dataset_preserves_binary_parameter_width(tmp_path):
+    """Test that binary parameters keep their full byte width in the resulting dataset."""
+    packet_definition = definitions.XtcePacketDefinition(
+        container_set=[
+            containers.SequenceContainer(
+                "BINARY_CONTAINER",
+                entry_list=[
+                    parameters.Parameter(
+                        "BIN_FIELD",
+                        parameter_type=parameter_types.BinaryParameterType(
+                            "BIN_TYPE", encoding=encodings.BinaryDataEncoding(fixed_size_in_bits=64)
+                        ),
+                    )
+                ],
+            )
+        ]
+    )
+    packet_data = b"ABCDEFGH"
+    test_file = tmp_path / "binary_packets.bin"
+    test_file.write_bytes(packet_data)
+
+    datasets = xarr.create_dataset(
+        test_file,
+        packet_definition,
+        packet_bytes_generator=fixed_length_generator,
+        generator_kwargs={"packet_length_bytes": 8},
+        parse_bytes_kwargs={"root_container_name": "BINARY_CONTAINER"},
+    )
+
+    dataset = list(datasets.values())[0]
+
+    assert dataset["BIN_FIELD"].values.dtype.kind == "S"  # Should be a bytes/string type
+    assert dataset["BIN_FIELD"].values.dtype.itemsize == 8
+    assert dataset["BIN_FIELD"].values.dtype == "|S8"
+    assert dataset["BIN_FIELD"].values.tolist() == [packet_data]
+
+
 def test_create_dataset_with_packet_filter(tmp_path, fixed_length_packet_definition, fixed_length_test_packets):
    """Test filtering packets with packet_filter parameter using raw byte inspection"""
    _, _, _, binary_data = fixed_length_test_packets
```
