|
18 | 18 | import logging |
19 | 19 | from copy import deepcopy |
20 | 20 | from pathlib import Path |
| 21 | +from typing import Any |
21 | 22 |
|
22 | 23 | import click |
23 | 24 | import numpy as np |
|
28 | 29 | from rich.console import Console |
29 | 30 | from rich.logging import RichHandler |
30 | 31 |
|
31 | | -from im2deep.exceptions import IM2DeepError |
| 32 | +from im2deep.exceptions import IM2DeepError, PSMMetadataError |
32 | 33 | from im2deep.utils import ccs2im, im2ccs |
33 | 34 |
|
34 | 35 | console = Console() |
35 | 36 |
|
36 | 37 | LOGGER = logging.getLogger(__name__) |
37 | 38 |
|
| 39 | +ConvertibleToFloat = str | float | int | np.floating[Any] | np.ndarray[Any, Any] |
| 40 | + |
| 41 | + |
| 42 | +def _normalize_ccs_metadata_value(value: ConvertibleToFloat) -> str: |
| 43 | + """Validate and normalize a CCS metadata value to the canonical string form.""" |
| 44 | + try: |
| 45 | + return str(float(value)) |
| 46 | + except (TypeError, ValueError) as exc: |
| 47 | + raise PSMMetadataError( |
| 48 | + f"Invalid CCS metadata value {value!r}; expected a numeric string or number." |
| 49 | + ) from exc |
| 50 | + |
38 | 51 |
|
39 | 52 | def parse_input( |
40 | 53 | input_file: str | Path | PSMList | pd.DataFrame, filetype: str | None = None |
@@ -86,7 +99,7 @@ def parse_input( |
86 | 99 | if "CCS" in row: |
87 | 100 | if precursor.metadata is None: |
88 | 101 | precursor.metadata = {} |
89 | | - precursor.metadata["CCS"] = float(row["CCS"]) # type: ignore |
| 102 | + precursor.metadata["CCS"] = _normalize_ccs_metadata_value(row["CCS"]) # type: ignore |
90 | 103 | list_of_precursors.append(precursor) |
91 | 104 | except Exception as e: |
92 | 105 | LOGGER.warning("Error parsing row %d: %s. Skipping.", idx, e) |
@@ -195,7 +208,7 @@ def _parse_legacy_format(input_file: str | Path) -> PSMList: |
195 | 208 | charge=int(row["charge"]), |
196 | 209 | ) |
197 | 210 | if has_ccs: |
198 | | - metadata = {"CCS": float(row["CCS"])} |
| 211 | + metadata = {"CCS": _normalize_ccs_metadata_value(row["CCS"])} |
199 | 212 |
|
200 | 213 | LOGGER.debug(f"Parsed PSM: {peptidoform} with metadata: {metadata}") |
201 | 214 | precursor = PSM(peptidoform=peptidoform, metadata=metadata, spectrum_id=idx) |
@@ -263,19 +276,22 @@ def validate_psm_list(psm_list: PSMList, needs_target: bool = False) -> PSMList: |
263 | 276 | ) |
264 | 277 |
|
265 | 278 | # TODO: Could be vectorized over all ion mobility values |
266 | | - # If ion_mobility is present, convert to CCS |
| 279 | + # Normalize targets while preserving the PSM metadata str -> str contract. |
267 | 280 | for psm in psm_list_filtered: |
| 281 | + if psm.metadata is None: |
| 282 | + psm.metadata = {} |
| 283 | + |
268 | 284 | if psm.ion_mobility is not None: |
269 | | - if psm.metadata is None: |
270 | | - psm.metadata = {} |
271 | 285 | if "CCS" not in psm.metadata: |
272 | | - psm.metadata["CCS"] = str( |
| 286 | + psm.metadata["CCS"] = _normalize_ccs_metadata_value( |
273 | 287 | im2ccs( |
274 | 288 | psm.ion_mobility, |
275 | 289 | psm.peptidoform.theoretical_mz, |
276 | 290 | psm.peptidoform.precursor_charge, |
277 | 291 | ) |
278 | 292 | ) |
| 293 | + elif psm.metadata.get("CCS") is not None: |
| 294 | + psm.metadata["CCS"] = _normalize_ccs_metadata_value(psm.metadata["CCS"]) |
279 | 295 |
|
280 | 296 | if needs_target and not all_has_targets: |
281 | 297 | raise IM2DeepError("PSMList must contain 'ion_mobility' or 'CCS' metadata for all PSMs.") |
|
0 commit comments