Skip to content

Commit 9c4c354

Browse files
committed
Fix remaining test issues
1 parent 140d3cf commit 9c4c354

5 files changed

Lines changed: 51 additions & 18 deletions

File tree

im2deep/_io_helpers.py

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import logging
1919
from copy import deepcopy
2020
from pathlib import Path
21+
from typing import Any
2122

2223
import click
2324
import numpy as np
@@ -28,13 +29,25 @@
2829
from rich.console import Console
2930
from rich.logging import RichHandler
3031

31-
from im2deep.exceptions import IM2DeepError
32+
from im2deep.exceptions import IM2DeepError, PSMMetadataError
3233
from im2deep.utils import ccs2im, im2ccs
3334

3435
console = Console()
3536

3637
LOGGER = logging.getLogger(__name__)
3738

39+
ConvertibleToFloat = str | float | int | np.floating[Any] | np.ndarray[Any, Any]
40+
41+
42+
def _normalize_ccs_metadata_value(value: ConvertibleToFloat) -> str:
43+
"""Validate and normalize a CCS metadata value to the canonical string form."""
44+
try:
45+
return str(float(value))
46+
except (TypeError, ValueError) as exc:
47+
raise PSMMetadataError(
48+
f"Invalid CCS metadata value {value!r}; expected a numeric string or number."
49+
) from exc
50+
3851

3952
def parse_input(
4053
input_file: str | Path | PSMList | pd.DataFrame, filetype: str | None = None
@@ -86,7 +99,7 @@ def parse_input(
8699
if "CCS" in row:
87100
if precursor.metadata is None:
88101
precursor.metadata = {}
89-
precursor.metadata["CCS"] = float(row["CCS"]) # type: ignore
102+
precursor.metadata["CCS"] = _normalize_ccs_metadata_value(row["CCS"]) # type: ignore
90103
list_of_precursors.append(precursor)
91104
except Exception as e:
92105
LOGGER.warning("Error parsing row %d: %s. Skipping.", idx, e)
@@ -195,7 +208,7 @@ def _parse_legacy_format(input_file: str | Path) -> PSMList:
195208
charge=int(row["charge"]),
196209
)
197210
if has_ccs:
198-
metadata = {"CCS": float(row["CCS"])}
211+
metadata = {"CCS": _normalize_ccs_metadata_value(row["CCS"])}
199212

200213
LOGGER.debug(f"Parsed PSM: {peptidoform} with metadata: {metadata}")
201214
precursor = PSM(peptidoform=peptidoform, metadata=metadata, spectrum_id=idx)
@@ -263,19 +276,22 @@ def validate_psm_list(psm_list: PSMList, needs_target: bool = False) -> PSMList:
263276
)
264277

265278
# TODO: Could be vectorized over all ion mobility values
266-
# If ion_mobility is present, convert to CCS
279+
# Normalize targets while preserving the PSM metadata str -> str contract.
267280
for psm in psm_list_filtered:
281+
if psm.metadata is None:
282+
psm.metadata = {}
283+
268284
if psm.ion_mobility is not None:
269-
if psm.metadata is None:
270-
psm.metadata = {}
271285
if "CCS" not in psm.metadata:
272-
psm.metadata["CCS"] = str(
286+
psm.metadata["CCS"] = _normalize_ccs_metadata_value(
273287
im2ccs(
274288
psm.ion_mobility,
275289
psm.peptidoform.theoretical_mz,
276290
psm.peptidoform.precursor_charge,
277291
)
278292
)
293+
elif psm.metadata.get("CCS") is not None:
294+
psm.metadata["CCS"] = _normalize_ccs_metadata_value(psm.metadata["CCS"])
279295

280296
if needs_target and not all_has_targets:
281297
raise IM2DeepError("PSMList must contain 'ion_mobility' or 'CCS' metadata for all PSMs.")

im2deep/calibration.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -332,7 +332,7 @@ def get_charge(pf):
332332
# Extract CCS from metadata if it's not a direct column
333333
if "CCS" not in target_work.columns and "metadata" in target_work.columns:
334334
target_work["CCS"] = target_work["metadata"].apply(
335-
lambda x: x.get("CCS", np.nan) if isinstance(x, dict) else np.nan
335+
lambda x: float(x.get("CCS")) if isinstance(x, dict) and x.get("CCS") is not None else np.nan
336336
)
337337

338338
source_work["peptide_key"] = source_work["peptidoform"].apply(get_peptide_key)
@@ -434,7 +434,7 @@ def get_charge(pf):
434434

435435
if "CCS" not in target_work.columns and "metadata" in target_work.columns:
436436
target_work["CCS"] = target_work["metadata"].apply(
437-
lambda x: x["CCS"] if "CCS" in x else np.nan
437+
lambda x: float(x["CCS"]) if isinstance(x, dict) and "CCS" in x else np.nan
438438
)
439439

440440
source_work["peptide_key"] = source_work["peptidoform"].apply(get_peptide_key)

im2deep/core.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -128,11 +128,7 @@ def predict_and_calibrate(
128128
)
129129

130130
if not calibration.is_fitted:
131-
if psm_df_reference is None:
132-
raise ValueError(
133-
"Reference PSM list must be provided for calibration fitting when using a custom "
134-
"calibration object."
135-
)
131+
calibration_reference = psm_df_reference if psm_df_reference is not None else psm_df_cal
136132
LOGGER.info("Fitting calibration...")
137133
if any(psm_list_cal["is_decoy"]):
138134
LOGGER.warning(
@@ -141,7 +137,7 @@ def predict_and_calibrate(
141137
)
142138
calibration.fit(
143139
psm_df_cal,
144-
psm_df_reference,
140+
calibration_reference,
145141
multi=multi,
146142
)
147143
else:

im2deep/exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
-------
99
IM2DeepError
1010
Base exception class for all IM2Deep-related errors.
11+
PSMMetadataError
12+
Exception raised when PSM metadata has an invalid type or value.
1113
CalibrationError
1214
Exception raised when calibration-related errors occur.
1315
"""
@@ -36,6 +38,17 @@ class IM2DeepError(Exception):
3638
pass
3739

3840

41+
class PSMMetadataError(IM2DeepError, ValueError):
    """
    Error signalling that PSM metadata does not meet IM2Deep expectations.

    Raised for metadata that is structurally present but holds an invalid
    value in a field IM2Deep has to parse, for example a CCS entry that is
    not numeric. Because it also derives from ``ValueError``, handlers written
    for ``ValueError`` catch it as well.
    """
50+
51+
3952
class CalibrationError(IM2DeepError):
4053
"""
4154
Exception raised when calibration-related errors occur.

tests/test_utils.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from psm_utils import PSMList
77

88
from im2deep._io_helpers import parse_input, validate_psm_list
9-
from im2deep.exceptions import IM2DeepError
9+
from im2deep.exceptions import IM2DeepError, PSMMetadataError
1010
from im2deep.utils import ccs2im, im2ccs
1111

1212

@@ -26,14 +26,22 @@ def test_validate_psm_list_with_ccs(self, sample_psm_list_with_ccs):
2626
for psm in result:
2727
assert psm.metadata is not None
2828
assert "CCS" in psm.metadata
29-
# CCS should always be stored as float
30-
assert isinstance(psm.metadata["CCS"], float)
29+
# PSM metadata is string-valued; CCS should be normalized to a numeric string.
30+
assert isinstance(psm.metadata["CCS"], str)
31+
float(psm.metadata["CCS"])
3132

3233
def test_validate_psm_list_missing_ccs(self, sample_psm_list):
3334
"""Test validation fails when CCS values are required but missing."""
3435
with pytest.raises(IM2DeepError, match="ion_mobility.*CCS.*metadata"):
3536
validate_psm_list(sample_psm_list, needs_target=True)
3637

38+
def test_validate_psm_list_invalid_ccs_value(self, sample_psm_list_with_ccs):
    """A non-numeric CCS entry must make validation raise ``PSMMetadataError``."""
    # Corrupt the CCS value of the first PSM only.
    first_psm = sample_psm_list_with_ccs[0]
    first_psm.metadata["CCS"] = "not-a-number"  # type: ignore[index]

    expected_message = "Invalid CCS metadata value"
    with pytest.raises(PSMMetadataError, match=expected_message):
        validate_psm_list(sample_psm_list_with_ccs, needs_target=True)
44+
3745
def test_validate_psm_list_empty(self):
3846
"""Test validation with empty PSMList."""
3947
empty_list = PSMList(psm_list=[])

0 commit comments

Comments
 (0)