Skip to content

Commit d1daeea

Browse files
committed
Update tests
1 parent 9c40b0d commit d1daeea

2 files changed

Lines changed: 288 additions & 0 deletions

File tree

tests/test_calibration.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,27 @@ def test_zero_range_predicted_raises():
7777
cal = PiecewiseLinearCalibration(number_of_splits=10)
7878
with pytest.raises(CalibrationError):
7979
cal.fit(target=y, source=x)
80+
81+
82+
def test_piecewise_skips_sparse_segments_with_min_samples_threshold():
    """A higher ``min_samples_per_segment`` must shorten, but not empty, the curve."""

    def curve_x(source, target, min_samples):
        # Fit a fresh 100-split calibration and return the x part of its curve.
        calibration = PiecewiseLinearCalibration(
            number_of_splits=100,
            min_samples_per_segment=min_samples,
        )
        calibration.fit(target=target, source=source)
        xs, _ = calibration.get_calibration_curve()
        return xs

    # 1000 evenly spaced points on [0, 80] followed by three isolated points
    # in the upper 90s, mapped through an exact linear relation.
    dense_part = np.linspace(0.0, 80.0, 1000, dtype=np.float32)
    sparse_part = np.array([95.0, 97.0, 99.0], dtype=np.float32)
    source = np.concatenate([dense_part, sparse_part]).astype(np.float32)
    target = (1.2 * source) + 3.0

    x_no_threshold = curve_x(source, target, 1)
    x_threshold = curve_x(source, target, 10)

    # The thresholded curve still has more than one point, yet fewer than the
    # curve fitted with a threshold of 1.
    assert 1 < x_threshold.size < x_no_threshold.size

tests/test_features.py

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
"""Tests for deeplc._features.encode_peptidoform."""
2+
3+
from __future__ import annotations
4+
5+
import warnings
6+
7+
import numpy as np
8+
import pytest
9+
from psm_utils import Peptidoform
10+
from pyteomics import mass
11+
12+
from deeplc._features import (
13+
DEFAULT_DICT_AA,
14+
DEFAULT_DICT_INDEX,
15+
DEFAULT_DICT_INDEX_POS,
16+
DEFAULT_POSITIONS,
17+
DEFAULT_POSITIONS_NEG,
18+
DEFAULT_POSITIONS_POS,
19+
encode_peptidoform,
20+
)
21+
22+
# HELPERS
23+
24+
# Padded sequence length used by every expectation in this module.
PADDING = 60

# Number of rows in pos_matrix: max(positions) - min(positions) + 1
# DEFAULT_POSITIONS = {0,1,2,3,-1,-2,-3,-4} → 3 - (-4) + 1 = 8
_POS_ROWS = max(DEFAULT_POSITIONS) - min(DEFAULT_POSITIONS) + 1

# matrix_global = sum(std_matrix, axis=0) [6] + seq_len [1] + pos_matrix.flatten() [8*6]
_GLOBAL_BASE_LEN = len(DEFAULT_DICT_INDEX) + 1 + _POS_ROWS * len(DEFAULT_DICT_INDEX_POS)

# _compute_rolling_sum(std_matrix.T, n=2)[:, ::2].T → (30, 6)
# The window-2 rolling sum leaves PADDING - 1 == 59 columns; keeping every
# second column (indices 0, 2, ..., 58) retains ceil(59 / 2) == 30 of them,
# which equals PADDING // 2 for any PADDING. The previous (PADDING - 1) // 2
# evaluated to 29 and contradicted the (30, 6) shape stated above.
_SUM_ROWS = PADDING // 2
33+
34+
35+
class TestReturnStructure:
    """Checks the keys, array shapes and dtypes of the encode_peptidoform output."""

    def test_returns_four_keys(self):
        expected_keys = {"matrix", "matrix_sum", "matrix_global", "matrix_hc"}
        encoded = encode_peptidoform("ACDE")
        assert set(encoded.keys()) == expected_keys

    def test_matrix_shape(self):
        matrix = encode_peptidoform("ACDE")["matrix"]
        assert matrix.shape == (PADDING, len(DEFAULT_DICT_INDEX))

    def test_matrix_hc_shape(self):
        one_hot = encode_peptidoform("ACDE")["matrix_hc"]
        assert one_hot.shape == (PADDING, len(DEFAULT_DICT_AA))

    def test_matrix_global_shape_no_ccs(self):
        global_features = encode_peptidoform("ACDE")["matrix_global"]
        assert global_features.shape == (_GLOBAL_BASE_LEN,)

    def test_matrix_global_shape_with_ccs(self):
        encoded = encode_peptidoform("ACDE/2", add_ccs_features=True)
        # Five extra entries are expected with CCS features enabled
        # (H%, FWY%, DE%, KR%, charge).
        expected_shape = (_GLOBAL_BASE_LEN + 5,)
        assert encoded["matrix_global"].shape == expected_shape

    def test_matrix_sum_shape(self):
        summed = encode_peptidoform("ACDE")["matrix_sum"]
        # Only dimensionality and column count are pinned here.
        assert summed.ndim == 2
        assert summed.shape[1] == len(DEFAULT_DICT_INDEX)

    def test_matrix_dtype(self):
        matrix = encode_peptidoform("ACDE")["matrix"]
        assert matrix.dtype == np.float16

    def test_matrix_hc_dtype(self):
        one_hot = encode_peptidoform("ACDE")["matrix_hc"]
        assert one_hot.dtype == np.float16
71+
72+
73+
class TestStringInput:
    """Checks that a plain string and a Peptidoform object encode identically."""

    def test_str_and_peptidoform_are_equivalent(self):
        from_string = encode_peptidoform("ACDE")
        from_object = encode_peptidoform(Peptidoform("ACDE"))
        # Compare every array produced for the string input against its
        # counterpart produced for the Peptidoform input.
        for name, array in from_string.items():
            np.testing.assert_array_equal(array, from_object[name])
81+
82+
83+
class TestPaddingAndSeqLen:
    """Checks zero padding, the stored sequence length and truncation of long input."""

    def test_padded_rows_are_zero(self):
        peptide = "ACDE"
        padding_rows = encode_peptidoform(peptide)["matrix"][len(peptide) :]
        # Every row past the last residue must contain only zeros.
        assert np.all(padding_rows == 0)

    def test_padded_rows_are_zero_onehot(self):
        peptide = "ACDE"
        padding_rows = encode_peptidoform(peptide)["matrix_hc"][len(peptide) :]
        assert np.all(padding_rows == 0)

    def test_seq_len_encoded_in_matrix_global(self):
        peptide = "ACDE"
        global_features = encode_peptidoform(peptide)["matrix_global"]
        # The slot right after the len(DEFAULT_DICT_INDEX) leading entries
        # is expected to hold the sequence length.
        seq_len_slot = len(DEFAULT_DICT_INDEX)
        assert global_features[seq_len_slot] == len(peptide)

    def test_truncation_warns(self):
        too_long = "A" * (PADDING + 5)
        with warnings.catch_warnings(record=True) as caught:
            # Record every warning, including ones already raised elsewhere.
            warnings.simplefilter("always")
            encoded = encode_peptidoform(too_long)
        messages = [str(entry.message) for entry in caught]
        assert any("Truncating" in text for text in messages)
        # Once truncated, the stored sequence length equals PADDING.
        seq_len_slot = len(DEFAULT_DICT_INDEX)
        assert encoded["matrix_global"][seq_len_slot] == PADDING
111+
112+
113+
class TestOneHotEncoding:
    """Checks the one-hot residue encoding stored under ``matrix_hc``."""

    def test_first_residue_one_hot(self):
        one_hot = encode_peptidoform("ACDE")["matrix_hc"]
        # Row 0 must be hot in the column DEFAULT_DICT_AA assigns to "A".
        column = DEFAULT_DICT_AA["A"]
        assert one_hot[0, column] == 1.0

    def test_second_residue_one_hot(self):
        one_hot = encode_peptidoform("ACDE")["matrix_hc"]
        column = DEFAULT_DICT_AA["C"]
        assert one_hot[1, column] == 1.0

    def test_each_residue_has_exactly_one_hot(self):
        peptide = "ACDE"
        residue_rows = encode_peptidoform(peptide)["matrix_hc"][: len(peptide)]
        # Each residue row sums to one.
        for row in residue_rows:
            assert row.sum() == 1.0

    def test_padded_rows_are_zero_and_no_hot(self):
        peptide = "AC"
        one_hot = encode_peptidoform(peptide)["matrix_hc"]
        # Rows from index 2 onwards are padding for this two-residue peptide.
        padding_total = one_hot[2:].sum()
        assert padding_total == 0.0
135+
136+
137+
class TestStandardMatrixComposition:
    """Checks the per-residue atomic composition stored under ``matrix``."""

    def test_glycine_carbon_count(self):
        # The carbon entry for a lone glycine must match pyteomics' standard
        # amino-acid composition table.
        carbon_col = DEFAULT_DICT_INDEX["C"]
        reference_carbons = mass.std_aa_comp["G"]["C"]
        matrix = encode_peptidoform("G")["matrix"]
        assert matrix[0, carbon_col] == reference_carbons

    def test_unmodified_and_modified_differ_in_affected_residue(self):
        # Oxidation on the methionine (row 2) must raise its oxygen count.
        oxygen_col = DEFAULT_DICT_INDEX["O"]
        plain = encode_peptidoform("ACMDE")["matrix"]
        oxidized = encode_peptidoform("ACM[Oxidation]DE")["matrix"]
        assert oxidized[2, oxygen_col] > plain[2, oxygen_col]

    def test_modification_does_not_affect_other_residues(self):
        plain = encode_peptidoform("ACMDE")["matrix"]
        oxidized = encode_peptidoform("ACM[Oxidation]DE")["matrix"]
        # Every row except the modified methionine (row 2) stays identical.
        for row in (0, 1, 3, 4):
            np.testing.assert_array_equal(oxidized[row], plain[row])
159+
160+
161+
class TestNTerminalModification:
    """Checks that an N-terminal modification lands on row 0 only."""

    def test_nterm_mod_changes_position_zero(self):
        plain = encode_peptidoform("ACDE")["matrix"]
        acetylated = encode_peptidoform("[Acetyl]-ACDE")["matrix"]
        # Acetylation is expected to add carbon to the first residue.
        carbon_col = DEFAULT_DICT_INDEX["C"]
        assert acetylated[0, carbon_col] > plain[0, carbon_col]

    def test_nterm_mod_does_not_affect_other_positions(self):
        plain = encode_peptidoform("ACDE")["matrix"]
        acetylated = encode_peptidoform("[Acetyl]-ACDE")["matrix"]
        # Rows 1..3 hold the remaining residues and must be unchanged.
        for row in (1, 2, 3):
            np.testing.assert_array_equal(acetylated[row], plain[row])

    def test_nterm_mod_reflected_in_matrix_global(self):
        plain_global = encode_peptidoform("ACDE")["matrix_global"]
        acetylated_global = encode_peptidoform("[Acetyl]-ACDE")["matrix_global"]
        # The global vector includes column sums, so it has to differ.
        assert not np.array_equal(acetylated_global, plain_global)

    def test_nterm_mod_reflected_in_pos_matrix_part(self):
        plain_global = encode_peptidoform("ACDE")["matrix_global"]
        acetylated_global = encode_peptidoform("[Acetyl]-ACDE")["matrix_global"]
        # Skip the column sums and the sequence-length slot; what follows is
        # compared as the positional part of the vector.
        offset = len(DEFAULT_DICT_INDEX) + 1
        plain_tail = plain_global[offset:]
        acetylated_tail = acetylated_global[offset:]
        assert not np.array_equal(plain_tail, acetylated_tail)
192+
193+
194+
class TestCTerminalModification:
    """Checks that a C-terminal modification lands on the last residue row only."""

    def test_cterm_mod_changes_last_residue_position(self):
        peptide = "ACDE"
        final_row = len(peptide) - 1
        plain = encode_peptidoform(peptide)["matrix"]
        amidated = encode_peptidoform("ACDE-[Amidation]")["matrix"]
        # Amidation must alter the composition row of the final residue.
        assert not np.array_equal(amidated[final_row], plain[final_row])

    def test_cterm_mod_does_not_affect_other_positions(self):
        plain = encode_peptidoform("ACDE")["matrix"]
        amidated = encode_peptidoform("ACDE-[Amidation]")["matrix"]
        # Rows 0..2 precede the modified last residue and must be unchanged.
        for row in (0, 1, 2):
            np.testing.assert_array_equal(amidated[row], plain[row])

    def test_cterm_mod_reflected_in_matrix_global(self):
        plain_global = encode_peptidoform("ACDE")["matrix_global"]
        amidated_global = encode_peptidoform("ACDE-[Amidation]")["matrix_global"]
        assert not np.array_equal(amidated_global, plain_global)
215+
216+
217+
class TestBothTerminalModifications:
    """Checks a peptide that carries an N- and a C-terminal modification at once."""

    def test_both_term_mods_change_both_ends(self):
        plain = encode_peptidoform("ACDE")["matrix"]
        modified = encode_peptidoform("[Acetyl]-ACDE-[Amidation]")["matrix"]
        # First and last residue rows must both differ from the plain peptide.
        assert not np.array_equal(modified[0], plain[0])
        assert not np.array_equal(modified[3], plain[3])

    def test_middle_residues_unchanged(self):
        plain = encode_peptidoform("ACDE")["matrix"]
        modified = encode_peptidoform("[Acetyl]-ACDE-[Amidation]")["matrix"]
        # The two inner residues carry no modification.
        for row in (1, 2):
            np.testing.assert_array_equal(modified[row], plain[row])
231+
232+
233+
class TestCCSFeatures:
    """Checks the behaviour of the ``add_ccs_features`` flag."""

    def test_ccs_features_requires_charge(self):
        # A peptidoform without a charge state must be rejected.
        with pytest.raises(ValueError, match="no charge"):
            encode_peptidoform("ACDE", add_ccs_features=True)

    def test_ccs_features_appends_five_values(self):
        without_ccs = encode_peptidoform("ACDE/2")["matrix_global"]
        with_ccs = encode_peptidoform("ACDE/2", add_ccs_features=True)["matrix_global"]
        assert with_ccs.shape[0] == without_ccs.shape[0] + 5

    def test_ccs_charge_value_position(self):
        # Expected matrix_global layout with CCS features:
        # [col_sums(6), seq_len(1), H%(1), FWY%(1), DE%(1), KR%(1), charge(1), pos_flat(48)]
        charge = 3
        global_features = encode_peptidoform(
            f"ACDE/{charge}", add_ccs_features=True
        )["matrix_global"]
        # Skip the column sums, the sequence length and the four ratios.
        charge_slot = len(DEFAULT_DICT_INDEX) + 1 + 4
        assert global_features[charge_slot] == charge
252+
253+
254+
class TestShortPeptide:
    """Checks edge cases with one- and two-residue peptides."""

    def test_single_residue(self):
        encoded = encode_peptidoform("A")
        expected_shape = (PADDING, len(DEFAULT_DICT_INDEX))
        assert encoded["matrix"].shape == expected_shape
        # The only residue must be one-hot encoded in row 0.
        alanine_col = DEFAULT_DICT_AA["A"]
        assert encoded["matrix_hc"][0, alanine_col] == 1.0

    def test_two_residues_no_crash(self):
        global_features = encode_peptidoform("AC")["matrix_global"]
        assert global_features.shape == (_GLOBAL_BASE_LEN,)

0 commit comments

Comments
 (0)