Skip to content

Commit c4ad90b

Browse files
authored
Merge pull request #51 from bigbio/dev
Update github actions, more examples added, remove filtering of features based on percentile tic.
2 parents df13276 + b29962b commit c4ad90b

5 files changed

Lines changed: 46 additions & 38 deletions

File tree

.github/workflows/python-package.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,15 @@ jobs:
4444
- name: Test commandline tool
4545
run: |
4646
quantmsutilsc --help
47+
- name: Download test files.
48+
run: |
49+
wget https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/quantms-ci-github/quantms-utils/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML
50+
wget https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/quantms-ci-github/quantms-utils/RD139_Narrow_UPS1_0_1fmol_inj1.mzML
51+
- name: Test package online TMT
52+
run: |
53+
quantmsutilsc mzmlstats --ms_path TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML --ms2_file --feature_detection
54+
- name: Test package online DIA
55+
run: |
56+
quantmsutilsc mzmlstats --ms_path RD139_Narrow_UPS1_0_1fmol_inj1.mzML --ms2_file --feature_detection
57+
58+

quantmsutils/mzml/ms1_feature_finder.py

Lines changed: 30 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
previous algorithm by Andy Lim https://github.com/bmx8177/MS1Connect
55
published in https://doi.org/10.1093/bioinformatics/btad058.
66
7-
We improved the original algorithm by using FeatureFinderMultiplexAlgorithm instead of FeatureFinder as originally
8-
implemented by Andy Lim. Also, we annotated additional features such as min and max retention time and mz values.
9-
10-
This algorithm is used to detect MS1 features from mzML files and save them to parquet format.
7+
We improved the original algorithm with the following ideas:
8+
- Using FeatureFinderMultiplexAlgorithm instead of FeatureFinder as originally implemented by Andy Lim. This will provide
9+
a more robust way to perform FeatureFinding.
10+
- Remove the filtering of features by percentile TIC; we leave this step to downstream tools that consume the data, which can perform
11+
extra curation of the features based on percentile_tic, the quality of the feature, etc.
12+
- We annotated additional features such as min and max retention time and mz values.
13+
- This algorithm is used to detect MS1 features from mzML files and save them to parquet format.
1114
"""
1215

1316
import bisect
@@ -28,7 +31,7 @@ class MS1FeatureDetector:
2831
Class for detecting MS1 features from mzML files and saving to parquet format.
2932
"""
3033

31-
def __init__(self, min_ptic: float = 0.05, max_ptic: float = 0.95, ms_level: int = 1):
34+
def __init__(self, ms_level: int = 1):
3235
"""
3336
Initialize the MS1 feature detector.
3437
@@ -43,10 +46,7 @@ def __init__(self, min_ptic: float = 0.05, max_ptic: float = 0.95, ms_level: int
4346
"""
4447
# Configure logging
4548

46-
self.min_ptic = min_ptic
47-
self.max_ptic = max_ptic
4849
self.ms_level = ms_level
49-
5050
# Initialize options for file loading
5151
self.options = oms.PeakFileOptions()
5252
self.options.setMSLevels([self.ms_level])
@@ -142,7 +142,11 @@ def _find_ptic_for_rt(rt: float, rt_list: List[float], ptic_list: List[float]) -
142142
return ptic_left + rt_frac * (ptic_right - ptic_left)
143143

144144
def _extract_features(
145-
self, features: oms.FeatureMap, rt_list: List[float], ptic_list: List[float], scans: List[str]
145+
self,
146+
features: oms.FeatureMap,
147+
rt_list: List[float],
148+
ptic_list: List[float],
149+
scans: List[str],
146150
) -> List[Dict[str, Any]]:
147151
"""
148152
Extract feature information and annotate each feature with its pTIC (features are no longer filtered by pTIC).
@@ -182,30 +186,23 @@ def _extract_features(
182186
select_scans = self._get_selected_scans(scans, rt_list, minRT, maxRT)
183187
num_scans = len(select_scans)
184188

185-
186-
187-
# Filter by pTIC
188-
if self.min_ptic <= ptic <= self.max_ptic:
189-
feature_list.append(
190-
{
191-
"feature_mz": mz,
192-
"feature_intensity": intensity,
193-
"feature_rt": rt,
194-
"feature_charge": charge,
195-
"feature_percentile_tic": ptic,
196-
"feature_quality": quality,
197-
"feature_id": feature.getUniqueId(),
198-
"feature_min_rt": minRT,
199-
"feature_min_mz": minMZ,
200-
"feature_max_rt": maxRT,
201-
"feature_max_mz": maxMZ,
202-
"feature_num_scans": num_scans,
203-
"feature_scans": select_scans
204-
205-
}
206-
)
207-
else:
208-
logger.debug(f"Skipping feature at RT {rt} due to pTIC {ptic}")
189+
feature_list.append(
190+
{
191+
"feature_mz": mz,
192+
"feature_intensity": intensity,
193+
"feature_rt": rt,
194+
"feature_charge": charge,
195+
"feature_percentile_tic": ptic,
196+
"feature_quality": quality,
197+
"feature_id": feature.getUniqueId(),
198+
"feature_min_rt": minRT,
199+
"feature_min_mz": minMZ,
200+
"feature_max_rt": maxRT,
201+
"feature_max_mz": maxMZ,
202+
"feature_num_scans": num_scans,
203+
"feature_scans": select_scans,
204+
}
205+
)
209206

210207
return feature_list
211208

@@ -321,4 +318,3 @@ def _get_selected_scans(scans: List[str], rt_list: List[float], min_rt: float, m
321318
if min_rt <= rt <= max_rt:
322319
selected_scans.append(scans[i])
323320
return selected_scans
324-

quantmsutils/mzml/mzml_statistics.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
SCAN,
2828
SUMMED_PEAK_INTENSITY,
2929
PRECURSOR_RT,
30-
PRECURSOR_TOTAL_INTENSITY
30+
PRECURSOR_TOTAL_INTENSITY,
3131
)
3232

3333
logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.INFO)
@@ -607,7 +607,7 @@ def mzml_statistics(
607607
batch_size=batch_size,
608608
)
609609
if feature_detection:
610-
feature_detector = MS1FeatureDetector(min_ptic=0.05, max_ptic=0.95)
610+
feature_detector = MS1FeatureDetector()
611611
feature_detector.process_file(input_file=ms_path, output_file=feature_output_path)
612612
logger.info("The file {} has been processed".format(ms_path))
613613
else:

quantmsutils/openms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ def extract_scan_id(spectrum: oms.MSSpectrum) -> str:
2121
match = re.search(SCAN_PATTERN, spectrum.getNativeID())
2222
if match:
2323
return match.group(1)
24-
return spectrum.getNativeID()
24+
return spectrum.getNativeID()

tests/test_commands.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ class TestFeatureFinder:
204204
def test_feature_finder(self):
205205
"""Test feature finder on TMT data"""
206206

207-
detector = MS1FeatureDetector(min_ptic=0.05, max_ptic=0.95)
207+
detector = MS1FeatureDetector()
208208
result = detector.process_file(input_file=TMT_MZML_FILE, output_file=TMT_MS1_FEAURES)
209209

210210
parquet_df = pd.read_parquet(result)

0 commit comments

Comments
 (0)