44 previous algorithm by Andy Lim https://github.com/bmx8177/MS1Connect
55 published in https://doi.org/10.1093/bioinformatics/btad058.
66
7- We improved the original algorithm by using FeatureFinderMultiplexAlgorithm instead of FeatureFinder as originally
8- implemented by Andy Lim. Also, we annotated additional features such as min and max retention time and mz values.
9-
10- This algorithm is used to detect MS1 features from mzML files and save them to parquet format.
7+ We improved the original algorithm with the following ideas:
8+ - Using FeatureFinderMultiplexAlgorithm instead of FeatureFinder as originally implemented by Andy Lim. This will provide
9+ a more robust way to perform FeatureFinding.
10+ - Removing the percentile TIC filtering of features; we leave this step to downstream tools that consume the data, which can
11+ perform extra curation of the features based on percentile_tic, feature quality, etc.
12+ - Annotating each feature with additional attributes such as the min and max retention time and mz values.
13+ - This algorithm is used to detect MS1 features from mzML files and save them to parquet format.
1114"""
1215
1316import bisect
@@ -28,7 +31,7 @@ class MS1FeatureDetector:
2831 Class for detecting MS1 features from mzML files and saving to parquet format.
2932 """
3033
31- def __init__ (self , min_ptic : float = 0.05 , max_ptic : float = 0.95 , ms_level : int = 1 ):
34+ def __init__ (self , ms_level : int = 1 ):
3235 """
3336 Initialize the MS1 feature detector.
3437
@@ -43,10 +46,7 @@ def __init__(self, min_ptic: float = 0.05, max_ptic: float = 0.95, ms_level: int
4346 """
4447 # Configure logging
4548
46- self .min_ptic = min_ptic
47- self .max_ptic = max_ptic
4849 self .ms_level = ms_level
49-
5050 # Initialize options for file loading
5151 self .options = oms .PeakFileOptions ()
5252 self .options .setMSLevels ([self .ms_level ])
@@ -142,7 +142,11 @@ def _find_ptic_for_rt(rt: float, rt_list: List[float], ptic_list: List[float]) -
142142 return ptic_left + rt_frac * (ptic_right - ptic_left )
143143
144144 def _extract_features (
145- self , features : oms .FeatureMap , rt_list : List [float ], ptic_list : List [float ], scans : List [str ]
145+ self ,
146+ features : oms .FeatureMap ,
147+ rt_list : List [float ],
148+ ptic_list : List [float ],
149+ scans : List [str ],
146150 ) -> List [Dict [str , Any ]]:
147151 """
148152 Extract feature information and annotate each feature with its pTIC (no pTIC filtering is applied).
@@ -182,30 +186,23 @@ def _extract_features(
182186 select_scans = self ._get_selected_scans (scans , rt_list , minRT , maxRT )
183187 num_scans = len (select_scans )
184188
185-
186-
187- # Filter by pTIC
188- if self .min_ptic <= ptic <= self .max_ptic :
189- feature_list .append (
190- {
191- "feature_mz" : mz ,
192- "feature_intensity" : intensity ,
193- "feature_rt" : rt ,
194- "feature_charge" : charge ,
195- "feature_percentile_tic" : ptic ,
196- "feature_quality" : quality ,
197- "feature_id" : feature .getUniqueId (),
198- "feature_min_rt" : minRT ,
199- "feature_min_mz" : minMZ ,
200- "feature_max_rt" : maxRT ,
201- "feature_max_mz" : maxMZ ,
202- "feature_num_scans" : num_scans ,
203- "feature_scans" : select_scans
204-
205- }
206- )
207- else :
208- logger .debug (f"Skipping feature at RT { rt } due to pTIC { ptic } " )
189+ feature_list .append (
190+ {
191+ "feature_mz" : mz ,
192+ "feature_intensity" : intensity ,
193+ "feature_rt" : rt ,
194+ "feature_charge" : charge ,
195+ "feature_percentile_tic" : ptic ,
196+ "feature_quality" : quality ,
197+ "feature_id" : feature .getUniqueId (),
198+ "feature_min_rt" : minRT ,
199+ "feature_min_mz" : minMZ ,
200+ "feature_max_rt" : maxRT ,
201+ "feature_max_mz" : maxMZ ,
202+ "feature_num_scans" : num_scans ,
203+ "feature_scans" : select_scans ,
204+ }
205+ )
209206
210207 return feature_list
211208
@@ -321,4 +318,3 @@ def _get_selected_scans(scans: List[str], rt_list: List[float], min_rt: float, m
321318 if min_rt <= rt <= max_rt :
322319 selected_scans .append (scans [i ])
323320 return selected_scans
324-
0 commit comments