2323"""
2424
2525
26+ from pathlib import Path
27+
28+ import numpy as np
29+
2630from diffpy .srfit .exceptions import ParseError
31+ from diffpy .utils ._deprecator import build_deprecation_message , deprecated
32+ from diffpy .utils .parsers import load_data
33+
# Version named in every deprecation message below as the release in which
# the deprecated camelCase API is removed.
removal_version = "4.0.0"
# The constant was originally spelled "removal_verison"; the misspelled name
# is kept as an alias so any existing reference keeps working.
removal_verison = removal_version

# Dotted paths interpolated into the deprecation messages.
pdfparser_base = "diffpy.srfit.pdf.pdfparser.PDFParser"
new_base = "diffpy.srfit.fitbase.ProfileParser"
pp_base = "diffpy.srfit.fitbase.profileparser.ProfileParser"


parseFile_dep_msg = build_deprecation_message(
    pdfparser_base,
    "parseFile",
    "parse_file",
    removal_version,
    new_base=new_base,
)

getNumBanks_dep_msg = build_deprecation_message(
    pp_base,
    "getNumBanks",
    "get_num_banks",
    removal_version,
)

selectBank_dep_msg = build_deprecation_message(
    pp_base,
    "selectBank",
    "select_bank",
    removal_version,
)

getData_dep_msg = build_deprecation_message(
    pp_base,
    "getData",
    "get_data",
    removal_version,
)

getMetaData_dep_msg = build_deprecation_message(
    pp_base,
    "getMetaData",
    "get_metadata",
    removal_version,
)
2776
2877
2978class ProfileParser (object ):
3079 """Class for parsing data from a file or string.
3180
3281 Attributes
3382 ----------
34- _format
35- Name of the data format that this parses (string, default
36- "" ). The format string is a unique identifier for the data
83+ _format : str, optional
84+ The name of the data format that this parses (string, default
85+ `""` ). The format string is a unique identifier for the data
3786 format handled by the parser.
38- _banks
87+ _banks : list of tuples
3988 The data from each bank. Each bank contains a (x, y, dx,
4089 dy)
4190 tuple:
42- x
43- A numpy array containing the independent
44- variable read from the file.
45- y
46- A numpy array containing the profile
91+ x : np.ndarray
92+ The independent variable read from the file.
93+ y : np.ndarray
94+ The dependent variable (profile) read
4795 from the file.
48- dx
49- A numpy array containing the uncertainty in x
50- read from the file. This is None if the
96+ dx : np.ndarray
97+ The uncertainties associated with x
98+ read from the file. This is 0 if the
99+ uncertainty cannot be read.
100+ dy : np.ndarray
101+ The uncertainties associated with y
102+ read from the file. This is 0 if the
51103 uncertainty cannot be read.
52- dy
53- A numpy array containing the uncertainty read
54- from the file. This is None if the uncertainty
55- cannot be read.
56- _x
104+ _x : np.ndarray
57105 Independent variable from the chosen bank
58- _y
106+ _y : np.ndarray
59107 Profile from the chosen bank
60- _dx
108+ _dx : np.ndarray
61109 Uncertainty in independent variable from the chosen bank
62- _dy
110+ _dy : np.ndarray
63111 Uncertainty in profile from the chosen bank
64- _meta
112+ _meta : dict
65113 A dictionary containing metadata read from the file.
66114
67115
68116 General Metadata
69117 ----------------
70- filename
118+ filename : str or Path
71119 The name of the file from which data was parsed. This key
72120 will not exist if data was not read from file.
73- nbanks
121+ nbanks : int
74122 The number of banks parsed.
75- bank
123+ bank : int
76124 The chosen bank number.
77125 """
78126
@@ -110,6 +158,8 @@ def parseString(self, patstring):
110158 """
111159 raise NotImplementedError ()
112160
161+ # remove parseString too when this file is removed.
162+ @deprecated (parseFile_dep_msg )
113163 def parseFile (self , filename ):
114164 """Parse a file and set the _x, _y, _dx, _dy and _meta
115165 variables.
@@ -135,14 +185,151 @@ def parseFile(self, filename):
135185 if len (self ._banks ) < 1 :
136186 raise ParseError ("There are no data in the banks" )
137187
138- self .selectBank (0 )
188+ self .select_bank (0 )
139189 return
140190
141- def getNumBanks (self ):
142- """Get the number of banks read by the parser."""
def parse_file(self, filename, column_format=None):
    """Read a column-oriented data file into a single bank.

    When `column_format` is not given, the column layout is inferred
    from the number of columns in the file:

    - 2 columns: (x, y); dx and dy are set to 0.
    - 3 columns: (x, y, dy); dx is set to 0.
    - 4 columns: (x, y, dx, dy).
    - any other count: `column_format` must be given explicitly.

    An uncertainty column (dx or dy) is kept only when every value in
    it is finite and strictly positive; otherwise it is replaced by
    zeros.

    Any previously loaded banks are discarded and bank 0 becomes the
    selected bank.

    Parameters
    ----------
    filename : str or Path
        The name of the file to parse.
    column_format : tuple of str, optional
        The labels of the columns, in the order they appear in the
        file. If None, the layout is inferred as described above.

        Valid labels: `"x"`, `"y"`, `"dx"`, `"dy"`

        Examples:

        - `("x", "y")`
        - `("x", "y", "dy")`
        - `("x", "y", "dx", "dy")`
        - `("x", "dx", "y", "dy")`

    Raises
    ------
    ParseError
        If the file cannot be interpreted or the column layout is
        ambiguous or invalid.
    """
    # Drop whatever was loaded before.
    self._banks = []
    # Path objects are handed to the loader as plain strings.
    path = str(filename) if isinstance(filename, Path) else filename
    self._meta, table = self._load_file(path)
    layout = self._detect_column_format(table, column_format)
    by_label = self._map_column_labels_to_data(table, layout)
    x, y = by_label["x"], by_label["y"]
    # Missing or unusable uncertainty columns become zero arrays.
    dx = self._validate_uncertainty(by_label.get("dx"), len(x))
    dy = self._validate_uncertainty(by_label.get("dy"), len(y))
    # The file contents form exactly one bank.
    self._banks = [(x, y, dx, dy)]
    self._meta["nbanks"] = 1
    self.select_bank(0)
248+
249+ # --- Private helpers --- #
250+
def _load_file(self, filename):
    """Load the metadata and the numeric data block of a file.

    Parameters
    ----------
    filename : str
        The name of the file to read.

    Returns
    -------
    tuple
        `(meta, data)`, where `meta` is the result of
        `load_data(filename, headers=True)` with a `"filename"` entry
        added, and `data` is the result of `load_data(filename)`.

    Raises
    ------
    ParseError
        If the numeric data is empty or one-dimensional.
    """
    # NOTE(review): presumably headers=True yields the header metadata
    # as a dict -- confirm against diffpy.utils.parsers.load_data.
    header = load_data(filename, headers=True)
    header["filename"] = filename
    table = load_data(filename)
    too_narrow = table.size == 0 or table.ndim == 1
    if too_narrow:
        raise ParseError("Data block must have at least two columns (x, y).")
    return header, table
261+
262+ def _detect_column_format (self , data , column_format ):
263+ """Auto-detect or validate column format."""
264+ num_cols = data .shape [1 ]
265+
266+ if column_format is None :
267+ if num_cols == 2 :
268+ column_format = ("x" , "y" )
269+ elif num_cols == 3 :
270+ column_format = ("x" , "y" , "dy" )
271+ elif num_cols == 4 :
272+ column_format = ("x" , "y" , "dx" , "dy" )
273+ else :
274+ raise ParseError (
275+ f"Expected 2 to 4 columns but found { num_cols } ."
276+ )
277+ if len (column_format ) != num_cols :
278+ raise ParseError (
279+ f"column_format has { len (column_format )} "
280+ f"labels but file contains { num_cols } columns."
281+ )
282+ if len (set (column_format )) != len (column_format ):
283+ raise ParseError ("column_format cannot contain duplicate labels." )
284+ for label in column_format :
285+ if label not in {"x" , "y" , "dx" , "dy" }:
286+ raise ParseError (
287+ f"column_format contains invalid label '{ label } '. "
288+ "Valid labels are 'x', 'y', 'dx', and 'dy'."
289+ )
290+ return column_format
291+
292+ def _map_column_labels_to_data (self , data , column_format ):
293+ """Map numeric data to columns by label."""
294+ columns = {}
295+ for i , label in enumerate (column_format ):
296+ columns [label ] = data [:, i ]
297+
298+ if "x" not in columns or "y" not in columns :
299+ raise ParseError (
300+ "Both 'x' and 'y' columns must be present in the data."
301+ )
302+
303+ return columns
304+
305+ @staticmethod
306+ def _validate_uncertainty (data , length ):
307+ """Return the uncertainty data if valid, otherwise 0."""
308+ if data is None or not np .all (np .isfinite (data )) or np .any (data <= 0 ):
309+ return np .zeros (length )
310+ return data
311+
312+ def get_num_banks (self ):
313+ """Get the number of banks read by the parser.
314+
315+ Returns
316+ -------
317+ int
318+ The number of banks read by the parser.
319+ """
143320 return len (self ._banks )
144321
145- def selectBank (self , index ):
@deprecated(getNumBanks_dep_msg)
def getNumBanks(self):
    """Deprecated alias of `get_num_banks`.

    This method will be removed in version 4.0.0. Please use
    diffpy.srfit.fitbase.ProfileParser.get_num_banks instead.
    """
    num_banks = self.get_num_banks()
    return num_banks
331+
332+ def select_bank (self , index ):
146333 """Select which bank to use.
147334
148335 This method should only be called after the data has been parsed. The
@@ -160,7 +347,7 @@ def selectBank(self, index):
160347 if index is None :
161348 index = self ._meta .get ("bank" , 0 )
162349
163- numbanks = self .getNumBanks ()
350+ numbanks = self .get_num_banks ()
164351 if index > numbanks :
165352 raise IndexError ("Bank index out of range" )
166353
@@ -175,7 +362,18 @@ def selectBank(self, index):
175362 self ._x , self ._y , self ._dx , self ._dy = self ._banks [index ]
176363 return
177364
178- def getData (self , index = None ):
@deprecated(selectBank_dep_msg)
def selectBank(self, index):
    """Deprecated alias of `select_bank`.

    This method will be removed in version 4.0.0. Please use
    diffpy.srfit.fitbase.ProfileParser.select_bank instead.
    """
    # The result of select_bank is intentionally discarded; this alias
    # always returns None.
    self.select_bank(index)
375+
376+ def get_data (self , index = None ):
179377 """Get the data.
180378
181379 This method should only be called after the data has been parsed. The
@@ -192,12 +390,37 @@ def getData(self, index=None):
192390 This returns (x, y, dx, dy) tuple for the bank. dx is 0 if it cannot
193391 be determined from the data format.
194392 """
195- self .selectBank (index )
393+ self .select_bank (index )
196394
197395 return self ._x , self ._y , self ._dx , self ._dy
198396
@deprecated(getData_dep_msg)
def getData(self, index=None):
    """Deprecated alias of `get_data`.

    This method will be removed in version 4.0.0. Please use
    diffpy.srfit.fitbase.ProfileParser.get_data instead.
    """
    bank_data = self.get_data(index)
    return bank_data
405+
406+ def get_metadata (self ):
407+ """Get the parsed metadata.
408+
409+ Returns
410+ -------
411+ dict
412+ A dictionary containing metadata read from the file.
413+ """
414+ return self ._meta
415+
@deprecated(getMetaData_dep_msg)
def getMetaData(self):
    """Deprecated alias of `get_metadata`.

    This method will be removed in version 4.0.0. Please use
    diffpy.srfit.fitbase.ProfileParser.get_metadata instead.

    Returns
    -------
    dict
        Whatever `get_metadata` returns.
    """
    # Delegate, as the other deprecated aliases do, so that a subclass
    # overriding get_metadata is honoured through the old name as well.
    return self.get_metadata()
202425
203426
0 commit comments