Skip to content

Commit 567c8a1

Browse files
committed
build parse_file and deprecate getNumBank, selectBank, getFormat, getData, and getMetaData
1 parent de843cf commit 567c8a1

1 file changed

Lines changed: 255 additions & 32 deletions

File tree

src/diffpy/srfit/fitbase/profileparser.py

Lines changed: 255 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,56 +23,104 @@
2323
"""
2424

2525

26+
from pathlib import Path
27+
28+
import numpy as np
29+
2630
from diffpy.srfit.exceptions import ParseError
31+
from diffpy.utils._deprecator import build_deprecation_message, deprecated
32+
from diffpy.utils.parsers import load_data
33+
34+
# Version in which the deprecated camelCase methods are documented to be
# removed (their docstrings in this module state "removed in version 4.0.0").
removal_version = "4.0.0"
# Backward-compatible alias for the original, misspelled name so that any
# code still referring to ``removal_verison`` keeps working.
removal_verison = removal_version

# Dotted paths passed to build_deprecation_message.
# NOTE(review): presumably these are interpolated verbatim into the warning
# text -- confirm against diffpy.utils._deprecator.
pdfparser_base = "diffpy.srfit.pdf.pdfparser.PDFParser"
new_base = "diffpy.srfit.fitbase.ProfileParser"

# parseFile is the only message built with the PDFParser path as the old
# location and an explicit ``new_base`` for the replacement.
parseFile_dep_msg = build_deprecation_message(
    pdfparser_base,
    "parseFile",
    "parse_file",
    removal_version,
    new_base=new_base,
)

# The remaining messages use the ProfileParser path and no ``new_base``.
pp_base = "diffpy.srfit.fitbase.profileparser.ProfileParser"

getNumBanks_dep_msg = build_deprecation_message(
    pp_base,
    "getNumBanks",
    "get_num_banks",
    removal_version,
)

selectBank_dep_msg = build_deprecation_message(
    pp_base,
    "selectBank",
    "select_bank",
    removal_version,
)

getData_dep_msg = build_deprecation_message(
    pp_base,
    "getData",
    "get_data",
    removal_version,
)

getMetaData_dep_msg = build_deprecation_message(
    pp_base,
    "getMetaData",
    "get_metadata",
    removal_version,
)
2776

2877

2978
class ProfileParser(object):
3079
"""Class for parsing data from a file or string.
3180
3281
Attributes
3382
----------
34-
_format
35-
Name of the data format that this parses (string, default
36-
""). The format string is a unique identifier for the data
83+
_format : str, optional
84+
The name of the data format that this parses (string, default
85+
`""`). The format string is a unique identifier for the data
3786
format handled by the parser.
38-
_banks
87+
_banks : list of tuples
3988
The data from each bank. Each bank contains a (x, y, dx,
4089
dy)
4190
tuple:
42-
x
43-
A numpy array containing the independent
44-
variable read from the file.
45-
y
46-
A numpy array containing the profile
91+
x : np.ndarray
92+
The independent variable read from the file.
93+
y : np.ndarray
94+
The dependent variable (profile) read
4795
from the file.
48-
dx
49-
A numpy array containing the uncertainty in x
50-
read from the file. This is None if the
96+
dx : np.ndarray
97+
The uncertainties associated with x
98+
read from the file. This is 0 if the
99+
uncertainty cannot be read.
100+
dy : np.ndarray
101+
The uncertainties associated with y
102+
read from the file. This is 0 if the
51103
uncertainty cannot be read.
52-
dy
53-
A numpy array containing the uncertainty read
54-
from the file. This is None if the uncertainty
55-
cannot be read.
56-
_x
104+
_x : np.ndarray
57105
Independent variable from the chosen bank
58-
_y
106+
_y : np.ndarray
59107
Profile from the chosen bank
60-
_dx
108+
_dx : np.ndarray
61109
Uncertainty in independent variable from the chosen bank
62-
_dy
110+
_dy : np.ndarray
63111
Uncertainty in profile from the chosen bank
64-
_meta
112+
_meta : dict
65113
A dictionary containing metadata read from the file.
66114
67115
68116
General Metadata
69117
----------------
70-
filename
118+
filename : str or Path
71119
The name of the file from which data was parsed. This key
72120
will not exist if data was not read from file.
73-
nbanks
121+
nbanks : int
74122
The number of banks parsed.
75-
bank
123+
bank : int
76124
The chosen bank number.
77125
"""
78126

@@ -110,6 +158,8 @@ def parseString(self, patstring):
110158
"""
111159
raise NotImplementedError()
112160

161+
# remove parseString too when this file is removed.
162+
@deprecated(parseFile_dep_msg)
113163
def parseFile(self, filename):
114164
"""Parse a file and set the _x, _y, _dx, _dy and _meta
115165
variables.
@@ -135,14 +185,151 @@ def parseFile(self, filename):
135185
if len(self._banks) < 1:
136186
raise ParseError("There are no data in the banks")
137187

138-
self.selectBank(0)
188+
self.select_bank(0)
139189
return
140190

141-
def getNumBanks(self):
142-
"""Get the number of banks read by the parser."""
191+
def parse_file(self, filename, column_format=None):
    """Read a column-based data file into a single bank.

    Any banks held by the parser are discarded first. After a
    successful parse the metadata is replaced and bank 0 is selected.

    When `column_format` is omitted the layout is inferred from the
    number of columns in the file:

    - 2 columns: (x, y); dx and dy are set to 0.
    - 3 columns: (x, y, dy); dx is set to 0.
    - 4 columns: (x, y, dx, dy).
    - any other count: `column_format` must be given explicitly.

    An uncertainty column (dx or dy) is used only when every value in
    it is finite and strictly positive; otherwise it is replaced by
    zeros.

    Parameters
    ----------
    filename : str or Path
        The name of the file to parse. A `Path` is converted to `str`
        before loading.
    column_format : tuple of str, optional
        The order in which columns appear in the file. If None, the
        order is inferred as described above.

        Valid labels: `"x"`, `"y"`, `"dx"`, `"dy"`

        Examples:

        - `("x", "y")`
        - `("x", "y", "dy")`
        - `("x", "y", "dx", "dy")`
        - `("x", "dx", "y", "dy")`

    Raises
    ------
    ParseError
        If the data block or the column format is not usable.
    """
    # Drop whatever was loaded before.
    self._banks = []

    path = str(filename) if isinstance(filename, Path) else filename

    # Header metadata and numeric table.
    self._meta, table = self._load_file(path)

    # Resolve the column order, then index the table by label.
    labels = self._detect_column_format(table, column_format)
    by_label = self._map_column_labels_to_data(table, labels)

    x, y = by_label["x"], by_label["y"]
    # Missing or unusable uncertainty columns become zeros of matching
    # length.
    dx = self._validate_uncertainty(by_label.get("dx"), len(x))
    dy = self._validate_uncertainty(by_label.get("dy"), len(y))

    # The file is stored as exactly one bank.
    self._banks = [(x, y, dx, dy)]
    self._meta["nbanks"] = 1
    self.select_bank(0)
248+
249+
# --- Private helpers --- #
250+
251+
def _load_file(self, filename):
    """Read the header metadata and the numeric table from a file.

    Parameters
    ----------
    filename : str
        The file to read. It is also stored in the returned metadata
        under the ``"filename"`` key.

    Returns
    -------
    tuple
        ``(meta, data)`` where ``meta`` is the result of
        ``load_data(filename, headers=True)`` with ``"filename"`` added
        and ``data`` is the result of ``load_data(filename)``.

    Raises
    ------
    ParseError
        If the loaded data is empty or one-dimensional.
    """
    # NOTE(review): presumably headers=True yields a dict of header
    # values -- confirm against diffpy.utils.parsers.load_data.
    header = load_data(filename, headers=True)
    header["filename"] = filename

    table = load_data(filename)
    has_columns = table.size != 0 and table.ndim != 1
    if not has_columns:
        raise ParseError(
            "Data block must have at least two columns (x, y)."
        )
    return header, table
261+
262+
def _detect_column_format(self, data, column_format):
263+
"""Auto-detect or validate column format."""
264+
num_cols = data.shape[1]
265+
266+
if column_format is None:
267+
if num_cols == 2:
268+
column_format = ("x", "y")
269+
elif num_cols == 3:
270+
column_format = ("x", "y", "dy")
271+
elif num_cols == 4:
272+
column_format = ("x", "y", "dx", "dy")
273+
else:
274+
raise ParseError(
275+
f"Expected 2 to 4 columns but found {num_cols}."
276+
)
277+
if len(column_format) != num_cols:
278+
raise ParseError(
279+
f"column_format has {len(column_format)} "
280+
f"labels but file contains {num_cols} columns."
281+
)
282+
if len(set(column_format)) != len(column_format):
283+
raise ParseError("column_format cannot contain duplicate labels.")
284+
for label in column_format:
285+
if label not in {"x", "y", "dx", "dy"}:
286+
raise ParseError(
287+
f"column_format contains invalid label '{label}'. "
288+
"Valid labels are 'x', 'y', 'dx', and 'dy'."
289+
)
290+
return column_format
291+
292+
def _map_column_labels_to_data(self, data, column_format):
293+
"""Map numeric data to columns by label."""
294+
columns = {}
295+
for i, label in enumerate(column_format):
296+
columns[label] = data[:, i]
297+
298+
if "x" not in columns or "y" not in columns:
299+
raise ParseError(
300+
"Both 'x' and 'y' columns must be present in the data."
301+
)
302+
303+
return columns
304+
305+
@staticmethod
306+
def _validate_uncertainty(data, length):
307+
"""Return the uncertainty data if valid, otherwise 0."""
308+
if data is None or not np.all(np.isfinite(data)) or np.any(data <= 0):
309+
return np.zeros(length)
310+
return data
311+
312+
def get_num_banks(self):
313+
"""Get the number of banks read by the parser.
314+
315+
Returns
316+
-------
317+
int
318+
The number of banks read by the parser.
319+
"""
143320
return len(self._banks)
144321

145-
def selectBank(self, index):
322+
@deprecated(getNumBanks_dep_msg)
def getNumBanks(self):
    """This function is deprecated and will be removed in version
    4.0.0.

    Please use diffpy.srfit.fitbase.ProfileParser.get_num_banks
    instead.

    Returns
    -------
    int
        The value returned by `get_num_banks`.
    """
    # Thin alias: all work is done by the snake_case replacement.
    return self.get_num_banks()
331+
332+
def select_bank(self, index):
146333
"""Select which bank to use.
147334
148335
This method should only be called after the data has been parsed. The
@@ -160,7 +347,7 @@ def selectBank(self, index):
160347
if index is None:
161348
index = self._meta.get("bank", 0)
162349

163-
numbanks = self.getNumBanks()
350+
numbanks = self.get_num_banks()
164351
if index > numbanks:
165352
raise IndexError("Bank index out of range")
166353

@@ -175,7 +362,18 @@ def selectBank(self, index):
175362
self._x, self._y, self._dx, self._dy = self._banks[index]
176363
return
177364

178-
def getData(self, index=None):
365+
@deprecated(selectBank_dep_msg)
def selectBank(self, index):
    """This function is deprecated and will be removed in version
    4.0.0.

    Please use diffpy.srfit.fitbase.ProfileParser.select_bank
    instead.

    Parameters
    ----------
    index
        The bank index, forwarded unchanged to `select_bank`.
    """
    # Thin alias: all work is done by the snake_case replacement.
    self.select_bank(index)
    return
375+
376+
def get_data(self, index=None):
179377
"""Get the data.
180378
181379
This method should only be called after the data has been parsed. The
@@ -192,12 +390,37 @@ def getData(self, index=None):
192390
This returns (x, y, dx, dy) tuple for the bank. dx is 0 if it cannot
193391
be determined from the data format.
194392
"""
195-
self.selectBank(index)
393+
self.select_bank(index)
196394

197395
return self._x, self._y, self._dx, self._dy
198396

397+
@deprecated(getData_dep_msg)
def getData(self, index=None):
    """This function is deprecated and will be removed in version
    4.0.0.

    Please use diffpy.srfit.fitbase.ProfileParser.get_data instead.

    Parameters
    ----------
    index : optional
        The bank index, forwarded unchanged to `get_data`
        (default None).

    Returns
    -------
    tuple
        The value returned by `get_data`.
    """
    # Thin alias: all work is done by the snake_case replacement.
    return self.get_data(index)
405+
406+
def get_metadata(self):
407+
"""Get the parsed metadata.
408+
409+
Returns
410+
-------
411+
dict
412+
A dictionary containing metadata read from the file.
413+
"""
414+
return self._meta
415+
416+
@deprecated(getMetaData_dep_msg)
def getMetaData(self):
    """This function is deprecated and will be removed in version
    4.0.0.

    Please use diffpy.srfit.fitbase.ProfileParser.get_metadata
    instead.

    Returns
    -------
    dict
        The value returned by `get_metadata`.
    """
    # Delegate instead of reading ``_meta`` directly, as getNumBanks,
    # selectBank and getData do, so that a subclass overriding
    # get_metadata is honoured through the deprecated name as well.
    return self.get_metadata()
202425

203426

0 commit comments

Comments
 (0)