Skip to content

Commit dd32346

Browse files
committed
add test for ProfileParser
1 parent 7eaea4b commit dd32346

1 file changed

Lines changed: 200 additions & 0 deletions

File tree

tests/test_profileparser.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import re
2+
from pathlib import Path
3+
4+
import pytest
5+
6+
from diffpy.srfit.exceptions import ParseError
7+
from diffpy.srfit.fitbase.profileparser import ProfileParser
8+
9+
# UC1: User loads file with all x, y, dx, dy columns in that format
10+
# expected: x, y, dx, dy, and metadata are all read correctly
11+
# UC2: User loads file with x, y, dy columns in that format (dx is missing)
12+
# expected: x, y, dy, and metadata are all read correctly
13+
# UC3: User loads file with x, y columns in that format (dx and dy are missing)
14+
# expected: x, y, and metadata are all read correctly
15+
# UC4: User loads file with x, dx, y, dy columns in that format and specifies
16+
# column_format
17+
# expected: x, y, dx, dy, and metadata are all read correctly
18+
# UC5: User loads file with dy and dx values containing NaN and inf values
19+
# expected: x, y, and metadata are all read correctly and dx and dy are set to
20+
# 0 for all values
21+
22+
# UC6: User loads file with only one column
23+
# expected: ParseError is raised
24+
# UC7: User loads file with 5 columns
25+
# expected: ParseError is raised
26+
# UC8: User loads file with x, y, and dy but specifies column_format with 4
27+
# columns
28+
# expected: ParseError is raised
29+
# UC9: User loads file with x, y, dx, and dy but specifies column_format with 5
30+
# columns
31+
# expected: ParseError is raised
32+
# UC10: User loads file with x, y, dx, and dy but specifies column_format with
33+
# 3 columns
34+
# expected: ParseError is raised
35+
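# UC11: User loads file with x, y, dx, and dy but specifies column_format with
# duplicate values
# expected: ParseError is raised
# UC12: User loads file with x, y, dx, and dy but specifies column_format with
# an invalid label
# expected: ParseError is raised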
38+
39+
# Metadata that test_parse_file compares against the parser's metadata for
# every data file. The per-file "filename" entry is not listed here; the
# test supplies it for each case.
EXPECTED_META = {
    "wavelength": 0.1,
    "dataformat": "QA",
    "inputfile": "input.iq",
    "backgroundfile": "backgroundfile.iq",
    "mode": "xray",
    "bgscale": 1.0,
    "composition": "TiSe2",
    "outputtype": "gr",
    "qmaxinst": 25.0,
    "qmin": 0.1,
    "qmax": 25.0,
    "rmax": 140.0,
    "rmin": 0.0,
    "rstep": 0.01,
    "rpoly": 0.7,
    "inputdir": "/my/data/dir",
    "savedir": "/my/save/dir",
    "backgroundfilefull": "/my/data/dir/backgroundfile.iq",
    "nbanks": 1,
    "bank": 0,
}
61+
62+
63+
@pytest.mark.parametrize(
    "input_file, column_order, expected_x, "
    "expected_y, expected_dx, expected_dy",
    [
        # UC1: 4-column file (x, y, dx, dy) — all columns present
        # expected: x, y, dx, dy, and metadata are all read correctly
        (
            Path("four_col.gr"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.1, 0.3, 0.5],
            [0.2, 0.4, 0.6],
        ),
        # UC2: 3-column file (x, y, dy) — dx is missing
        # expected: x, y, dy, and metadata are all read correctly
        (
            Path("three_col.dat"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.0, 0.0, 0.0],
            [0.2, 0.4, 0.6],
        ),
        # UC3: 2-column file (x, y) — dx and dy are missing
        # expected: x, y, and metadata are all read correctly
        (
            Path("two_col.txt"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0],
        ),
        # UC4: 4-column file in (x, dx, y, dy) order with explicit
        # column_format
        # expected: x, y, dx, dy, and metadata are all read correctly
        (
            Path("four_col_reordered.txt"),
            ("x", "dx", "y", "dy"),
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.1, 0.3, 0.5],
            [0.2, 0.4, 0.6],
        ),
        # UC5: 4-column file where dx/dy contain NaN and inf values
        # expected: x, y, and metadata are read correctly; dx and dy
        # are set to 0
        (
            Path("four_col_nan_inf.gr"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0],
        ),
    ],
)
def test_parse_file(
    parser_datafiles,
    input_file,
    column_order,
    expected_x,
    expected_y,
    expected_dx,
    expected_dy,
):
    """Check the arrays and metadata read from well-formed data files.

    Each case parses ``parser_datafiles / input_file`` with the given
    ``column_order`` and compares the parser's ``_x``, ``_y``, ``_dx``,
    ``_dy`` arrays and its ``_meta`` mapping with the expected values.

    NOTE(review): ``parser_datafiles`` is presumably a fixture providing
    the directory that holds the data files -- confirm in conftest.
    """
    parser = ProfileParser()
    parser.parse_file(parser_datafiles / input_file, column_order)

    # Work on copies so neither the parser's own metadata nor the shared
    # module-level EXPECTED_META is modified by one parametrized case and
    # then observed by the next.
    actual_metadata = dict(parser._meta)
    # Only the bare file name is compared because the directory part
    # depends on where the data files were placed for this run.
    actual_metadata["filename"] = Path(actual_metadata["filename"]).name
    expected_metadata = {**EXPECTED_META, "filename": Path(input_file).name}

    assert parser._x.tolist() == expected_x
    assert parser._y.tolist() == expected_y
    assert parser._dx.tolist() == expected_dx
    assert parser._dy.tolist() == expected_dy
    assert actual_metadata == expected_metadata
145+
146+
147+
@pytest.mark.parametrize(
    "input_file, column_order, msg",
    [
        # UC6: Only one column — cannot form x/y pair
        # expected: ParseError is raised
        (
            "one_col.gr",
            None,
            "Data block must have at least two columns (x, y).",
        ),
        # UC7: Five columns — ambiguous, no mapping defined
        # expected: ParseError is raised
        ("five_col.gr", None, "Expected 2 to 4 columns but found 5."),
        # UC8: 3-column file but column_format expects 4 columns
        # expected: ParseError is raised
        (
            "three_col.dat",
            ("x", "y", "dx", "dy"),
            "column_format has 4 labels but file contains 3 columns.",
        ),
        # UC9: 4-column file but column_format expects 5 columns
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "y", "dx", "dy", "extra"),
            "column_format has 5 labels but file contains 4 columns.",
        ),
        # UC10: 4-column file but column_format expects only 3 columns
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "y", "dy"),
            "column_format has 3 labels but file contains 4 columns.",
        ),
        # UC11: column_format contains duplicate column names
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "x", "dx", "dy"),
            "column_format cannot contain duplicate labels.",
        ),
        # UC12: column_format contains invalid column names
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "y", "dx", "invalid"),
            "column_format contains invalid label 'invalid'. "
            "Valid labels are 'x', 'y', 'dx', and 'dy'.",
        ),
    ],
)
def test_parse_file_bad(parser_datafiles, input_file, column_order, msg):
    """Check that malformed input or column_format raises ParseError.

    Each case parses ``parser_datafiles / input_file`` with the given
    ``column_order`` and expects a ``ParseError`` whose message contains
    ``msg``.
    """
    parser = ProfileParser()
    # The messages contain regex metacharacters such as "(" and ".", so
    # they are escaped to be matched literally.
    with pytest.raises(ParseError, match=re.escape(msg)):
        parser.parse_file(parser_datafiles / input_file, column_order)

0 commit comments

Comments
 (0)