|
| 1 | +import re |
| 2 | +from pathlib import Path |
| 3 | + |
| 4 | +import pytest |
| 5 | + |
| 6 | +from diffpy.srfit.exceptions import ParseError |
| 7 | +from diffpy.srfit.fitbase.profileparser import ProfileParser |
| 8 | + |
# Use cases covered by the tests in this file.
#
# UC1: User loads file with x, y, dx, dy columns in that order
#   expected: x, y, dx, dy, and metadata are all read correctly
# UC2: User loads file with x, y, dy columns in that order (dx is missing)
#   expected: x, y, dy, and metadata are all read correctly
# UC3: User loads file with x, y columns in that order (dx and dy are
#   missing)
#   expected: x, y, and metadata are all read correctly
# UC4: User loads file with x, dx, y, dy columns in that order and specifies
#   column_format
#   expected: x, y, dx, dy, and metadata are all read correctly
# UC5: User loads file with dy and dx values containing NaN and inf values
#   expected: x, y, and metadata are all read correctly and dx and dy are
#   set to 0 for all values

# UC6: User loads file with only one column
#   expected: ParseError is raised
# UC7: User loads file with 5 columns
#   expected: ParseError is raised
# UC8: User loads file with x, y, and dy but specifies column_format with 4
#   columns
#   expected: ParseError is raised
# UC9: User loads file with x, y, dx, and dy but specifies column_format
#   with 5 columns
#   expected: ParseError is raised
# UC10: User loads file with x, y, dx, and dy but specifies column_format
#   with 3 columns
#   expected: ParseError is raised
# UC11: User loads file with x, y, dx, and dy but specifies column_format
#   with duplicate values
#   expected: ParseError is raised
# UC12: User loads file with x, y, dx, and dy but specifies column_format
#   with an invalid label
#   expected: ParseError is raised

| 38 | + |
# Metadata that test_parse_file expects the parser to report for every
# successfully parsed data file. The per-file "filename" entry is not listed
# here; the test supplies it for each input file before comparing.
EXPECTED_META = {
    "wavelength": 0.1,
    "dataformat": "QA",
    "inputfile": "input.iq",
    "backgroundfile": "backgroundfile.iq",
    "mode": "xray",
    "bgscale": 1.0,
    "composition": "TiSe2",
    "outputtype": "gr",
    "qmaxinst": 25.0,
    "qmin": 0.1,
    "qmax": 25.0,
    "rmax": 140.0,
    "rmin": 0.0,
    "rstep": 0.01,
    "rpoly": 0.7,
    "inputdir": "/my/data/dir",
    "savedir": "/my/save/dir",
    "backgroundfilefull": "/my/data/dir/backgroundfile.iq",
    "nbanks": 1,
    "bank": 0,
}
| 61 | + |
| 62 | + |
@pytest.mark.parametrize(
    "input_file, column_order, expected_x, "
    "expected_y, expected_dx, expected_dy",
    [
        # UC1: 4-column file (x, y, dx, dy) — all columns present
        # expected: x, y, dx, dy, and metadata are all read correctly
        (
            Path("four_col.gr"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.1, 0.3, 0.5],
            [0.2, 0.4, 0.6],
        ),
        # UC2: 3-column file (x, y, dy) — dx is missing
        # expected: x, y, dy, and metadata are all read correctly
        (
            Path("three_col.dat"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.0, 0.0, 0.0],
            [0.2, 0.4, 0.6],
        ),
        # UC3: 2-column file (x, y) — dx and dy are missing
        # expected: x, y, and metadata are all read correctly
        (
            Path("two_col.txt"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0],
        ),
        # UC4: 4-column file in (x, dx, y, dy) order with explicit
        # column_format
        # expected: x, y, dx, dy, and metadata are all read correctly
        (
            Path("four_col_reordered.txt"),
            ("x", "dx", "y", "dy"),
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.1, 0.3, 0.5],
            [0.2, 0.4, 0.6],
        ),
        # UC5: 4-column file where dx/dy contain NaN and inf values
        # expected: x, y, and metadata are read correctly; dx and dy
        # are set to 0
        (
            Path("four_col_nan_inf.gr"),
            None,
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0],
        ),
    ],
)
def test_parse_file(
    parser_datafiles,
    input_file,
    column_order,
    expected_x,
    expected_y,
    expected_dx,
    expected_dy,
):
    """Check the columns and metadata read by ``ProfileParser.parse_file``.

    Parameters
    ----------
    parser_datafiles
        Fixture defined outside this file; used here as the directory that
        ``input_file`` is joined onto (presumably a ``Path`` — confirm in
        conftest).
    input_file : Path
        Name of the data file to parse, relative to ``parser_datafiles``.
    column_order : tuple of str or None
        Second argument passed to ``parse_file``.
    expected_x, expected_y, expected_dx, expected_dy : list of float
        Values the parser is expected to store in ``_x``, ``_y``, ``_dx``
        and ``_dy``.
    """
    parser = ProfileParser()
    parser.parse_file(parser_datafiles / input_file, column_order)

    # Work on copies: writing "filename" into the module-level EXPECTED_META
    # (or into the parser's own metadata) would leak state between the
    # parametrized cases.
    actual_metadata = dict(parser._meta)
    # Only the base name is compared because the directory part depends on
    # where the fixture files live. Path.name also copes with platform
    # specific separators, unlike splitting on "/".
    actual_metadata["filename"] = Path(actual_metadata["filename"]).name
    expected_metadata = {**EXPECTED_META, "filename": input_file.name}

    assert parser._x.tolist() == expected_x
    assert parser._y.tolist() == expected_y
    assert parser._dx.tolist() == expected_dx
    assert parser._dy.tolist() == expected_dy
    assert actual_metadata == expected_metadata
| 145 | + |
| 146 | + |
@pytest.mark.parametrize(
    "input_file, column_order, msg",
    [
        # UC6: a single column cannot provide both x and y
        # expected: ParseError is raised
        (
            "one_col.gr",
            None,
            "Data block must have at least two columns (x, y).",
        ),
        # UC7: five columns and no column_format to map them
        # expected: ParseError is raised
        ("five_col.gr", None, "Expected 2 to 4 columns but found 5."),
        # UC8: column_format lists 4 labels for a 3-column file
        # expected: ParseError is raised
        (
            "three_col.dat",
            ("x", "y", "dx", "dy"),
            "column_format has 4 labels but file contains 3 columns.",
        ),
        # UC9: column_format lists 5 labels for a 4-column file
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "y", "dx", "dy", "extra"),
            "column_format has 5 labels but file contains 4 columns.",
        ),
        # UC10: column_format lists 3 labels for a 4-column file
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "y", "dy"),
            "column_format has 3 labels but file contains 4 columns.",
        ),
        # UC11: column_format repeats a label
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "x", "dx", "dy"),
            "column_format cannot contain duplicate labels.",
        ),
        # UC12: column_format contains a label that is not recognised
        # expected: ParseError is raised
        (
            "four_col.gr",
            ("x", "y", "dx", "invalid"),
            "column_format contains invalid label 'invalid'. "
            "Valid labels are 'x', 'y', 'dx', and 'dy'.",
        ),
    ],
)
def test_parse_file_bad(parser_datafiles, input_file, column_order, msg):
    """Check that ``parse_file`` raises ``ParseError`` with message ``msg``.

    ``msg`` is escaped before matching so that characters such as ``(`` and
    ``.`` in the expected text are compared literally, not as regex syntax.
    """
    # pytest.raises(match=...) performs a regex search, hence the escaping.
    literal_pattern = re.escape(msg)
    data_path = parser_datafiles / input_file
    profile_parser = ProfileParser()
    with pytest.raises(ParseError, match=literal_pattern):
        profile_parser.parse_file(data_path, column_order)
0 commit comments