Skip to content

Commit 88b07c5

Browse files
author
Konstantin
committed
fix some more edge cases
1 parent 16618cf commit 88b07c5

2 files changed

Lines changed: 63 additions & 7 deletions

File tree

src/fundamend/utils.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Contains some utility functions that are used in the project.
33
"""
44

5+
import re
56
from typing import Optional
67

78
from fundamend.models.anwendungshandbuch import Kommunikationsrichtung
@@ -78,6 +79,14 @@ def remove_linebreaks_and_hyphens(original: str) -> str:
7879

7980

8081
_UNIFIED_SEPARATOR = "/"  # how multiple Marktrollen shall be split in the kommunikation_von attribute
_ALTERNATIVE_SEPARATORS = [","]  # other separators that are used in the wild

# Matches a stand-alone 'an' that is NOT already preceded by whitespace,
# e.g. the 'an' in 'NB (VNB)an NB (LPB)'. The negative lookbehind keeps well-formed
# input such as 'NB an LF' untouched instead of producing a double space.
_an_at_word_boundary = re.compile(r"(?<!\s)\ban\b")


def _add_whitespace_before_an(original: str) -> str:
    """Insert a single space before a stand-alone 'an' that is glued to the preceding token.

    Args:
        original: the raw text, e.g. 'NB (VNB)an NB (LPB)'

    Returns:
        The text with ' an' in place of every stand-alone 'an' that had no whitespace in front of it,
        e.g. 'NB (VNB) an NB (LPB)'. An 'an' that already follows whitespace, or that is part of a
        longer word, is left unchanged, so splitting the result at ' an ' does not leave trailing
        whitespace on the left part.
    """
    return _an_at_word_boundary.sub(" an", original)
8190

8291

8392
def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunikationsrichtung]:
@@ -88,13 +97,18 @@ def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunika
8897
if not kommunikation_von_line or not kommunikation_von_line.strip():
8998
return []
9099
result: list[Kommunikationsrichtung] = []
91-
parts = kommunikation_von_line.split(" an ")
100+
parts = _add_whitespace_before_an(kommunikation_von_line).split(" an ")
92101
if len(parts) != 2:
93-
raise ValueError(f"Invalid kommunikation_von string: {kommunikation_von_line}. Expected format: 'X an Y[/Z]'")
102+
# maybe this line looks different, more like 'NB an LF, MSB an NB (Gas)'
103+
# then we have to split at the comma first and treat each part like it was a single line. wtf
104+
if "," in kommunikation_von_line:
105+
for subpart in kommunikation_von_line.split(","):
106+
result += _parse_kommunikation_von_line(subpart.strip())
107+
return result
108+
raise ValueError(f"Invalid kommunikation_von string: '{kommunikation_von_line}'. Expected format: 'X an Y[/Z]'")
94109
sender_str = parts[0]
95110
receiver_str = parts[1]
96-
alternative_separators = [","]
97-
for alternative_separator in alternative_separators:
111+
for alternative_separator in _ALTERNATIVE_SEPARATORS:
98112
if alternative_separator in receiver_str:
99113
receiver_str = receiver_str.replace(alternative_separator, _UNIFIED_SEPARATOR)
100114
if alternative_separator in sender_str:
@@ -107,7 +121,7 @@ def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunika
107121
return result
108122

109123

110-
def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikationsrichtung]:
124+
def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikationsrichtung] | None:
111125
"""Splits the kommunikation_von string into something strongly typed
112126
113127
Args:
@@ -117,7 +131,10 @@ def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikat
117131
Properly typed list of Kommunikationsrichtung objects:
118132
[Kommunikationsrichtung(sender='NB', empfaenger='LF'),
119133
Kommunikationsrichtung(sender='NB', empfaenger='MSB')]
134+
or none in case there are no information given (directly).
120135
"""
136+
if kommunikation_von == "Beteiligte aus Ursprungs-nachricht":
137+
return None
121138
result: list[Kommunikationsrichtung] = []
122139
for line in (kommunikation_von or "").splitlines():
123140
line = line.strip()

unittests/test_utils.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
1+
from pathlib import Path
2+
from typing import Generator
3+
14
import pytest
25

3-
from fundamend.models.anwendungshandbuch import Kommunikationsrichtung
6+
from fundamend import AhbReader
7+
from fundamend.models.anwendungshandbuch import Anwendungsfall, Kommunikationsrichtung
48
from fundamend.utils import parse_kommunikation_von, remove_linebreaks_and_hyphens
59

10+
from .conftest import is_private_submodule_checked_out
11+
612

713
@pytest.mark.parametrize(
814
"original, expected",
@@ -100,8 +106,41 @@ def test_anwendungsfall_beschreibung_normalization(original: str, expected: str)
100106
],
101107
id="many receivers",
102108
),
109+
pytest.param(
110+
"NB an LF, MSB an NB (Gas)",
111+
[
112+
Kommunikationsrichtung(sender="NB", empfaenger="LF"),
113+
Kommunikationsrichtung(sender="MSB", empfaenger="NB (Gas)"),
114+
],
115+
),
116+
pytest.param("NB (VNB)an NB (LPB)", [Kommunikationsrichtung(sender="NB (VNB)", empfaenger="NB (LPB)")]),
117+
pytest.param("Beteiligte aus Ursprungs-nachricht", None),
103118
],
104119
)
105-
def test_parsing_kommunikation_von(original: str, expected: list[Kommunikationsrichtung]) -> None:
120+
def test_parsing_kommunikation_von(original: str, expected: list[Kommunikationsrichtung] | None) -> None:
106121
actual = parse_kommunikation_von(original)
107122
assert actual == expected
123+
124+
125+
def _all_anwendungsfaelle() -> Generator[Anwendungsfall, None, None]:
    """Yield every Anwendungsfall that is not outdated from the AHB XML files of the private submodule.

    Calls pytest.skip if the private submodule is not checked out. The submodule is expected
    in the 'xml-migs-and-ahbs' directory two levels above this test file.
    """
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    submodule_dir = Path(__file__).parent.parent / "xml-migs-and-ahbs"
    assert submodule_dir.exists() and submodule_dir.is_dir()
    for xml_path in submodule_dir.rglob("**/*AHB*.xml"):
        # one AHB file holds many Anwendungsfaelle; pass on only those that are still current
        yield from (
            candidate for candidate in AhbReader(xml_path).read().anwendungsfaelle if not candidate.is_outdated
        )
136+
137+
138+
def test_parsing_all_kommunikation_von_there_is() -> None:
    """Smoke test: parse the 'Kommunikation Von' attribute of every Anwendungsfall found in the AHB files.

    The test only checks that parsing does not raise; the parsed result is discarded.
    It is skipped if the private submodule is missing or as soon as a value is not a string.
    """
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    for anwendungsfall in _all_anwendungsfaelle():
        raw_value = anwendungsfall.kommunikation_von
        if isinstance(raw_value, str):
            parse_kommunikation_von(raw_value)  # must not crash
            continue
        pytest.skip("Skipping test because 'Kommunikation Von' is not a string (anymore)")

0 commit comments

Comments
 (0)