22Contains some utility functions that are used in the project.
33"""
44
5+ import re
56from typing import Optional
67
78from fundamend .models .anwendungshandbuch import Kommunikationsrichtung
@@ -78,6 +79,14 @@ def remove_linebreaks_and_hyphens(original: str) -> str:
7879
7980
8081_UNIFIED_SEPARATOR = "/" # how multiple Marktrollen shall be split in the kommunikation_von attribute
82+ _ALTERNATIVE_SEPARATORS = ["," ] # other separators that are used in the wild
83+
84+ _an_at_word_boundary = re .compile (r"\ban\b" )
85+
86+
87+ def _add_whitespace_before_an (original : str ) -> str :
88+ """adds whitespace before 'an' if it is not already there"""
89+ return _an_at_word_boundary .sub (" an" , original )
8190
8291
8392def _parse_kommunikation_von_line (kommunikation_von_line : str ) -> list [Kommunikationsrichtung ]:
@@ -88,13 +97,18 @@ def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunika
8897 if not kommunikation_von_line or not kommunikation_von_line .strip ():
8998 return []
9099 result : list [Kommunikationsrichtung ] = []
91- parts = kommunikation_von_line .split (" an " )
100+ parts = _add_whitespace_before_an ( kommunikation_von_line ) .split (" an " )
92101 if len (parts ) != 2 :
93- raise ValueError (f"Invalid kommunikation_von string: { kommunikation_von_line } . Expected format: 'X an Y[/Z]'" )
102+ # maybe this line looks different, more like 'NB an LF, MSB an NB (Gas)'
103+ # then we have to split at the comma first and treat each part like it was a single line. wtf
104+ if "," in kommunikation_von_line :
105+ for subpart in kommunikation_von_line .split ("," ):
106+ result += _parse_kommunikation_von_line (subpart .strip ())
107+ return result
108+ raise ValueError (f"Invalid kommunikation_von string: '{ kommunikation_von_line } '. Expected format: 'X an Y[/Z]'" )
94109 sender_str = parts [0 ]
95110 receiver_str = parts [1 ]
96- alternative_separators = ["," ]
97- for alternative_separator in alternative_separators :
111+ for alternative_separator in _ALTERNATIVE_SEPARATORS :
98112 if alternative_separator in receiver_str :
99113 receiver_str = receiver_str .replace (alternative_separator , _UNIFIED_SEPARATOR )
100114 if alternative_separator in sender_str :
@@ -107,7 +121,7 @@ def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunika
107121 return result
108122
109123
110- def parse_kommunikation_von (kommunikation_von : Optional [str ]) -> list [Kommunikationsrichtung ]:
124+ def parse_kommunikation_von (kommunikation_von : Optional [str ]) -> list [Kommunikationsrichtung ] | None :
111125 """Splits the kommunikation_von string into something strongly typed
112126
113127 Args:
@@ -117,7 +131,10 @@ def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikat
117131 Properly typed list of Kommunikationsrichtung objects:
118132 [Kommunikationsrichtung(sender='NB', empfaenger='LF'),
119133 Kommunikationsrichtung(sender='NB', empfaenger='MSB')]
134+ or none in case there are no information given (directly).
120135 """
136+ if kommunikation_von == "Beteiligte aus Ursprungs-nachricht" :
137+ return None
121138 result : list [Kommunikationsrichtung ] = []
122139 for line in (kommunikation_von or "" ).splitlines ():
123140 line = line .strip ()
0 commit comments