Skip to content

Commit ce946b7

Browse files
authored
Merge pull request #376 from networktocode/372-fix_openain_iCal
Refactor provider to use OpenAI parser as a fallback and enhance test…
2 parents 759cad1 + f5707cd commit ce946b7

3 files changed

Lines changed: 108 additions & 22 deletions

File tree

changes/372.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Refactored OpenAI parser to be used as a fallback and fixed subject injection to skip text/calendar parts.

circuit_maintenance_parser/provider.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,6 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
130130
logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
131131
return []
132132

133-
if os.getenv("PARSER_OPENAI_API_KEY"):
134-
self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]))
135-
136-
# Add subject to all html or text/* data_parts if not already present.
137-
self.add_subject_to_text(data)
138-
139133
for processor in self._processors:
140134
try:
141135
return processor.process(data, self.get_extended_data())
@@ -150,6 +144,22 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
150144
related_exceptions.append(exc)
151145
continue
152146

147+
# Use OpenAI parser as a last resort if all other processors failed.
148+
if os.getenv("PARSER_OPENAI_API_KEY"):
149+
self.add_subject_to_text(data)
150+
openai_processor = CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser])
151+
try:
152+
return openai_processor.process(data, self.get_extended_data())
153+
except ProcessorError as exc:
154+
process_error_message = (
155+
f"- Processor {openai_processor.__class__.__name__} from {provider_name} failed due to: %s\n"
156+
)
157+
logger.debug(process_error_message, traceback.format_exc())
158+
159+
related_exc = rgetattr(exc, "__cause__")
160+
error_message += process_error_message % related_exc
161+
related_exceptions.append(exc)
162+
153163
raise ProviderError(
154164
(f"Failed creating Maintenance notification for {provider_name}.\nDetails:\n{error_message}"),
155165
related_exceptions=related_exceptions,
@@ -165,7 +175,7 @@ def add_subject_to_text(self, data: NotificationData):
165175
if subject:
166176
new_data_parts = []
167177
for part in data.data_parts:
168-
if part.type.startswith("text/") or part.type.startswith("html"):
178+
if (part.type.startswith("text/") or part.type.startswith("html")) and part.type != "text/calendar":
169179
content_str = part.content.decode(errors="ignore")
170180
if subject not in content_str:
171181
# Append subject and update content

tests/unit/test_providers.py

Lines changed: 90 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77

88
from circuit_maintenance_parser.data import NotificationData
99
from circuit_maintenance_parser.errors import ProcessorError, ProviderError
10-
from circuit_maintenance_parser.parser import EmailDateParser, Parser
11-
from circuit_maintenance_parser.parsers.openai import OpenAIParser
12-
from circuit_maintenance_parser.processor import CombinedProcessor, SimpleProcessor
10+
from circuit_maintenance_parser.parser import Parser
11+
from circuit_maintenance_parser.processor import SimpleProcessor
1312
from circuit_maintenance_parser.provider import AquaComms, GenericProvider
1413

1514
# pylint: disable=use-implicit-booleaness-not-comparison
@@ -116,21 +115,73 @@ class ProviderWithIncludeFilter(GenericProvider):
116115
"provider_class",
117116
[GenericProvider, AquaComms],
118117
)
119-
def test_provider_gets_mlparser(provider_class):
120-
"""Test to check the any provider gets a default ML parser when ENV is activated."""
121-
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
122-
data = NotificationData.init_from_raw("text/plain", b"fake data")
123-
data.add_data_part("text/html", b"other data")
118+
def test_provider_falls_back_to_openai(provider_class):
119+
"""Test that OpenAI parser is used as last resort when all other processors fail."""
120+
with patch.dict(os.environ, {"PARSER_OPENAI_API_KEY": "some_api_key"}):
121+
data = NotificationData.init_from_raw("text/plain", b"fake data")
122+
data.add_data_part("text/html", b"other data")
124123

125-
provider = provider_class()
124+
provider = provider_class()
125+
original_processor_count = len(provider._processors) # pylint: disable=protected-access
126126

127-
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
128-
mock_processor.return_value = [{"a": "b"}]
129-
provider.get_maintenances(data)
127+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
128+
# All native processors fail, then OpenAI processor succeeds
129+
mock_processor.side_effect = [ProcessorError] * original_processor_count + [[{"a": "b"}]]
130+
provider.get_maintenances(data)
131+
# Native processors + 1 OpenAI call
132+
assert mock_processor.call_count == original_processor_count + 1
130133

131-
assert provider._processors[-1] == CombinedProcessor( # pylint: disable=protected-access
132-
data_parsers=[EmailDateParser, OpenAIParser]
133-
)
134+
# OpenAI processor should NOT be appended to the provider's processor list
135+
assert len(provider._processors) == original_processor_count # pylint: disable=protected-access
136+
137+
138+
def test_provider_does_not_use_openai_when_native_succeeds():
139+
"""Test that OpenAI parser is not invoked when a native processor succeeds."""
140+
with patch.dict(os.environ, {"PARSER_OPENAI_API_KEY": "some_api_key"}):
141+
data = NotificationData.init_from_raw("text/plain", b"fake data")
142+
143+
provider = GenericProvider()
144+
145+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
146+
mock_processor.return_value = [{"a": "b"}]
147+
provider.get_maintenances(data)
148+
# Only the native processor should be called
149+
assert mock_processor.call_count == 1
150+
151+
152+
def test_provider_data_not_mutated_when_native_succeeds():
153+
"""Test that add_subject_to_text is not called when native processors succeed."""
154+
with patch.dict(os.environ, {"PARSER_OPENAI_API_KEY": "some_api_key"}):
155+
data = NotificationData.init_from_raw("text/plain", b"fake data")
156+
data.add_data_part("email-header-subject", b"Test Subject")
157+
158+
original_content = data.data_parts[0].content
159+
provider = GenericProvider()
160+
161+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
162+
mock_processor.return_value = [{"a": "b"}]
163+
provider.get_maintenances(data)
164+
165+
# Data should not have been mutated since native processor succeeded
166+
assert data.data_parts[0].content == original_content
167+
168+
169+
def test_provider_no_repeated_append_on_multiple_calls():
170+
"""Test that calling get_maintenances multiple times doesn't grow the processor list."""
171+
with patch.dict(os.environ, {"PARSER_OPENAI_API_KEY": "some_api_key"}):
172+
data = NotificationData.init_from_raw("text/plain", b"fake data")
173+
174+
provider = GenericProvider()
175+
original_processor_count = len(provider._processors) # pylint: disable=protected-access
176+
177+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
178+
mock_processor.return_value = [{"a": "b"}]
179+
provider.get_maintenances(data)
180+
provider.get_maintenances(data)
181+
provider.get_maintenances(data)
182+
183+
# Processor list should never grow
184+
assert len(provider._processors) == original_processor_count # pylint: disable=protected-access
134185

135186

136187
def test_add_subject_to_text_appends_subject_to_text_parts():
@@ -230,3 +281,27 @@ def test_add_subject_to_text_handles_decode_errors():
230281

231282
# This should not raise an exception due to errors="ignore" in decode()
232283
provider.add_subject_to_text(data)
284+
285+
286+
def test_add_subject_to_text_skips_text_calendar():
287+
"""Test that add_subject_to_text does not modify text/calendar parts."""
288+
provider = GenericProvider()
289+
290+
data = NotificationData()
291+
data.add_data_part("email-header-subject", b"Planned Work Notification")
292+
data.add_data_part(
293+
"text/calendar",
294+
b"BEGIN:VCALENDAR\r\nVERSION:2.0\r\nBEGIN:VEVENT\r\nEND:VEVENT\r\nEND:VCALENDAR",
295+
)
296+
data.add_data_part("text/plain", b"Some plain text content")
297+
298+
original_calendar_content = data.data_parts[1].content
299+
300+
provider.add_subject_to_text(data)
301+
302+
# text/calendar part should remain unchanged
303+
assert data.data_parts[1].content == original_calendar_content
304+
assert b"Planned Work Notification" not in data.data_parts[1].content
305+
306+
# text/plain part should have subject appended
307+
assert b"Planned Work Notification" in data.data_parts[2].content

0 commit comments

Comments
 (0)