Skip to content

Commit d07a720

Browse files
authored
fix: prevent infrastructure logs from flooding webhook JSONL files (#1035) (#1037)
## Summary - Only attach `JsonLogHandler` to the main webhook logger, not infrastructure loggers (mcp_server, logs_server) - Filter infrastructure noise entries in log viewer streaming - Add tests for logging separation and noise filtering ## Problem Infrastructure loggers (MCP server, log viewer) were writing to `webhooks_*.json` via `JsonLogHandler`, flooding the file with context-less entries. When querying via MCP, these noise entries were the most recent (generated by the MCP call itself), drowning out actual webhook data and returning 0 useful results. ## Changes | File | Change | |---|---| | `webhook_server/utils/helpers.py` | Only attach `JsonLogHandler` when no explicit `log_file_name` is provided | | `webhook_server/web/log_viewer.py` | Add `_is_infrastructure_noise()` filter for streaming, skip entries from infra loggers with no webhook context | | `webhook_server/tests/test_logging_separation.py` | **NEW** — 9 tests for handler attachment and noise filtering | ## Test plan - [x] All 1425 tests pass, 90.37% coverage - [ ] Deploy and verify MCP queries return webhook data - [ ] Verify `mcpl call github-webhook-logs-myakove get_log_entries '{"limit": 5}'` returns entries Closes #1035 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit ## Release Notes * **New Features** * Webhook log queries now automatically exclude infrastructure and system noise entries that are not associated with a specific webhook, providing cleaner and more focused results. * **Tests** * Added comprehensive test coverage for logging separation and infrastructure noise filtering functionality. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent ffb4c8a commit d07a720

3 files changed

Lines changed: 151 additions & 9 deletions

File tree

webhook_server/tests/test_logging_separation.py

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1+
import datetime
12
from unittest.mock import AsyncMock, MagicMock, patch
23

34
import pytest
45

56
import webhook_server.app
67
from webhook_server.app import get_log_viewer_controller, lifespan
78
from webhook_server.libs.config import Config
8-
from webhook_server.utils.helpers import get_log_file_path
9+
from webhook_server.libs.log_parser import LogEntry
10+
from webhook_server.utils.helpers import get_log_file_path, get_logger_with_params
11+
from webhook_server.utils.json_log_handler import JsonLogHandler
12+
from webhook_server.web.log_viewer import LogViewerController
913

1014

1115
def test_get_log_file_path_absolute():
@@ -18,8 +22,8 @@ def test_get_log_file_path_absolute():
1822
def test_get_log_file_path_relative():
1923
config = MagicMock(spec=Config)
2024
config.data_dir = "/tmp/data"
21-
# Mock os.makedirs to avoid filesystem side effects
22-
with patch("os.makedirs") as mock_makedirs:
25+
# Mock os.path.isdir to force makedirs path, and os.makedirs to avoid filesystem side effects
26+
with patch("os.path.isdir", return_value=False), patch("os.makedirs") as mock_makedirs:
2327
path = get_log_file_path(config, "server.log")
2428
assert path == "/tmp/data/logs/server.log"
2529
mock_makedirs.assert_called_once_with("/tmp/data/logs", exist_ok=True)
@@ -114,3 +118,102 @@ def test_log_viewer_controller_logging_separation():
114118
# Verify
115119
mock_get_logger_params.assert_called_with(log_file_name="logs_server.log")
116120
MockController.assert_called_with(logger=mock_logger)
121+
122+
123+
class TestJsonLogHandlerNotAttachedForInfrastructureLoggers:
124+
"""Verify that JsonLogHandler is NOT attached when log_file_name is explicit.
125+
126+
Infrastructure loggers (MCP server, log viewer) use explicit log_file_name
127+
parameters. They must not write to webhooks_*.json to avoid polluting
128+
webhook log queries with noise entries.
129+
"""
130+
131+
def test_no_json_handler_when_log_file_name_explicit(self) -> None:
132+
"""get_logger_with_params with explicit log_file_name must skip JsonLogHandler."""
133+
with (
134+
patch("webhook_server.utils.helpers.Config") as MockConfig,
135+
patch("webhook_server.utils.helpers.get_logger") as mock_get_logger,
136+
):
137+
mock_config_instance = MockConfig.return_value
138+
mock_config_instance.data_dir = "/tmp/data"
139+
mock_config_instance.get_value.side_effect = lambda value, return_on_none=None: return_on_none
140+
141+
mock_logger = MagicMock()
142+
mock_logger.handlers = []
143+
mock_get_logger.return_value = mock_logger
144+
145+
get_logger_with_params(log_file_name="mcp_server.log")
146+
147+
# JsonLogHandler must NOT be added
148+
mock_logger.addHandler.assert_not_called()
149+
150+
def test_json_handler_attached_when_log_file_name_default(self) -> None:
151+
"""get_logger_with_params without log_file_name must attach JsonLogHandler."""
152+
with (
153+
patch("webhook_server.utils.helpers.Config") as MockConfig,
154+
patch("webhook_server.utils.helpers.get_logger") as mock_get_logger,
155+
patch("os.path.isdir", return_value=False),
156+
patch("os.makedirs"),
157+
):
158+
mock_config_instance = MockConfig.return_value
159+
mock_config_instance.data_dir = "/tmp/data"
160+
# Return a log file config so a file handler exists
161+
mock_config_instance.get_value.side_effect = lambda value, return_on_none=None: (
162+
"webhook_server.log" if value == "log-file" else return_on_none
163+
)
164+
165+
mock_logger = MagicMock()
166+
mock_logger.handlers = [] # No existing handlers
167+
mock_get_logger.return_value = mock_logger
168+
169+
get_logger_with_params() # No explicit log_file_name
170+
171+
# JsonLogHandler MUST be added
172+
mock_logger.addHandler.assert_called_once()
173+
174+
added_handler = mock_logger.addHandler.call_args[0][0]
175+
assert isinstance(added_handler, JsonLogHandler)
176+
177+
178+
class TestIsInfrastructureNoise:
179+
"""Verify _is_infrastructure_noise correctly identifies noise entries."""
180+
181+
@staticmethod
182+
def _make_entry(logger_name: str, hook_id: str | None = None) -> LogEntry:
183+
return LogEntry(
184+
timestamp=datetime.datetime.now(tz=datetime.UTC),
185+
level="INFO",
186+
logger_name=logger_name,
187+
message="test message",
188+
hook_id=hook_id,
189+
)
190+
191+
def test_mcp_logger_without_hook_id_is_noise(self) -> None:
192+
entry = self._make_entry("mcp.server.streamable_http", hook_id=None)
193+
assert LogViewerController._is_infrastructure_noise(entry) is True
194+
195+
def test_logs_server_logger_without_hook_id_is_noise(self) -> None:
196+
entry = self._make_entry("logs_server.log", hook_id=None)
197+
assert LogViewerController._is_infrastructure_noise(entry) is True
198+
199+
def test_mcp_server_log_without_hook_id_is_noise(self) -> None:
200+
entry = self._make_entry("mcp_server.log", hook_id=None)
201+
assert LogViewerController._is_infrastructure_noise(entry) is True
202+
203+
def test_log_parser_without_hook_id_is_noise(self) -> None:
204+
entry = self._make_entry("log_parser", hook_id=None)
205+
assert LogViewerController._is_infrastructure_noise(entry) is True
206+
207+
def test_infra_logger_with_hook_id_is_not_noise(self) -> None:
208+
"""Infrastructure logger entry WITH hook_id should be preserved."""
209+
entry = self._make_entry("mcp.server.streamable_http", hook_id="abc-123")
210+
assert LogViewerController._is_infrastructure_noise(entry) is False
211+
212+
def test_webhook_logger_without_hook_id_is_not_noise(self) -> None:
213+
"""Non-infrastructure logger without hook_id is kept (could be startup log)."""
214+
entry = self._make_entry("GithubWebhook", hook_id=None)
215+
assert LogViewerController._is_infrastructure_noise(entry) is False
216+
217+
def test_webhook_logger_with_hook_id_is_not_noise(self) -> None:
218+
entry = self._make_entry("GithubWebhook", hook_id="abc-123")
219+
assert LogViewerController._is_infrastructure_noise(entry) is False

webhook_server/utils/helpers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,16 @@ def get_logger_with_params(
104104
)
105105

106106
# Attach JsonLogHandler for writing log records to the webhook JSONL file.
107-
# Only attach when a log file path is configured (skip console-only loggers)
108-
# and only once per logger instance to avoid duplicate handlers.
107+
# Only attach when:
108+
# - A log file path is configured (skip console-only loggers)
109+
# - The logger is for the main webhook log (log_file_name not explicitly set)
110+
# Infrastructure loggers (mcp_server.log, logs_server.log) must NOT write
111+
# to webhooks_*.json because their entries lack webhook context (hook_id,
112+
# event_type, etc.) and pollute the webhook log with noise entries.
113+
# - Only once per logger instance to avoid duplicate handlers.
109114
# Uses _config.data_dir/logs (same directory as StructuredLogWriter) instead
110115
# of deriving from the text log file path, which may differ for absolute paths.
111-
if log_file_path_resolved:
116+
if log_file_path_resolved and not log_file_name:
112117
log_dir = os.path.join(_config.data_dir, "logs")
113118
with _JSON_HANDLER_LOCK:
114119
if not any(isinstance(h, JsonLogHandler) and h.log_dir == Path(log_dir) for h in logger.handlers):

webhook_server/web/log_viewer.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@ class LogViewerController:
2929
# 60 seconds provides a reasonable maximum window for log correlation
3030
_DEFAULT_STEP_DURATION_MS = 60000
3131

32+
# Logger names from infrastructure components (MCP server, log viewer itself) that
33+
# should be excluded from webhook log queries. These loggers produce high-frequency
34+
# noise entries without webhook context (hook_id, event_type are None) and would
35+
# drown out actual webhook processing entries in unfiltered queries.
36+
_INFRASTRUCTURE_LOGGERS: frozenset[str] = frozenset({
37+
"mcp.server.streamable_http",
38+
"logs_server.log",
39+
"log_parser",
40+
"mcp_server.log",
41+
})
42+
3243
# Workflow stage patterns for PR flow analysis
3344
# These patterns match log messages to identify workflow stages and can be updated
3445
# when log message formats change without modifying the analysis logic
@@ -288,6 +299,26 @@ def _entry_matches_filters(
288299

289300
return True
290301

302+
@staticmethod
303+
def _is_infrastructure_noise(entry: LogEntry) -> bool:
304+
"""Check if a log entry is infrastructure noise that should be excluded.
305+
306+
Infrastructure loggers (MCP server, log viewer) produce high-frequency
307+
entries without webhook context. These are filtered out to prevent them
308+
from drowning actual webhook processing entries in unfiltered queries.
309+
310+
Only excludes entries that have NO webhook context (hook_id is None),
311+
preserving any infrastructure log that happens to correlate with a webhook.
312+
313+
Args:
314+
entry: LogEntry to check
315+
316+
Returns:
317+
True if the entry is infrastructure noise and should be excluded
318+
319+
"""
320+
return entry.logger_name in LogViewerController._INFRASTRUCTURE_LOGGERS and entry.hook_id is None
321+
291322
async def export_logs(
292323
self,
293324
format_type: str,
@@ -1092,15 +1123,17 @@ def sort_key(f: Path) -> tuple[int, float]:
10921123
if log_file.suffix == ".json":
10931124
# JSONL files: one compact JSON object per line
10941125
# Process both "log_entry" and "webhook_summary" entries
1126+
# Skip infrastructure logger entries that lack webhook context
10951127
async for line in f:
10961128
entry = self.log_parser.parse_json_log_entry(line)
1097-
if entry:
1129+
if entry and not LogViewerController._is_infrastructure_noise(entry):
10981130
buffer.append(entry)
10991131
else:
11001132
# Text log files: parse line by line
1133+
# Skip infrastructure logger entries that lack webhook context
11011134
async for line in f:
11021135
entry = self.log_parser.parse_log_entry(line)
1103-
if entry:
1136+
if entry and not LogViewerController._is_infrastructure_noise(entry):
11041137
buffer.append(entry)
11051138

11061139
for entry in reversed(buffer):
@@ -1162,9 +1195,10 @@ async def _stream_text_log_entries(self, max_files: int = 25, max_entries: int =
11621195
buffer: deque[LogEntry] = deque(maxlen=remaining_capacity)
11631196

11641197
async with aiofiles.open(log_file, encoding="utf-8") as f:
1198+
# Skip infrastructure logger entries that lack webhook context
11651199
async for line in f:
11661200
entry = self.log_parser.parse_log_entry(line)
1167-
if entry:
1201+
if entry and not LogViewerController._is_infrastructure_noise(entry):
11681202
buffer.append(entry)
11691203

11701204
for entry in reversed(buffer):

0 commit comments

Comments
 (0)