From 1abe858368df8a4ee215a346fad90efbd33dcecd Mon Sep 17 00:00:00 2001 From: Pigbibi <20649888+Pigbibi@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:15:36 +0800 Subject: [PATCH] fix: harden Codex startup and completion notices --- src/telegram_codex_bot/bot.py | 37 +++++++++++++++ src/telegram_codex_bot/terminal_parser.py | 17 +++++++ src/telegram_codex_bot/transcript_parser.py | 18 ++++++-- .../test_agent_input_queue.py | 45 +++++++++++++++++++ .../test_existing_window_binding.py | 8 +++- .../test_terminal_parser.py | 18 ++++++++ .../test_transcript_parser.py | 29 ++++++++++++ 7 files changed, 168 insertions(+), 4 deletions(-) diff --git a/src/telegram_codex_bot/bot.py b/src/telegram_codex_bot/bot.py index d9738f5..3683a58 100644 --- a/src/telegram_codex_bot/bot.py +++ b/src/telegram_codex_bot/bot.py @@ -187,6 +187,7 @@ from .session_monitor import NewMessage from .terminal_parser import ( extract_bash_output, + extract_interactive_content, is_codex_input_ready, is_interactive_ui, parse_status_update, @@ -2765,6 +2766,7 @@ async def _rotate_thread_after_usage_limit( thread_id, created_wid, text, + auto_confirm_startup_trust=True, ) if send_ok: await mark_window_working(context.bot, user_id, created_wid, thread_id) @@ -2793,6 +2795,22 @@ async def _rotate_thread_after_usage_limit( return True +async def _maybe_confirm_startup_trust_prompt( + window_id: str, + pane_text: str, +) -> tuple[bool, str]: + """Confirm safe startup-only trust prompts before the first forwarded text.""" + content = extract_interactive_content(pane_text) + if content is None: + return False, "" + if content.name != "DirectoryTrust": + return False, f"Codex is waiting for interactive input: {content.name}" + logger.info("Auto-confirming Codex directory trust prompt in window %s", window_id) + if await tmux_manager.send_control_key(window_id, "Enter"): + return True, "Confirmed Codex directory trust prompt" + return False, "Failed to confirm Codex directory trust prompt" + + async def _send_to_window_when_codex_ready( user_id: int, thread_id: int | None, @@ -2801,6 +2819,7 @@ async def _send_to_window_when_codex_ready( *, timeout: float = 60.0, interval: float = 0.5, + auto_confirm_startup_trust: bool = False, ) -> tuple[bool, str]: """Send text once the new Codex TUI is ready to accept input.""" deadline = asyncio.get_event_loop().time() + timeout @@ -2812,6 +2831,22 @@ async def _send_to_window_when_codex_ready( if capture.missing: return False, "Window not found (may have been closed)" pane_text = capture.text + interactive = extract_interactive_content(pane_text or "") + if interactive is not None: + if auto_confirm_startup_trust: + confirmed, trust_message = await _maybe_confirm_startup_trust_prompt( + window_id, + pane_text or "", + ) + last_message = trust_message + if confirmed: + await asyncio.sleep(interval) + continue + if trust_message.startswith("Failed to confirm"): + return False, trust_message + last_message = f"Codex is waiting for interactive input: {interactive.name}" + await asyncio.sleep(interval) + continue if not is_codex_input_ready(pane_text or ""): status = parse_status_update(pane_text or "") if status: @@ -3072,6 +3107,7 @@ async def _recover_missing_bound_window( thread_id, created_wid, text, + auto_confirm_startup_trust=True, ) if send_ok: await _refresh_session_map_after_first_prompt( @@ -3681,6 +3717,7 @@ async def _create_and_bind_window( pending_thread_id, created_wid, pending_text, + auto_confirm_startup_trust=True, ) if send_ok: await mark_window_working( diff --git a/src/telegram_codex_bot/terminal_parser.py b/src/telegram_codex_bot/terminal_parser.py index 486777c..e3cd72b 100644 --- a/src/telegram_codex_bot/terminal_parser.py +++ b/src/telegram_codex_bot/terminal_parser.py @@ -87,6 +87,20 @@ class UIPattern: re.compile(r"^\s*enter to submit\s*\|\s*esc to cancel", re.IGNORECASE), ), ), + UIPattern( + # Startup directory trust prompt shown before a fresh Codex TUI is ready. + name="DirectoryTrust", + top=( + re.compile( + r"^\s*Do you trust the contents of this directory\?", + re.IGNORECASE, + ), + ), + bottom=( + re.compile(r"^\s*Press enter to continue", re.IGNORECASE), + ), + min_gap=2, + ), UIPattern( # Codex command approval prompt shown by newer CLIs. name="CommandApproval", @@ -292,6 +306,9 @@ def codex_input_text(pane_text: str) -> str | None: if parse_status_update(pane_text): return None + if is_interactive_ui(pane_text): + return None + lines = pane_text.split("\n") tail = lines[-12:] prompt_idx: int | None = None diff --git a/src/telegram_codex_bot/transcript_parser.py b/src/telegram_codex_bot/transcript_parser.py index 4d0e1aa..fe89436 100644 --- a/src/telegram_codex_bot/transcript_parser.py +++ b/src/telegram_codex_bot/transcript_parser.py @@ -360,10 +360,22 @@ def parse_line(cls, line: str) -> dict | None: return None # Codex already emits assistant output as response_item/message. - # task_complete repeats the same final answer, so skip it to avoid - # duplicate Telegram notifications and duplicate history entries. + # When last_agent_message is present, task_complete repeats the + # final answer, so skip it to avoid duplicate Telegram notifications + # and duplicate history entries. A null last_agent_message means + # Codex ended without a final assistant message; emit a small + # completion notice so Telegram topics do not look silently stuck. if payload.get("type") == "task_complete": - return None + last_agent_message = payload.get("last_agent_message") + if isinstance(last_agent_message, str) and last_agent_message.strip(): + return None + return cls._build_message_entry( + role="assistant", + timestamp=timestamp, + content=( + "✅ Codex finished. No additional final message was emitted." + ), + ) if payload.get("type") == "error": if payload.get("codex_error_info") == "usage_limit_exceeded": diff --git a/tests/telegram_codex_bot/test_agent_input_queue.py b/tests/telegram_codex_bot/test_agent_input_queue.py index 033813d..9e5bd4d 100644 --- a/tests/telegram_codex_bot/test_agent_input_queue.py +++ b/tests/telegram_codex_bot/test_agent_input_queue.py @@ -417,6 +417,51 @@ async def test_send_to_window_when_ready_sends_with_visible_idle_prompt(monkeypa send_message.assert_awaited_once_with(12345, 42, "@1", "queued prompt") +@pytest.mark.asyncio +async def test_send_to_window_when_ready_confirms_startup_directory_trust( + monkeypatch, +): + trust_prompt = SimpleNamespace( + text=( + " Do you trust the contents of this directory?\n" + "\n" + "› 1. Yes, continue\n" + " 2. No, quit\n" + "\n" + " Press enter to continue\n" + ), + missing=False, + ) + ready_prompt = SimpleNamespace( + text="previous output\n\n› \n\n gpt-5.5 · ~/repo", + missing=False, + ) + send_control = AsyncMock(return_value=True) + send_message = AsyncMock(return_value=(True, "Sent")) + + monkeypatch.setattr( + bot_module, + "capture_agent_output", + AsyncMock(side_effect=[trust_prompt, ready_prompt]), + ) + monkeypatch.setattr(bot_module.tmux_manager, "send_control_key", send_control) + monkeypatch.setattr(bot_module, "_send_message_to_agent", send_message) + monkeypatch.setattr(bot_module.asyncio, "sleep", AsyncMock()) + + ok, message = await bot_module._send_to_window_when_codex_ready( + 12345, + 42, + "@1", + "first prompt", + timeout=1.0, + auto_confirm_startup_trust=True, + ) + + assert (ok, message) == (True, "Sent") + send_control.assert_awaited_once_with("@1", "Enter") + send_message.assert_awaited_once_with(12345, 42, "@1", "first prompt") + + @pytest.mark.asyncio async def test_handle_non_codex_bound_window_recovers_resumable_shell(monkeypatch): update_message = MagicMock() diff --git a/tests/telegram_codex_bot/test_existing_window_binding.py b/tests/telegram_codex_bot/test_existing_window_binding.py index dbc1acb..025d302 100644 --- a/tests/telegram_codex_bot/test_existing_window_binding.py +++ b/tests/telegram_codex_bot/test_existing_window_binding.py @@ -1040,7 +1040,13 @@ async def test_bound_topic_recovers_missing_window_and_forwards_text(self): assert mock_sm.user_window_offsets == {12345: {"@3": 99}} assert new_state.session_id == "session-1" assert new_state.cwd == "/tmp/project" - send_when_ready.assert_awaited_once_with(12345, 42, "@3", "continue") + send_when_ready.assert_awaited_once_with( + 12345, + 42, + "@3", + "continue", + auto_confirm_startup_trust=True, + ) refresh_session_map.assert_awaited_once_with( "@3", text="continue", diff --git a/tests/telegram_codex_bot/test_terminal_parser.py b/tests/telegram_codex_bot/test_terminal_parser.py index a221d13..39818f9 100644 --- a/tests/telegram_codex_bot/test_terminal_parser.py +++ b/tests/telegram_codex_bot/test_terminal_parser.py @@ -353,6 +353,24 @@ def test_hook_trust_prompt(self): assert "needs review" in result.content assert "Press t to trust all" in result.content + def test_directory_trust_prompt_is_not_input_ready(self): + pane = ( + " Do you trust the contents of this directory?\n" + "\n" + "› 1. Yes, continue\n" + " 2. No, quit\n" + "\n" + " Press enter to continue\n" + ) + + result = extract_interactive_content(pane) + + assert result is not None + assert result.name == "DirectoryTrust" + assert is_interactive_ui(pane) is True + assert codex_input_text(pane) is None + assert is_codex_input_ready(pane) is False + @pytest.mark.parametrize( "pane", [ diff --git a/tests/telegram_codex_bot/test_transcript_parser.py b/tests/telegram_codex_bot/test_transcript_parser.py index 833a2ec..b2af773 100644 --- a/tests/telegram_codex_bot/test_transcript_parser.py +++ b/tests/telegram_codex_bot/test_transcript_parser.py @@ -64,6 +64,35 @@ def test_usage_limit_error_is_left_for_monitor_handling(self): assert TranscriptParser.parse_line(json.dumps(event)) is None + def test_task_complete_without_final_message_emits_completion_notice(self): + event = { + "type": "event_msg", + "timestamp": "2026-06-09T07:57:37.121Z", + "payload": { + "type": "task_complete", + "last_agent_message": None, + }, + } + + parsed = TranscriptParser.parse_line(json.dumps(event)) + + assert parsed is not None + assert parsed["type"] == "assistant" + assert parsed["text"] == ( + "✅ Codex finished. No additional final message was emitted." + ) + + def test_task_complete_with_final_message_is_skipped_to_avoid_duplicate(self): + event = { + "type": "event_msg", + "payload": { + "type": "task_complete", + "last_agent_message": "Already emitted as response_item/message.", + }, + } + + assert TranscriptParser.parse_line(json.dumps(event)) is None + def test_response_item_encrypted_reasoning_renders_thinking_placeholder( self, monkeypatch ):