ACP_AGENT_THOUGHT_CHUNK = "agent_thought_chunk"
ACP_CANCEL_METHODS = (
    "session/cancel",
    "session/stop",
    "session/end",
)
ACP_GRACEFUL_CANCEL_TIMEOUT_SECONDS = 12.0
# Version tag embedded in every non-empty ACP history prefix hash. Bump it
# whenever the message canonicalization feeding those hashes changes, so that
# hashes computed under the old scheme no longer match.
HISTORY_PREFIX_HASH_VERSION = 2
63+
64+
def _canonical_chat_message_for_history_hash(message: ChatMessage) -> dict[str, Any]:
    """Build the per-message payload that feeds the history prefix hash.

    The payload is exactly what :meth:`ChatMessage.to_dict` returns; nothing is
    added, removed, or copied here.

    NOTE(review): the intent is that ``to_dict`` leaves out fields that are not
    part of the visible transcript (such as ``metadata``) so they cannot
    invalidate the prefix hash -- confirm against ``ChatMessage.to_dict``.
    """
    identity_fields = message.to_dict()
    return identity_fields
73+
74+
def _hash_chat_messages_prefix_stable(
    messages: Sequence[ChatMessage],
    end_exclusive: int,
) -> str:
    """Return the SHA-256 hex digest of the first ``end_exclusive`` messages.

    A non-positive ``end_exclusive`` yields the digest of empty input (no
    version tag is mixed in for that case). Otherwise the prefix
    ``messages[:end_exclusive]`` is canonicalized message by message, wrapped
    together with ``HISTORY_PREFIX_HASH_VERSION`` in a JSON object, serialized
    with sorted keys and compact separators, and hashed as UTF-8 bytes.

    Ordinary slice semantics apply: an ``end_exclusive`` past the end of
    ``messages`` hashes every message.
    """
    digest = hashlib.sha256()
    if end_exclusive > 0:
        envelope = {
            "m": [
                _canonical_chat_message_for_history_hash(entry)
                for entry in messages[:end_exclusive]
            ],
            "v": HISTORY_PREFIX_HASH_VERSION,
        }
        # Sorted keys + fixed separators keep the byte stream reproducible.
        encoded = json.dumps(envelope, sort_keys=True, separators=(",", ":"))
        digest.update(encoded.encode("utf-8"))
    return digest.hexdigest()
6191
6292
6393class _RuntimeCancellable :
@@ -175,6 +205,15 @@ def _create_runtime(
175205
176206 @staticmethod
177207 def _resolve_client_session_id (request : ConnectorChatCompletionsRequest ) -> str :
208+ """Resolve the logical client session used to key ACP subprocess pools.
209+
210+ When neither ``ConnectorRequestContext.session_id`` nor
211+ ``request.request.session_id`` is set, callers share the pool key
212+ ``"default"`` (one ACP runtime per ``(project_dir, model)`` for all
213+ such traffic). Upstream layers should set a stable session id when
214+ isolation between clients or tabs is required.
215+ """
216+
178217 sid : str | None = None
179218 if request .context is not None and request .context .session_id :
180219 sid = request .context .session_id
@@ -190,12 +229,7 @@ def _resolve_client_session_id(request: ConnectorChatCompletionsRequest) -> str:
def _hash_messages_prefix(
    messages: Sequence[ChatMessage], end_exclusive: int
) -> str:
    """Hash the first ``end_exclusive`` entries of ``messages``.

    Pure delegate to the module-level ``_hash_chat_messages_prefix_stable``;
    the result is that function's hex digest, unchanged.
    """
    return _hash_chat_messages_prefix_stable(
        messages=messages,
        end_exclusive=end_exclusive,
    )
199233
200234 async def _acquire_runtime (
201235 self , request : ConnectorChatCompletionsRequest
@@ -218,8 +252,7 @@ async def _acquire_runtime(
218252 )
219253 self ._runtimes [runtime_key ] = runtime
220254
221- await self ._reap_idle_runtime (runtime )
222- return runtime
255+ return await self ._reap_idle_runtime (runtime_key , runtime )
223256
224257 def _resolve_project_dir_override (
225258 self , request : ConnectorChatCompletionsRequest
@@ -259,19 +292,46 @@ def _resolve_project_dir_for_request(
259292
260293 return self ._default_project_dir
261294
async def _reap_idle_runtime(
    self,
    runtime_key: tuple[str, str, str],
    runtime: ACPProcessRuntime,
) -> ACPProcessRuntime:
    """Kill ``runtime`` if it has idled too long and return the runtime to use.

    ``runtime`` is returned untouched when reaping does not apply: the idle
    timeout is disabled (``<= 0``), the request lock is missing or currently
    held, there is no process, no activity has been recorded
    (``last_activity <= 0``), or the idle period is still shorter than the
    timeout.

    Otherwise the runtime is killed and, under ``self._runtime_pool_lock``,
    the pool entry for ``runtime_key`` is inspected:

    * still ``runtime``: a new runtime built from the same project dir, model
      and client session id is registered and returned;
    * some other runtime: that registered instance is returned;
    * no entry: ``runtime`` itself is returned.
    """
    not_reapable = (
        self._idle_timeout <= 0
        or runtime.request_lock is None
        or runtime.request_lock.locked()
        or runtime.process is None
        or runtime.last_activity <= 0
    )
    if not_reapable:
        return runtime

    idle_for = time.monotonic() - runtime.last_activity
    if idle_for < self._idle_timeout:
        return runtime

    # The kill is awaited before the pool lock is taken, so the pool entry
    # may have changed by the time it is inspected below.
    await self._kill_runtime(runtime)

    async with self._runtime_pool_lock:
        registered = self._runtimes.get(runtime_key)
        if registered is None:
            return runtime
        if registered is not runtime:
            return registered
        fresh = self._create_runtime(
            runtime.project_dir,
            runtime.model,
            runtime.client_session_id,
        )
        self._runtimes[runtime_key] = fresh
        return fresh
334+
275335 async def _spawn_process (self , runtime : ACPProcessRuntime ) -> None :
276336 assert runtime .process_lock is not None
277337 async with runtime .process_lock :
@@ -800,6 +860,13 @@ async def _prepare_prompt_request_locked(
800860 runtime : ACPProcessRuntime ,
801861 request : ConnectorChatCompletionsRequest ,
802862 ) -> tuple [int , str ]:
863+ """Build ``session/prompt`` text and JSON-RPC id under ``runtime.request_lock``.
864+
865+ History is tracked with :class:`HistoryState` so we can send a compact
866+ tail transcript on append-only turns, resend the full transcript after
867+ detected divergence, or send only the last user line on idempotent retries.
868+ """
869+
803870 await self ._spawn_process (runtime )
804871 await self ._initialize_runtime (runtime )
805872
@@ -811,6 +878,7 @@ async def _prepare_prompt_request_locked(
811878 new_history_state : HistoryState
812879 user_message : str
813880
881+ # First prompt for this subprocess: full Markdown transcript + state seed.
814882 if state is None :
815883 user_message = ACPTranscriptSerializer .serialize (messages )
816884 new_history_state = HistoryState (
@@ -825,6 +893,7 @@ async def _prepare_prompt_request_locked(
825893 or self ._hash_messages_prefix (messages , n ) != prefix_hash
826894 )
827895
896+ # Prefix edit, branch switch, or truncated history vs. what ACP saw.
828897 if diverged :
829898 if logger .isEnabledFor (logging .INFO ):
830899 logger .info (
@@ -842,9 +911,11 @@ async def _prepare_prompt_request_locked(
842911 message_count = len (messages ),
843912 prefix_hash = self ._hash_messages_prefix (messages , len (messages )),
844913 )
914+ # Same message list as last successful prompt (e.g. client retry).
845915 elif len (messages ) == n :
846916 user_message = self ._extract_user_message_as_string (messages )
847917 new_history_state = state
918+ # Append-only: agent already saw messages[:n]; ship incremental context.
848919 else :
849920 user_message = ACPTranscriptSerializer .serialize_tail (messages , n )
850921 if not user_message .strip ():
@@ -1035,6 +1106,8 @@ async def chat_completions( # type: ignore[override]
10351106 if runtime .request_lock is None :
10361107 raise BackendError (message = "ACP runtime is missing request lock" )
10371108
1109+ # Streaming holds the per-runtime lock for the full SSE response so idle reap
1110+ # cannot swap the pool entry until the stream completes (see ``_reap_idle_runtime``).
10381111 if bool (getattr (request .request , "stream" , False )):
10391112 await runtime .request_lock .acquire ()
10401113 try :
0 commit comments