@@ -24,6 +24,7 @@ public class EventConsumer {
2424 private volatile boolean agentCompleted = false ;
2525 private volatile int pollTimeoutsAfterAgentCompleted = 0 ;
2626 private volatile @ Nullable TaskState lastSeenTaskState = null ;
27+ private volatile int pollTimeoutsWhileAwaitingFinal = 0 ;
2728
2829 private static final String ERROR_MSG = "Agent did not return any response" ;
2930 private static final int NO_WAIT = -1 ;
@@ -32,6 +33,10 @@ public class EventConsumer {
3233 // Grace period allows Kafka replication to deliver late-arriving events
3334 // 3 timeouts * 500ms = 1500ms grace period for replication delays
3435 private static final int MAX_POLL_TIMEOUTS_AFTER_AGENT_COMPLETED = 3 ;
36+ // Maximum number of consecutive poll timeouts to wait for the final event while awaitingFinalEvent is set
37+ // If the final event hasn't arrived after this many timeouts, assume it won't arrive
38+ // 6 timeouts * 500ms = 3000ms maximum wait for final event arrival
39+ private static final int MAX_POLL_TIMEOUTS_AWAITING_FINAL = 6 ;
3540
3641 public EventConsumer (EventQueue queue ) {
3742 this .queue = queue ;
@@ -82,8 +87,9 @@ public Flow.Publisher<EventQueueItem> consumeAll() {
8287 item = queue .dequeueEventItem (QUEUE_WAIT_MILLISECONDS );
8388 if (item == null ) {
8489 int queueSize = queue .size ();
85- LOGGER .debug ("EventConsumer poll timeout (null item), agentCompleted={}, queue.size()={}, timeoutCount={}" ,
86- agentCompleted , queueSize , pollTimeoutsAfterAgentCompleted );
90+ boolean awaitingFinal = queue .isAwaitingFinalEvent ();
91+ LOGGER .debug ("EventConsumer poll timeout (null item), agentCompleted={}, queue.size()={}, awaitingFinalEvent={}, timeoutCount={}, awaitingTimeoutCount={}" ,
92+ agentCompleted , queueSize , awaitingFinal , pollTimeoutsAfterAgentCompleted , pollTimeoutsWhileAwaitingFinal );
8793 // If agent completed, a poll timeout means no more events are coming
8894 // MainEventBusProcessor has 500ms to distribute events from MainEventBus
8995 // If we timeout with agentCompleted=true, all events have been distributed
@@ -94,8 +100,31 @@ public Flow.Publisher<EventQueueItem> consumeAll() {
94100 // CRITICAL: Do NOT close if task is in interrupted state (INPUT_REQUIRED, AUTH_REQUIRED)
95101 // Per A2A spec, interrupted states are NOT terminal - the stream must stay open
96102 // for future state updates even after agent completes (agent will be re-invoked later).
103+ //
104+ // CRITICAL: Don't start timeout counter if we're awaiting a final event.
105+ // The awaitingFinalEvent flag is set when MainQueue enqueues a final event
106+ // but it hasn't been distributed to this ChildQueue yet.
107+ // HOWEVER: Once we've waited MAX_POLL_TIMEOUTS_AWAITING_FINAL consecutive poll timeouts (~3s) for the
108+ // final event, give up and proceed with normal timeout logic to prevent infinite waiting.
97109 boolean isInterruptedState = lastSeenTaskState != null && lastSeenTaskState .isInterrupted ();
98- if (agentCompleted && queueSize == 0 && !isInterruptedState ) {
110+
111+ // Track how long we've been waiting for the final event
112+ if (awaitingFinal && queueSize == 0 ) {
113+ pollTimeoutsWhileAwaitingFinal ++;
114+ if (pollTimeoutsWhileAwaitingFinal >= MAX_POLL_TIMEOUTS_AWAITING_FINAL ) {
115+ LOGGER .debug ("Waited {} timeouts for final event but it hasn't arrived - proceeding with normal timeout logic (queue={})" ,
116+ pollTimeoutsWhileAwaitingFinal , System .identityHashCode (queue ));
117+ // Clear the flag on the queue itself, not just the local variable
118+ if (queue instanceof EventQueue .ChildQueue ) {
119+ ((EventQueue .ChildQueue ) queue ).clearAwaitingFinalEvent ();
120+ }
121+ awaitingFinal = false ; // Also update local variable for this iteration
122+ }
123+ } else {
124+ pollTimeoutsWhileAwaitingFinal = 0 ; // Reset when event arrives or queue not awaiting
125+ }
126+
127+ if (agentCompleted && queueSize == 0 && !isInterruptedState && !awaitingFinal ) {
99128 pollTimeoutsAfterAgentCompleted ++;
100129 if (pollTimeoutsAfterAgentCompleted >= MAX_POLL_TIMEOUTS_AFTER_AGENT_COMPLETED ) {
101130 LOGGER .debug ("Agent completed with {} consecutive poll timeouts and empty queue, closing for graceful completion (queue={})" ,
@@ -116,11 +145,16 @@ public Flow.Publisher<EventQueueItem> consumeAll() {
116145 LOGGER .debug ("Agent completed but queue has {} pending events, resetting timeout counter and continuing to poll (queue={})" ,
117146 queueSize , System .identityHashCode (queue ));
118147 pollTimeoutsAfterAgentCompleted = 0 ; // Reset counter when events arrive
148+ } else if (agentCompleted && awaitingFinal ) {
149+ LOGGER .debug ("Agent completed, awaiting final event (timeout {}/{}), continuing to poll (queue={})" ,
150+ pollTimeoutsWhileAwaitingFinal , MAX_POLL_TIMEOUTS_AWAITING_FINAL , System .identityHashCode (queue ));
151+ pollTimeoutsAfterAgentCompleted = 0 ; // Reset counter while awaiting final
119152 }
120153 continue ;
121154 }
122- // Event received - reset timeout counter
155+ // Event received - reset timeout counters
123156 pollTimeoutsAfterAgentCompleted = 0 ;
157+ pollTimeoutsWhileAwaitingFinal = 0 ;
124158 event = item .getEvent ();
125159 LOGGER .debug ("EventConsumer received event: {} (queue={})" ,
126160 event .getClass ().getSimpleName (), System .identityHashCode (queue ));
@@ -179,10 +213,11 @@ public Flow.Publisher<EventQueueItem> consumeAll() {
179213 // the stream-end signal can reach the client BEFORE the buffered final event,
180214 // causing the client to close the connection and never receive the final event.
181215 // This is especially important in replicated scenarios where events arrive via Kafka
182- // and timing is less deterministic. A small delay ensures the buffer flushes.
216+ // and timing is less deterministic. A delay ensures the buffer flushes.
217+ // Increased to 150ms to account for CI environment latency and JVM scheduling delays.
183218 if (isFinalSent ) {
184219 try {
185- Thread .sleep (50 ); // 50ms to allow SSE buffer flush
220+ Thread .sleep (150 ); // 150ms to allow SSE buffer flush in CI environments
186221 } catch (InterruptedException e ) {
187222 Thread .currentThread ().interrupt ();
188223 }
0 commit comments