From 4b9fedc999a3dab9cc61ba12df6bb14d0f45ddf7 Mon Sep 17 00:00:00 2001
From: github-aelf <yue.zheng@aelf.io>
Date: Wed, 13 May 2026 14:50:52 +0800
Subject: [PATCH 1/5] Add lark-bot reply-chain coverage audit

---
 ...ark-bot-reply-chain-test-coverage-audit.md | 307 ++++++++++++++++++
 1 file changed, 307 insertions(+)
 create mode 100644 docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md

diff --git a/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md
new file mode 100644
index 000000000..bc17d851a
--- /dev/null
+++ b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md
@@ -0,0 +1,307 @@
+---
+title: feature/lark-bot reply-chain test coverage audit
+status: active
+owner: codex
+issue: 634
+branch: test/2026-05-12_lark-bot-reply-chain-regressions
+---
+
+# `feature/lark-bot` 回复链测试覆盖审计
+
+> 对应 issue: [#634](https://github.com/aevatarAI/aevatar/issues/634)
+>
+> 目标：不是罗列“这个分支有很多测试”，而是明确回答四件事：
+>
+> 1. 这条回复链现在的高风险点是什么
+> 2. 已有测试到底锁住了哪些不变量
+> 3. 还缺哪些关键回归保障
+> 4. 后续 `#635 / #636 / #637` 应该先打哪里
+
+## 范围与基线
+
+本次审计以 `feature/lark-bot` 当前链路为基线，重点查看以下六组测试：
+
+- `test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs`
+- `test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs`
+- `test/Aevatar.GAgents.ChannelRuntime.Tests/ChannelConversationTurnRunnerTests.cs`
+- `test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs`
+- `test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs`
+- `test/Aevatar.AI.Tests/ToolCallLoopTests.cs`
+
+另外，以下测试与护栏作为辅助证据使用，用来校正对 `ChatRuntime`、AI 组件边界和结构约束的判断：
+
+- `test/Aevatar.AI.Tests/ChatRuntimeStreamingBufferTests.cs`
+- `test/Aevatar.AI.Tests/AIComponentCoverageTests.cs`
+- `test/Aevatar.Architecture.Tests/Rules/*`
+- `tools/ci/*guard.sh`
+
+并对照以下实现文件确认风险面：
+
+- `agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs`
+- `agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.LarkCardStreaming.cs`
+- `agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.NyxRelayStreaming.cs`
+- `agents/Aevatar.GAgents.NyxidChat/AgentRunGAgent.cs`
+- `agents/Aevatar.GAgents.NyxidChat/ConversationReplyGenerator.cs`
+- `agents/Aevatar.GAgents.Channel.Runtime/TurnStreamingReplySink.cs`
+- `src/Aevatar.AI.Core/Tools/ToolCallLoop.cs`
+- `src/Aevatar.AI.Core/Chat/ChatRuntime.cs`
+
+当前主链路为：
+
+`ConversationGAgent -> IChannelLlmReplyRunDispatcher -> AgentRunDispatcher -> AgentRunGAgent -> ConversationReplyGenerator -> ChatRuntime`
+
+## 总结论
+
+先说结论：这个分支**不是“测试不够”**，而是**测试分布不均**。
+
+已经覆盖得比较强的部分：
+
+- `ConversationGAgent` 的 dedup、retry、reply token strip/re-enrich、streaming/text fallback
+- `TurnStreamingReplySink` 的 throttle/cap/finalize/dispatch race
+- `ChannelConversationTurnRunner` 的 inbound metadata、reaction、relay/direct reply、card action
+- `ToolCallLoop` 的 tool round、middleware、reasoning propagation、length recovery
+
+相对薄弱的部分：
+
+- `ConversationGAgent -> AgentRunGAgent` 之间“accepted / committed / delivered”语义边界是否足够诚实
+- `AgentRunGAgent` 的 duplicate terminal signal / repeated callback / stale continuation 组合情形
+- `ConversationReplyGenerator` 与 `ToolCallLoop` 的 closeout 联动，而不只是各自单测
+- 新引入 seam `IChannelLlmReplyRunDispatcher` 的结构护栏
+
+所以，`#634` 的产出不应该是“建议多补一些 happy path”，而应该是：
+
+1. 承认哪些地方已经很扎实，避免重复造轮子
+2. 明确指出后续只需要补少量但高价值的回归
+3. 把结构护栏缺口单独拎出来，交给 `#637`
+
+## 风险矩阵
+
+| 风险点 | 现有覆盖 | 结论 | 缺口 / 后续动作 | 本轮优先级 |
+| --- | --- | --- | --- | --- |
+| `ConversationGAgent` 入站 dedup 与 retry ownership | `ConversationGAgentDedupTests` 覆盖 duplicate activity、duplicate command、retry scheduled / success / exhausted / permanent failure | 覆盖强，主不变量已锁住 | 暂不补功能测试 | P2 |
+| relay reply token 不进入 committed fact | `HandleNyxRelayInboundActivityAsync_NeverPersistsReplyTokenIntoEventStore`、`StripsReplyTokenFromPersistedNeedsLlmReplyEvent_ButKeepsItOnRunCommandCopy`、`RehydratesRelayToken...`、`PrefersRunEchoedReplyToken...` | 覆盖强，边界意识明确 | 后续只需补 chain-level 诚实性，不必再补“有没有 strip”基础测试 | P1 |
+| `ConversationGAgent` streaming/text/card fallback | `ConversationGAgentDedupTests` 大量覆盖 text chunk、final fallback、card create/stream/finalize | 覆盖强 | 暂不补同类 happy path | P2 |
+| `AgentRunDispatcher` 创建 run actor 并发起 start | `DispatchAsync_ShouldCreateRunActorAndDispatchStartCommand` | 基本覆盖到位 | 不缺基础测试 | P2 |
+| `AgentRunGAgent` duplicate start / timeout / empty / throw / missing target / missing activity / stale / missing token | `AgentRunGAgentTests` 已覆盖对应单点场景 | 覆盖中上，单点保护不少 | 还缺“terminal 后重复信号”和“组合情形”回归 | P1 |
+| `AgentRunGAgent` cleanup 语义 | 已覆盖 schedule cleanup、cleanup destroy | 基础覆盖到位 | 还缺“非 terminal cleanup 请求是否 no-op / 幂等”类测试 | P2 |
+| `AgentRunGAgent` streaming ready/card chunk/text-only 路径 | 已覆盖 streaming enabled/disabled、card mode、non-relay path | 覆盖中上 | 可暂缓 | P2 |
+| `ChannelConversationTurnRunner` inbound metadata / relay reaction / workflow card action / reply delivery | 覆盖很广，包括 sender binding、owner prefs、reaction clear、interactive relay reply | 覆盖强 | 暂不扩 | P2 |
+| `TurnStreamingReplySink` throttle/cap/finalize race | 15 个测试，覆盖 cap、throttle、timer、dispatch in flight、idempotent dispose、duplicate suppression、dispatch throw | 覆盖非常强 | 本轮不需要再做大面积补测，只需补 review 指向的新竞态时再加 | P2 |
+| `ConversationReplyGenerator` placeholder / owner vs sender prefs / route fallback / approval middleware | 已覆盖主要分支 | 覆盖中等 | 缺 warning/closeout 联动测试 | P1 |
+| `ToolCallLoop` tool round / middleware / request identity / reasoning propagation / length recovery | 覆盖强 | 单体循环语义稳定 | 缺和 generator / runtime 的联动 closeout 测试 | P1 |
+| `ChatRuntime` 多轮流式收尾语义 | `ChatRuntimeStreamingBufferTests` 已直接覆盖 `ChatStreamAsync` 的流式 chunk、tool-call follow-up、reasoning 透传；另有 `ToolCallLoopTests` 与 AI tests 辅助覆盖 | 覆盖中等，流式主路径已有直接保护 | 仍缺更贴 reply-chain closeout 的联动表达，建议只补一到两个 closeout 级回归，不要大规模重写测试 | P1 |
+| 新 seam `IChannelLlmReplyRunDispatcher` 的依赖方向 | 现有 architecture/channel guard 已限制“不要直连 NyxIdChatGAgent”，但没有直接锁住这条 seam | 结构护栏缺口明确 | 在 `#637` 增加 architecture test 或 CI guard | P0 |
+
+## 分文件审计
+
+### 1. `AgentRunGAgentTests`
+
+现有覆盖亮点：
+
+- run actor 创建与 `AgentRunStartRequested` 投递
+- duplicate start 幂等
+- ready/drop signal not accepted 时的 retry
+- unexpected exception / timeout / empty reply / generator throw
+- relay token echo、missing token drop、stale request drop
+- streaming text / card 路径
+- owner LLM config 与 bearer token 透传
+- terminal cleanup schedule + destroy
+
+结论：
+
+- 这组测试已经不是“空白区”
+- 真正缺的不是单一错误分支，而是**terminal 之后重复信号是否还可能造成二次回传 / 二次调度**
+
+建议补点：
+
+- terminal 状态下再次收到 `AgentRunStartRequested` / cleanup / internal retry 时的 no-op 幂等
+- duplicate `LlmReplyReadyEvent` / `Dropped` / `Failed` 对 conversation 端是否仍可能造成二次 closeout
+
+对应后续：
+
+- 归入 `#635`
+
+### 2. `ConversationGAgentDedupTests`
+
+现有覆盖亮点：
+
+- activity / command dedup
+- inbound retry 调度归 actor 所有
+- reply token strip-on-persist 与 runtime re-enrich
+- 运行后 `ReplyToken` 回传优先级
+- deferred reply dispatch
+- relay streaming chunk / text fallback / card mode fallback
+- final edit 与 partial degradation
+
+结论：
+
+- 这是当前分支覆盖最厚的一块之一
+- 很多“边界安全”基础测试已经有了，后续不需要再从零补“有没有 strip token”
+
+仍有缺口：
+
+- 更高一层的“accepted / committed / delivered”语义诚实性还没有被单独表达出来
+- 例如：conversation 持久化了 `NeedsLlmReplyEvent`，并不等于用户已经收到 reply；目前这类语义主要靠代码结构理解，而不是显式测试名称锁住
+
+对应后续：
+
+- 归入 `#635` 做 chain-level contract test
+
+### 3. `ChannelConversationTurnRunnerTests`
+
+现有覆盖亮点：
+
+- inbound metadata 组装
+- owner/sender config layering
+- relay typing reaction post / clear
+- slash / card action / workflow resume
+- direct reply / relay reply / adapter rejection
+- `OnReplyDeliveredAsync` 对 streaming path 的 reaction clear
+
+结论：
+
+- 这组测试已经像一个“适配层回归套件”
+- 不建议在当前 issue 再扩 happy path
+
+仍有缺口：
+
+- 和 `AgentRunGAgent` 的 end-to-end 语义边界不是在这层表达的
+- 所以不应把后续的 actor 语义测试继续塞到 runner tests 里
+
+对应后续：
+
+- 本轮只作为引用基线，不新增任务
+
+### 4. `TurnStreamingReplySinkTests`
+
+现有覆盖亮点：
+
+- interim cap 后 stash，不提前 dispatch
+- dispatch 中 stash + throttle gate + deferred timer
+- finalize bypass throttle
+- finalize in flight wait
+- pending == last emitted duplicate suppression
+- dispatch throw swallow
+- dispose idempotency
+
+结论：
+
+- 这组已经相当扎实，是本链路里并发回归保护最强的一部分
+- `#636` 不应该被理解成“这里完全没测”，而是“只补后续 review 指向的新竞态”
+
+仍有缺口：
+
+- 当前没有发现明显大洞
+- 若要补，也应只补“新 code path 引入的新 race”，不要做覆盖率型补测
+
+对应后续：
+
+- `#636` 的范围应收窄，避免过度施工
+
+### 5. `ConversationReplyGeneratorTests`
+
+现有覆盖亮点：
+
+- relay callback URL 注入
+- placeholder emit / skip
+- approval middleware per turn
+- owner vs sender preferences layering
+- route fallback / no token fallback
+
+结论：
+
+- 配置和偏好层面的覆盖不错
+- 但 generator 与 `ToolCallLoop` / `ChatRuntime` 的 closeout 联动还比较少
+
+主要缺口：
+
+- `SkillRegistry` 存在但 `IRemoteSkillFetcher` 缺失时的 warning 行为未见直接测试
+- 还缺“tool call -> tool result -> final answer”在 generator 这一层只收尾一次的测试表达
+
+对应后续：
+
+- 归入 `#637`
+
+### 6. `ToolCallLoopTests`
+
+现有覆盖亮点：
+
+- no tool / tool then follow-up
+- request id 与 per-call metadata
+- hook / middleware mutation 与 terminate
+- max rounds exhausted
+- length recovery
+- reasoning content propagation
+- DSML tool call 变体
+
+结论：
+
+- `ToolCallLoop` 的单体语义已经很全
+- 目前缺的不是 loop 内部逻辑，而是它和 `ConversationReplyGenerator` / `ChatRuntime` 的交界处
+
+主要缺口：
+
+- final answer closeout 与 streaming sink / reply ready 的整体联动没有被直接表达
+- “warning path + final content + no duplicate closeout” 这种跨层问题还未集中锁住
+
+对应后续：
+
+- 归入 `#637`
+
+## 已有护栏与缺口
+
+已有护栏：
+
+- `test/Aevatar.Architecture.Tests/Rules/ForbiddenPatternTests.cs`
+  - 已限制中间层 `actor/entity/run/session` ID -> context 字典事实态
+- `tools/ci/channel_relay_nyx_chat_direct_create_guard.sh`
+  - 已限制 channel relay/runtime 直连 `NyxIdChatGAgent`
+- `test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs`
+  - 已限制外部入口绕过 `ConversationGAgent`
+
+当前缺口：
+
+- 还没有一个专门护栏明确锁住：
+  - `ConversationGAgent` 依赖的是 `IChannelLlmReplyRunDispatcher`
+  - 而不是重新回退为直接依赖旧 inbox runtime 或具体 NyxIdChat 实现
+
+这说明：
+
+- `#637` 很适合新增一个非常小但高价值的 architecture test / CI guard
+
+## 建议执行顺序
+
+基于这次审计，后续 issue 的执行顺序建议是：
+
+1. `#635`
+   - 先补 `ConversationGAgent <-> AgentRunGAgent` 的 chain-level actor 语义与 credential boundary
+2. `#637`
+   - 再补 closeout 回归和最小结构护栏
+3. `#636`
+   - 最后只做 review 指向的新 sink 竞态回归，不做大面积补测
+
+原因很简单：
+
+- `TurnStreamingReplySink` 现在不是最薄的位置
+- 真正最薄的是“跨 actor handoff 的诚实性”和“closeout 是否只发生一次”
+
+## 结论清单
+
+可以明确认为“已足够，不需要优先再补”的：
+
+- `ConversationGAgent` dedup / retry 基础语义
+- relay token strip/re-enrich 基础机制
+- `TurnStreamingReplySink` 的大部分并发收尾行为
+- `ToolCallLoop` 的单体循环逻辑
+- `ChannelConversationTurnRunner` 的大部分 adapter / reaction / relay 分支
+
+可以明确认为“下一步最值得补”的：
+
+- `ConversationGAgent -> AgentRunGAgent` handoff 的 chain-level contract tests
+- `AgentRunGAgent` terminal 后重复信号 / cleanup / retry 组合幂等
+- `ConversationReplyGenerator` 与 `ToolCallLoop` / `ChatRuntime` 的 closeout 联动
+- `IChannelLlmReplyRunDispatcher` 这条新 seam 的结构护栏
+
+一句话总结：
+
+`feature/lark-bot` 现在缺的不是“更多测试”，而是**更少但更准的回归测试与护栏**。

From 9462fa129f2885b5c8044a833b37fb5c548a1064 Mon Sep 17 00:00:00 2001
From: github-aelf <yue.zheng@aelf.io>
Date: Wed, 13 May 2026 18:09:22 +0800
Subject: [PATCH 2/5] Add actor handoff regression tests for lark reply chain

---
 .../ConversationGAgentDedupTests.cs           | 34 +++++++++
 .../AgentRunGAgentTests.cs                    | 73 +++++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs
index 565685d11..3b5b183b2 100644
--- a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs
+++ b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs
@@ -356,6 +356,40 @@ public async Task HandleInboundActivityAsync_WhenRunnerRequestsDeferredReply_Per
         parsed.Activity.Id.ShouldBe("act-llm");
     }
 
+    [Fact]
+    public async Task HandleInboundActivityAsync_WhenRunDispatcherAcceptsRequest_ShouldNotPersistCompletedReplyUntilReadyArrives()
+    {
+        // Accepted-for-run is weaker than committed/user-visible reply. The actor may persist
+        // NeedsLlmReplyEvent and dispatch it immediately, but must not emit a completed fact
+        // until the run actor sends LlmReplyReadyEvent (or a terminal failure) back.
+        var dispatcher = new RecordingRunDispatcher();
+        var runner = new RecordingTurnRunner
+        {
+            InboundResultFactory = activity => ConversationTurnResult.LlmReplyRequested(
+                new NeedsLlmReplyEvent
+                {
+                    CorrelationId = activity.Id,
+                    TargetActorId = "conversation:actor",
+                    RegistrationId = "reg-1",
+                    Activity = activity.Clone(),
+                    RequestedAtUnixMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
+                }),
+        };
+        var (agent, store) = CreateAgent(runner, "conv-accepted-not-committed", dispatcher);
+
+        await agent.HandleInboundActivityAsync(CreateActivity("act-accepted-only", "conv:slack:C1"));
+
+        dispatcher.Dispatched.Count.ShouldBe(1);
+        runner.LlmReplyCount.ShouldBe(0);
+        agent.State.PendingLlmReplyRequests.ShouldContain(req => req.CorrelationId == "act-accepted-only");
+
+        var events = await store.GetEventsAsync(agent.Id);
+        events.Count.ShouldBe(1);
+        events[0].EventType.ShouldContain(nameof(NeedsLlmReplyEvent));
+        events.ShouldNotContain(record =>
+            record.EventType.Contains(nameof(ConversationTurnCompletedEvent), StringComparison.Ordinal));
+    }
+
     [Fact]
     public async Task HandleLlmReplyReadyAsync_WhenDuplicateCorrelationId_CollapsesToSingleOutboundCommit()
     {
diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
index 105740f84..c4beb003b 100644
--- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
+++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
@@ -280,6 +280,44 @@ await runtime.HandleStartAsync(new NeedsLlmReplyEvent
         cleanupCommand.RunId.Should().Be("corr-cleanup-schedule");
     }
 
+    [Fact]
+    public async Task HandleStartAsync_TerminalRun_ShouldNotEmitDuplicateReadyEvent()
+    {
+        var actor = Substitute.For<IActor>();
+        actor.Id.Returns("actor-1");
+        var handled = new List<EventEnvelope>();
+        actor.When(x => x.HandleEventAsync(Arg.Any<EventEnvelope>(), Arg.Any<CancellationToken>()))
+            .Do(call => handled.Add(call.Arg<EventEnvelope>()));
+        var actorRuntime = new DispatchingActorRuntime(("actor-1", actor));
+        var scheduler = new RecordingCallbackScheduler();
+        var replyGenerator = new RecordingReplyGenerator(() => false) { ReplyText = "ok" };
+        var runtime = CreateRunAgent(
+            actorRuntime,
+            replyGenerator,
+            new AsyncLocalInteractiveReplyCollector(),
+            new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions
+            {
+                InteractiveRepliesEnabled = true,
+                StreamingRepliesEnabled = false,
+            },
+            callbackScheduler: scheduler);
+        var request = new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-terminal-idempotent",
+            TargetActorId = "actor-1",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            ReplyToken = "relay-token-terminal-idempotent",
+        };
+
+        await runtime.HandleStartAsync(request);
+        await runtime.HandleStartAsync(request.Clone());
+
+        replyGenerator.CallCount.Should().Be(1);
+        handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor));
+        runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced);
+    }
+
     [Fact]
     public async Task HandleCleanupAsync_ShouldDestroyTerminalRunActor()
     {
@@ -312,6 +350,41 @@ await runtime.HandleCleanupAsync(new AgentRunCleanupRequested
         actorRuntime.DestroyedIds.Should().Contain(runtime.Id);
     }
 
+    [Fact]
+    public async Task HandleStartAsync_TerminalDrop_ShouldNotDispatchDuplicateDropNotification()
+    {
+        var actor = Substitute.For<IActor>();
+        actor.Id.Returns("actor-1");
+        var handled = new List<EventEnvelope>();
+        actor.When(x => x.HandleEventAsync(Arg.Any<EventEnvelope>(), Arg.Any<CancellationToken>()))
+            .Do(call => handled.Add(call.Arg<EventEnvelope>()));
+        var actorRuntime = new DispatchingActorRuntime(("actor-1", actor));
+        var runtime = CreateRunAgent(
+            actorRuntime,
+            new RecordingReplyGenerator(() => false) { ReplyText = "ok" },
+            new AsyncLocalInteractiveReplyCollector(),
+            new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions
+            {
+                InteractiveRepliesEnabled = true,
+                StreamingRepliesEnabled = false,
+            });
+
+        var request = new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-terminal-drop-idempotent",
+            TargetActorId = "actor-1",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            // Relay request with no command-carried ReplyToken should drop before LLM execution.
+        };
+
+        await runtime.HandleStartAsync(request);
+        await runtime.HandleStartAsync(request.Clone());
+
+        handled.Should().ContainSingle(e => e.Payload.Is(DeferredLlmReplyDroppedEvent.Descriptor));
+        runtime.State.Status.Should().Be(AgentRunStatus.Dropped);
+    }
+
     [Fact]
     public async Task HandleStartAsync_OnOutputDispatchFailure_PersistsProducedReply_AndRetryReDispatchesWithoutRerunningLlm()
     {

From f60337227d5008d0320ba32a0ee749418c16d640 Mon Sep 17 00:00:00 2001
From: github-aelf <yue.zheng@aelf.io>
Date: Thu, 14 May 2026 15:14:36 +0800
Subject: [PATCH 3/5] Add lark-bot reply chain regression tests

---
 ...ark-bot-reply-chain-test-coverage-audit.md |  10 +-
 .../Rules/ChannelArchitectureTests.cs         |  17 +++
 .../AgentRunGAgentTests.cs                    |  92 ++++++++++++++++
 .../ConversationReplyGeneratorTests.cs        | 102 ++++++++++++++++++
 .../TurnStreamingReplySinkTests.cs            |  71 ++++++++++++
 5 files changed, 291 insertions(+), 1 deletion(-)

diff --git a/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md
index bc17d851a..c9610190d 100644
--- a/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md
+++ b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md
@@ -89,7 +89,7 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions
 | `TurnStreamingReplySink` throttle/cap/finalize race | 15 个测试，覆盖 cap、throttle、timer、dispatch in flight、idempotent dispose、duplicate suppression、dispatch throw | 覆盖非常强 | 本轮不需要再做大面积补测，只需补 review 指向的新竞态时再加 | P2 |
 | `ConversationReplyGenerator` placeholder / owner vs sender prefs / route fallback / approval middleware | 已覆盖主要分支 | 覆盖中等 | 缺 warning/closeout 联动测试 | P1 |
 | `ToolCallLoop` tool round / middleware / request identity / reasoning propagation / length recovery | 覆盖强 | 单体循环语义稳定 | 缺和 generator / runtime 的联动 closeout 测试 | P1 |
-| `ChatRuntime` 多轮流式收尾语义 | `ChatRuntimeStreamingBufferTests` 已直接覆盖 `ChatStreamAsync` 的流式 chunk、tool-call follow-up、reasoning 透传；另有 `ToolCallLoopTests` 与 AI tests 辅助覆盖 | 覆盖中等，流式主路径已有直接保护 | 仍缺更贴 reply-chain closeout 的联动表达，建议只补一到两个 closeout 级回归，不要大规模重写测试 | P1 |
+| `ChatRuntime` 多轮流式收尾语义 | `ChatRuntimeStreamingBufferTests` 已直接覆盖 `ChatStreamAsync` 的流式 chunk、tool-call follow-up、reasoning 透传；另有 `ToolCallLoopTests` 与 AI tests 辅助覆盖 | 覆盖中等，流式主路径已有直接保护 | `outer stream` 是否应该把 per-round terminal signal 收口成 single overall terminal chunk 目前仍属目标态问题，不是已接受契约；后续应先定契约，再决定是否补 failing test 或实现改动 | P1 |
 | 新 seam `IChannelLlmReplyRunDispatcher` 的依赖方向 | 现有 architecture/channel guard 已限制“不要直连 NyxIdChatGAgent”，但没有直接锁住这条 seam | 结构护栏缺口明确 | 在 `#637` 增加 architecture test 或 CI guard | P0 |
 
 ## 分文件审计
@@ -193,6 +193,7 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions
 
 - 当前没有发现明显大洞
 - 若要补，也应只补“新 code path 引入的新 race”，不要做覆盖率型补测
+- 本轮新增的两条回归更接近这个方向：一条锁 `FinalizeAsync` 等待 drain 时 `Dispose()` 不应再把 stashed final flush 发出去；一条锁 deferred flush dispatch 失败后，后续 delta 仍可恢复推进且不重复计数
 
 对应后续：
 
@@ -212,6 +213,7 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions
 
 - 配置和偏好层面的覆盖不错
 - 但 generator 与 `ToolCallLoop` / `ChatRuntime` 的 closeout 联动还比较少
+- 其中 `ChatRuntime` 的 terminal chunk contract 目前还没有稳定成仓库内共识，不适合在这一层直接写死“single overall terminal chunk”断言；这更像后续需要和研发先对齐的目标态
 
 主要缺口：
 
@@ -259,6 +261,12 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions
 - `test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs`
   - 已限制外部入口绕过 `ConversationGAgent`
 
+新增补强：
+
+- `test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs`
+  - 可以补一条直接锁 `ConversationGAgent -> IChannelLlmReplyRunDispatcher` seam 的最小门禁
+  - 但当前更适合先作为 regex / source-text 级最小护栏看待，不应误认为已经升级为 Roslyn / 编译级依赖门禁
+
 当前缺口：
 
 - 还没有一个专门护栏明确锁住：
diff --git a/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs b/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs
index 4620727bf..1ef115003 100644
--- a/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs
+++ b/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs
@@ -354,6 +354,23 @@ public void DurableInboxImplementations_Must_DependOn_AsyncStream_ChatActivity()
             + string.Join("\n", violators));
     }
 
+    [Fact]
+    public void ConversationGAgent_SourceTextGuard_ShouldReference_RunDispatcherSeam_AndAvoid_Concrete_RunOrInbox_Runtime_Types()
+    {
+        var conversationPath = ChannelSourceIndex.EnumerateProductionSourceFiles()
+            .Single(path => path.EndsWith(
+                "/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs",
+                System.StringComparison.Ordinal));
+        var text = File.ReadAllText(conversationPath);
+
+        // Minimal source-text guard only: keeps the intended seam visible in review/CI,
+        // but is not a Roslyn/compile-level dependency rule.
+        Assert.Matches(@"\bIChannelLlmReplyRunDispatcher\b", text);
+        Assert.DoesNotMatch(@"\bAgentRunDispatcher\b", text);
+        Assert.DoesNotMatch(@"\bDurableInboxSubscriber\b", text);
+        Assert.DoesNotMatch(@"\bIChannelDurableInbox\b", text);
+    }
+
     private static bool IsAllowedOutboundSendCaller(string normalizedPath)
     {
         if (normalizedPath.Contains("/agents/channels/", System.StringComparison.Ordinal))
diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
index c4beb003b..4ccf482d1 100644
--- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
+++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
@@ -350,6 +350,64 @@ await runtime.HandleCleanupAsync(new AgentRunCleanupRequested
         actorRuntime.DestroyedIds.Should().Contain(runtime.Id);
     }
 
+    [Fact]
+    public async Task HandleCleanupAsync_ShouldIgnoreNonTerminalRun()
+    {
+        var actor = Substitute.For<IActor>();
+        actor.Id.Returns("actor-1");
+        var actorRuntime = new DispatchingActorRuntime(("actor-1", actor));
+        var runtime = CreateRunAgent(
+            actorRuntime,
+            new RecordingReplyGenerator(() => false) { ReplyText = "ok" },
+            new AsyncLocalInteractiveReplyCollector(),
+            new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions
+            {
+                InteractiveRepliesEnabled = true,
+                StreamingRepliesEnabled = false,
+            });
+
+        await runtime.HandleCleanupAsync(new AgentRunCleanupRequested
+        {
+            RunId = "corr-non-terminal-cleanup",
+        });
+
+        actorRuntime.DestroyedIds.Should().BeEmpty();
+    }
+
+    [Fact]
+    public async Task HandleCleanupAsync_ShouldIgnoreMismatchedTerminalRunId()
+    {
+        var actor = Substitute.For<IActor>();
+        actor.Id.Returns("actor-1");
+        var actorRuntime = new DispatchingActorRuntime(("actor-1", actor));
+        var runtime = CreateRunAgent(
+            actorRuntime,
+            new RecordingReplyGenerator(() => false) { ReplyText = "ok" },
+            new AsyncLocalInteractiveReplyCollector(),
+            new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions
+            {
+                InteractiveRepliesEnabled = true,
+                StreamingRepliesEnabled = false,
+            });
+
+        await runtime.HandleStartAsync(new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-cleanup-mismatch",
+            TargetActorId = "actor-1",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            ReplyToken = "relay-token-cleanup-mismatch",
+        });
+        await runtime.HandleCleanupAsync(new AgentRunCleanupRequested
+        {
+            RunId = "corr-some-other-run",
+        });
+
+        actorRuntime.DestroyedIds.Should().BeEmpty();
+        runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced);
+        runtime.State.ReplyDispatched.Should().BeTrue();
+    }
+
     [Fact]
     public async Task HandleStartAsync_TerminalDrop_ShouldNotDispatchDuplicateDropNotification()
     {
@@ -385,6 +443,40 @@ public async Task HandleStartAsync_TerminalDrop_ShouldNotDispatchDuplicateDropNo
         runtime.State.Status.Should().Be(AgentRunStatus.Dropped);
     }
 
+    [Fact]
+    public async Task HandleStartAsync_TerminalFailure_ShouldNotDispatchDuplicateFailureReadyEvent()
+    {
+        var collector = new AsyncLocalInteractiveReplyCollector();
+        var replyGenerator = new ThrowingReplyGenerator(new InvalidOperationException("boom"));
+        var actor = Substitute.For<IActor>();
+        actor.Id.Returns("channel-conversation:lark:group:oc_group_chat_1");
+        var handled = new List<EventEnvelope>();
+        actor.When(x => x.HandleEventAsync(Arg.Any<EventEnvelope>(), Arg.Any<CancellationToken>()))
+            .Do(call => handled.Add(call.Arg<EventEnvelope>()));
+        var actorRuntime = new DispatchingActorRuntime(("actor-1", actor));
+        var runtime = CreateRunAgent(
+            actorRuntime,
+            replyGenerator,
+            collector,
+            new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions { InteractiveRepliesEnabled = true });
+        var request = new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-terminal-failed-idempotent",
+            TargetActorId = "actor-1",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            ReplyToken = "relay-token-terminal-failed-idempotent",
+        };
+
+        await runtime.HandleStartAsync(request);
+        await runtime.HandleStartAsync(request.Clone());
+
+        handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor));
+        runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced);
+        runtime.State.ReplyDispatched.Should().BeTrue();
+        runtime.State.ProducedTerminalState.Should().Be(LlmReplyTerminalState.Failed);
+    }
+
     [Fact]
     public async Task HandleStartAsync_OnOutputDispatchFailure_PersistsProducedReply_AndRetryReDispatchesWithoutRerunningLlm()
     {
diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs
index 3bfb1d327..a804c03e3 100644
--- a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs
+++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs
@@ -8,6 +8,7 @@
 using Aevatar.GAgents.Channel.NyxIdRelay;
 using Aevatar.GAgents.Channel.Runtime;
 using Aevatar.GAgents.NyxidChat;
+using Microsoft.Extensions.Logging;
 
 namespace Aevatar.GAgents.ChannelRuntime.Tests;
 
@@ -161,6 +162,75 @@ public async Task GenerateReplyAsync_CreatesApprovalMiddlewarePerTurn()
         approvalHandler.RequestCount.Should().Be(4);
     }
 
+    [Fact]
+    public async Task GenerateReplyAsync_WithSkillRegistryButNoRemoteFetcher_LogsWarningOnlyOnceAcrossTurns()
+    {
+        var logger = new ListLogger<NyxIdConversationReplyGenerator>();
+        var skillRegistry = new SkillRegistry();
+        skillRegistry.Register(new SkillDefinition
+        {
+            Name = "remote-skill",
+            Description = "Remote skill",
+            Instructions = "Does remote work",
+            Source = SkillSource.Remote,
+            RemoteId = "remote-skill-id",
+        });
+        var generator = new NyxIdConversationReplyGenerator(
+            new RecordingProviderFactory(),
+            skillRegistry: skillRegistry,
+            remoteSkillFetcher: null,
+            logger: logger);
+
+        for (var i = 0; i < 2; i++)
+        {
+            var reply = await generator.GenerateReplyAsync(
+                new ChatActivity
+                {
+                    Id = $"msg-warning-{i}",
+                    Conversation = new ConversationReference { CanonicalKey = $"lark:dm:user-warning-{i}" },
+                    Content = new MessageContent { Text = "hello" },
+                },
+                new Dictionary<string, string>(),
+                streamingSink: null,
+                CancellationToken.None);
+
+            reply.Should().Be("ok");
+        }
+
+        logger.WarningMessages.Should().ContainSingle(message =>
+            message.Contains("SkillRegistry is registered without IRemoteSkillFetcher", StringComparison.Ordinal));
+    }
+
+    [Fact]
+    public async Task GenerateReplyAsync_WithStreamingSink_EmitsPlaceholderThenFinalTextAcrossToolFollowUp()
+    {
+        var providerFactory = new ToolCallingProviderFactory();
+        var generator = new NyxIdConversationReplyGenerator(
+            providerFactory,
+            toolSources: [new SingleToolSource(new ApprovalRequiredTool())],
+            relayOptions: new global::Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions
+            {
+                StreamingPlaceholderText = "…",
+            });
+        var sink = new RecordingStreamingSink();
+
+        var reply = await generator.GenerateReplyAsync(
+            new ChatActivity
+            {
+                Id = "msg-tool-follow-up",
+                Conversation = new ConversationReference { CanonicalKey = "lark:dm:user-tool-follow-up" },
+                Content = new MessageContent { Text = "run tool" },
+            },
+            new Dictionary<string, string>(),
+            sink,
+            CancellationToken.None);
+
+        reply.Should().Be("done");
+        providerFactory.Requests.Should().HaveCount(2);
+        providerFactory.Requests[1].Messages.Should().Contain(message => message.Role == "tool");
+        sink.Emissions.Should().Equal("…", "done");
+    }
+
     [Fact]
     public async Task GenerateReplyAsync_AppliesSenderPrefsOverChainOwnerDefault()
     {
@@ -569,6 +639,8 @@ private sealed class ToolCallingProviderFactory : ILLMProviderFactory, ILLMProvi
     {
         public string Name => "tool-calling";
 
+        public List<LLMRequest> Requests { get; } = [];
+
         public ILLMProvider GetProvider(string name) => this;
 
         public ILLMProvider GetDefault() => this;
@@ -582,6 +654,7 @@ public async IAsyncEnumerable<LLMStreamChunk> ChatStreamAsync(
             LLMRequest request,
             [EnumeratorCancellation] CancellationToken ct = default)
         {
+            Requests.Add(request);
             if (request.Messages.Any(static message => message.Role == "tool"))
             {
                 yield return new LLMStreamChunk { DeltaContent = "done" };
@@ -636,4 +709,33 @@ public Task<ToolApprovalResult> RequestApprovalAsync(ToolApprovalRequest request
             return Task.FromResult(ToolApprovalResult.Denied("test denial"));
         }
     }
+
+    private sealed class ListLogger<T> : ILogger<T>
+    {
+        public List<string> WarningMessages { get; } = [];
+
+        public IDisposable BeginScope<TState>(TState state) where TState : notnull => NullScope.Instance;
+
+        public bool IsEnabled(LogLevel logLevel) => true;
+
+        public void Log<TState>(
+            LogLevel logLevel,
+            EventId eventId,
+            TState state,
+            Exception? exception,
+            Func<TState, Exception?, string> formatter)
+        {
+            if (logLevel == LogLevel.Warning)
+                WarningMessages.Add(formatter(state, exception));
+        }
+
+        private sealed class NullScope : IDisposable
+        {
+            public static readonly NullScope Instance = new();
+
+            public void Dispose()
+            {
+            }
+        }
+    }
 }
diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs
index 7d609bc4d..b0e5fadec 100644
--- a/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs
+++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs
@@ -233,6 +233,77 @@ public async Task FinalizeAsync_DispatchInFlight_WaitsForFinalChunkOnWire()
         sink.ChunksEmitted.Should().Be(2);
     }
 
+    [Fact]
+    public async Task Dispose_WhenFinalizeIsAwaitingDrain_UnblocksWithoutDispatchingStashedFinalText()
+    {
+        var firstDispatchGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+        var envelopes = new List<EventEnvelope>();
+        var dispatchCount = 0;
+
+        var dispatchPort = Substitute.For<IActorDispatchPort>();
+        dispatchPort.DispatchAsync("target-actor", Arg.Any<EventEnvelope>(), Arg.Any<CancellationToken>())
+            .Returns(call =>
+            {
+                envelopes.Add(call.Arg<EventEnvelope>());
+                dispatchCount++;
+                return dispatchCount == 1 ? firstDispatchGate.Task : Task.CompletedTask;
+            });
+
+        var sink = CreateSink(dispatchPort, throttleMs: 0, out _);
+
+        var deltaTask = sink.OnDeltaAsync("first", CancellationToken.None);
+        var finalizeTask = sink.FinalizeAsync("first plus final", CancellationToken.None);
+
+        deltaTask.IsCompleted.Should().BeFalse();
+        finalizeTask.IsCompleted.Should().BeFalse();
+        envelopes.Should().ContainSingle();
+
+        sink.Dispose();
+        await finalizeTask;
+
+        envelopes.Should().ContainSingle("disposing the sink must drop the stashed final flush");
+
+        firstDispatchGate.SetResult();
+        await deltaTask;
+
+        envelopes.Should().ContainSingle();
+        sink.ChunksEmitted.Should().Be(1);
+    }
+
+    [Fact]
+    public async Task OnDeltaAsync_WhenDeferredFlushDispatchFails_LaterDeltaStillPublishesLatestTextOnce()
+    {
+        var dispatchAttempts = 0;
+        var dispatchPort = Substitute.For<IActorDispatchPort>();
+        dispatchPort.DispatchAsync("target-actor", Arg.Any<EventEnvelope>(), Arg.Any<CancellationToken>())
+            .Returns(_ =>
+            {
+                dispatchAttempts++;
+                if (dispatchAttempts == 2)
+                    return Task.FromException(new InvalidOperationException("boom"));
+
+                return Task.CompletedTask;
+            });
+
+        var sink = CreateSink(dispatchPort, throttleMs: 750, out var time);
+
+        await sink.OnDeltaAsync("chunk 1", CancellationToken.None);
+        time.Advance(TimeSpan.FromMilliseconds(100));
+        await sink.OnDeltaAsync("chunk 1 + 2", CancellationToken.None);
+
+        time.Advance(TimeSpan.FromMilliseconds(800));
+
+        sink.ChunksEmitted.Should().Be(1, "the timer-driven dispatch failed and must not count as emitted");
+
+        time.Advance(TimeSpan.FromMilliseconds(100));
+        await sink.OnDeltaAsync("chunk 1 + 3", CancellationToken.None);
+
+        sink.ChunksEmitted.Should().Be(2);
+        dispatchAttempts.Should().Be(3);
+        time.Advance(TimeSpan.FromMilliseconds(2000));
+        sink.ChunksEmitted.Should().Be(2);
+    }
+
     [Fact]
     public async Task PendingTimerEqualsLastEmitted_DoesNotEmitDuplicate()
     {

From ad5f2bbc5ccab72f4a4438cc6a83c1bcf467a484 Mon Sep 17 00:00:00 2001
From: github-aelf <yue.zheng@aelf.io>
Date: Fri, 15 May 2026 16:37:59 +0800
Subject: [PATCH 4/5] Update lark-bot reply chain regression coverage

---
 .../ConversationGAgentDedupTests.cs           |  71 +++++++++++++
 .../AgentRunGAgentTests.cs                    | 100 +++++++++++++++++-
 2 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs
index 1ab4759a8..70a1a574d 100644
--- a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs
+++ b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs
@@ -419,6 +419,77 @@ public async Task HandleLlmReplyReadyAsync_WhenDuplicateCorrelationId_CollapsesT
         events.Count.ShouldBe(3);
         events.Last().EventType.ShouldContain(nameof(ConversationTurnCompletedEvent));
         events.Select(e => e.EventType).ShouldContain(s => s.Contains(nameof(LlmReplyDeliveredEvent)));
+        agent.State.LastReplyDelivery.RunId.ShouldBe("act-llm-ready");
+        agent.State.LastReplyDelivery.OutcomeCase.ShouldBe(ReplyDeliveryStatus.OutcomeOneofCase.Delivered);
+        agent.State.LastReplyDelivery.Delivered.ChannelMessageId.ShouldBe(string.Empty);
+    }
+
+    [Fact]
+    public async Task HandleLlmReplyReadyAsync_WhenDeliverySucceeds_UpdatesLastReplyDeliveryState()
+    {
+        var runner = new RecordingTurnRunner
+        {
+            LlmReplyResultFactory = reply => ConversationTurnResult.Sent(
+                "sent:llm:" + reply.CorrelationId,
+                reply.Outbound?.Clone() ?? new MessageContent { Text = "ack" },
+                "bot",
+                new OutboundDeliveryContext
+                {
+                    ReplyMessageId = "om_delivery_ok",
+                    CorrelationId = reply.CorrelationId,
+                }),
+        };
+        var (agent, store) = CreateAgent(runner, "conv-llm-delivered");
+
+        await agent.HandleLlmReplyReadyAsync(new LlmReplyReadyEvent
+        {
+            CorrelationId = "corr-delivered",
+            RegistrationId = "reg-1",
+            SourceActorId = "agent-run",
+            Activity = CreateActivity("corr-delivered", "conv:slack:C1"),
+            Outbound = new MessageContent { Text = "reply-from-llm" },
+            TerminalState = LlmReplyTerminalState.Completed,
+            ReadyAtUnixMs = 43,
+        });
+
+        agent.State.LastReplyDelivery.RunId.ShouldBe("corr-delivered");
+        agent.State.LastReplyDelivery.OutcomeCase.ShouldBe(ReplyDeliveryStatus.OutcomeOneofCase.Delivered);
+        agent.State.LastReplyDelivery.Delivered.ChannelMessageId.ShouldBe("om_delivery_ok");
+        agent.State.LastReplyDelivery.Delivered.AckedAtUnixMs.ShouldBeGreaterThan(0);
+
+        var events = await store.GetEventsAsync(agent.Id);
+        events.Select(e => e.EventType).ShouldContain(s => s.Contains(nameof(LlmReplyDeliveredEvent)));
+    }
+
+    [Fact]
+    public async Task HandleLlmReplyReadyAsync_WhenDeliveryFails_UpdatesLastReplyDeliveryState()
+    {
+        var runner = new RecordingTurnRunner
+        {
+            LlmReplyResultFactory = _ => ConversationTurnResult.PermanentFailure("lark_send_failed", "lark rejected send"),
+        };
+        var (agent, store) = CreateAgent(runner, "conv-llm-delivery-failed");
+
+        await agent.HandleLlmReplyReadyAsync(new LlmReplyReadyEvent
+        {
+            CorrelationId = "corr-delivery-failed",
+            RegistrationId = "reg-1",
+            SourceActorId = "agent-run",
+            Activity = CreateActivity("corr-delivery-failed", "conv:slack:C1"),
+            Outbound = new MessageContent { Text = "reply-from-llm" },
+            TerminalState = LlmReplyTerminalState.Completed,
+            ReadyAtUnixMs = 43,
+        });
+
+        agent.State.LastReplyDelivery.RunId.ShouldBe("corr-delivery-failed");
+        agent.State.LastReplyDelivery.OutcomeCase.ShouldBe(ReplyDeliveryStatus.OutcomeOneofCase.Failed);
+        agent.State.LastReplyDelivery.Failed.ErrorCode.ShouldBe("lark_send_failed");
+        agent.State.LastReplyDelivery.Failed.ErrorMessage.ShouldBe("lark rejected send");
+        agent.State.LastReplyDelivery.Failed.FailedAtUnixMs.ShouldBeGreaterThan(0);
+
+        var events = await store.GetEventsAsync(agent.Id);
+        events.Select(e => e.EventType).ShouldContain(s => s.Contains(nameof(LlmReplyDeliveryFailedEvent)));
+        events.Last().EventType.ShouldContain(nameof(ConversationContinueFailedEvent));
     }
 
     [Fact]
diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
index 7349cce4f..3f28ffa27 100644
--- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
+++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs
@@ -14,6 +14,7 @@
 using Google.Protobuf.WellKnownTypes;
 using Microsoft.Extensions.DependencyInjection;
 using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Time.Testing;
 using NSubstitute;
 using Xunit;
 
@@ -50,6 +51,96 @@ await dispatcher.DispatchAsync(new NeedsLlmReplyEvent
         command.Request.ReplyToken.Should().Be("relay-token-dispatch");
     }
 
+    [Fact]
+    public async Task DispatchAsync_ShouldReturnAcceptedOutcomeOnly()
+    {
+        var actorRuntime = new DispatchingActorRuntime();
+        var streamProvider = new RecordingStreamProvider();
+        var clock = new FakeTimeProvider(new DateTimeOffset(2026, 5, 15, 8, 0, 0, TimeSpan.Zero));
+        var dispatcher = new AgentRunDispatcher(
+            actorRuntime,
+            streamProvider,
+            NullLogger<AgentRunDispatcher>.Instance,
+            clock);
+
+        var result = await dispatcher.DispatchAsync(new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-accepted",
+            TargetActorId = "conversation-actor",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            ReplyToken = "relay-token-accepted",
+            RequestedAtUnixMs = clock.GetUtcNow().ToUnixTimeMilliseconds(),
+        }, CancellationToken.None);
+
+        var produced = streamProvider.Produced.Should().ContainSingle().Subject;
+        result.Phase.Should().Be(DispatchPhase.Accepted);
+        result.CommandId.Should().NotBeNullOrWhiteSpace();
+        result.CommandId.Should().Be(produced.Envelope.Id);
+        result.RunActorId.Should().Be(AgentRunGAgent.BuildActorId("corr-accepted"));
+        result.RunActorId.Should().Be(produced.StreamId);
+        result.AcceptedAtUnixMs.Should().Be(clock.GetUtcNow().ToUnixTimeMilliseconds());
+    }
+
+    [Fact]
+    public async Task DispatchAsync_WhenRequestIsStale_ShouldRejectBeforeEnqueue()
+    {
+        var actorRuntime = new DispatchingActorRuntime();
+        var streamProvider = new RecordingStreamProvider();
+        var clock = new FakeTimeProvider(new DateTimeOffset(2026, 5, 15, 8, 10, 0, TimeSpan.Zero));
+        var dispatcher = new AgentRunDispatcher(
+            actorRuntime,
+            streamProvider,
+            NullLogger<AgentRunDispatcher>.Instance,
+            clock);
+
+        var result = await dispatcher.DispatchAsync(new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-stale",
+            TargetActorId = "conversation-actor",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            RequestedAtUnixMs = clock.GetUtcNow().AddMinutes(-6).ToUnixTimeMilliseconds(),
+        }, CancellationToken.None);
+
+        result.Phase.Should().Be(DispatchPhase.RejectedStale);
+        result.CommandId.Should().BeEmpty();
+        result.RunActorId.Should().BeNull();
+        result.AcceptedAtUnixMs.Should().Be(0);
+        streamProvider.Produced.Should().BeEmpty();
+    }
+
+    [Fact(Skip = "RejectedDuplicate is part of the ADR-0021 dispatcher contract but duplicate suppression is not implemented in AgentRunDispatcher yet.")]
+    public async Task DispatchAsync_WhenRequestIsDuplicate_ShouldReturnRejectedDuplicate_AndAvoidSecondEnqueue()
+    {
+        var actorRuntime = new DispatchingActorRuntime();
+        var streamProvider = new RecordingStreamProvider();
+        var clock = new FakeTimeProvider(new DateTimeOffset(2026, 5, 15, 8, 20, 0, TimeSpan.Zero));
+        var dispatcher = new AgentRunDispatcher(
+            actorRuntime,
+            streamProvider,
+            NullLogger<AgentRunDispatcher>.Instance,
+            clock);
+        var request = new NeedsLlmReplyEvent
+        {
+            CorrelationId = "corr-duplicate-dispatch",
+            TargetActorId = "conversation-actor",
+            RegistrationId = "reg-1",
+            Activity = BuildRelayActivity(),
+            RequestedAtUnixMs = clock.GetUtcNow().ToUnixTimeMilliseconds(),
+        };
+
+        await dispatcher.DispatchAsync(request, CancellationToken.None);
+        var duplicate = await dispatcher.DispatchAsync(request.Clone(), CancellationToken.None);
+
+        duplicate.Phase.Should().Be(DispatchPhase.RejectedDuplicate);
+        // ADR-0021 only locks the duplicate receipt phase and the fact that the
+        // duplicate request must not be enqueued again. The exact duplicate
+        // receipt payload shape (e.g. whether RunActorId/AcceptedAtUnixMs are
+        // populated) is intentionally left to the eventual implementation.
+        streamProvider.Produced.Should().ContainSingle("duplicate dispatch must not enqueue a second run start command");
+    }
+
     [Fact]
     public void ApplyReplyProduced_HistoricalEventWithoutReplyText_MarksAsAlreadyDispatched()
     {
@@ -319,7 +410,7 @@ public async Task HandleStartAsync_TerminalRun_ShouldNotEmitDuplicateReadyEvent(
 
         replyGenerator.CallCount.Should().Be(1);
         handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor));
-        runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced);
+        runtime.State.Status.Should().Be(AgentRunStatus.ReplyHandedOff);
     }
 
     [Fact]
@@ -607,8 +698,8 @@ await runtime.HandleCleanupAsync(new AgentRunCleanupRequested
         });
 
         actorRuntime.DestroyedIds.Should().BeEmpty();
-        runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced);
-        runtime.State.ReplyDispatched.Should().BeTrue();
+        runtime.State.Status.Should().Be(AgentRunStatus.ReplyHandedOff);
+        runtime.State.CleanupCompletedAtUnixMs.Should().Be(0);
     }
 
     [Fact]
@@ -675,8 +766,7 @@ public async Task HandleStartAsync_TerminalFailure_ShouldNotDispatchDuplicateFai
         await runtime.HandleStartAsync(request.Clone());
 
         handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor));
-        runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced);
-        runtime.State.ReplyDispatched.Should().BeTrue();
+        runtime.State.Status.Should().Be(AgentRunStatus.ReplyHandedOff);
         runtime.State.ProducedTerminalState.Should().Be(LlmReplyTerminalState.Failed);
     }
 

From 04a8920b71c4d49b64a52dd4ba07da3be94e6db5 Mon Sep 17 00:00:00 2001
From: github-aelf <yue.zheng@aelf.io>
Date: Fri, 15 May 2026 18:04:33 +0800
Subject: [PATCH 5/5] Fix reply generator regression assertions

---
 .../ConversationReplyGeneratorTests.cs                        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs
index cb4110681..c2f688f9b 100644
--- a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs
+++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs
@@ -229,7 +229,7 @@ public async Task GenerateReplyAsync_WithSkillRegistryButNoRemoteFetcher_LogsWar
                 streamingSink: null,
                 CancellationToken.None);
 
-            reply.Should().Be("ok");
+            reply.Text.Should().Be("ok");
         }
 
         logger.WarningMessages.Should().ContainSingle(message =>
@@ -260,7 +260,7 @@ public async Task GenerateReplyAsync_WithStreamingSink_EmitsPlaceholderThenFinal
             sink,
             CancellationToken.None);
 
-        reply.Should().Be("done");
+        reply.Text.Should().Be("done");
         providerFactory.Requests.Should().HaveCount(2);
         providerFactory.Requests[1].Messages.Should().Contain(message => message.Role == "tool");
         sink.Emissions.Should().Equal("…", "done");