From 4b9fedc999a3dab9cc61ba12df6bb14d0f45ddf7 Mon Sep 17 00:00:00 2001 From: github-aelf Date: Wed, 13 May 2026 14:50:52 +0800 Subject: [PATCH 1/5] Add lark-bot reply-chain coverage audit --- ...ark-bot-reply-chain-test-coverage-audit.md | 307 ++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md diff --git a/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md new file mode 100644 index 000000000..bc17d851a --- /dev/null +++ b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md @@ -0,0 +1,307 @@ +--- +title: feature/lark-bot reply-chain test coverage audit +status: active +owner: codex +issue: 634 +branch: test/2026-05-12_lark-bot-reply-chain-regressions +--- + +# `feature/lark-bot` 回复链测试覆盖审计 + +> 对应 issue: [#634](https://github.com/aevatarAI/aevatar/issues/634) +> +> 目标:不是罗列“这个分支有很多测试”,而是明确回答四件事: +> +> 1. 这条回复链现在的高风险点是什么 +> 2. 已有测试到底锁住了哪些不变量 +> 3. 还缺哪些关键回归保障 +> 4. 后续 `#635 / #636 / #637` 应该先打哪里 + +## 范围与基线 + +本次审计以 `feature/lark-bot` 当前链路为基线,重点查看以下六组测试: + +- `test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs` +- `test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs` +- `test/Aevatar.GAgents.ChannelRuntime.Tests/ChannelConversationTurnRunnerTests.cs` +- `test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs` +- `test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs` +- `test/Aevatar.AI.Tests/ToolCallLoopTests.cs` + +另外,以下测试与护栏作为辅助证据使用,用来校正对 `ChatRuntime`、AI 组件边界和结构约束的判断: + +- `test/Aevatar.AI.Tests/ChatRuntimeStreamingBufferTests.cs` +- `test/Aevatar.AI.Tests/AIComponentCoverageTests.cs` +- `test/Aevatar.Architecture.Tests/Rules/*` +- `tools/ci/*guard.sh` + +并对照以下实现文件确认风险面: + +- `agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs` +- `agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.LarkCardStreaming.cs` +- `agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.NyxRelayStreaming.cs` +- `agents/Aevatar.GAgents.NyxidChat/AgentRunGAgent.cs` +- `agents/Aevatar.GAgents.NyxidChat/ConversationReplyGenerator.cs` +- `agents/Aevatar.GAgents.Channel.Runtime/TurnStreamingReplySink.cs` +- `src/Aevatar.AI.Core/Tools/ToolCallLoop.cs` +- `src/Aevatar.AI.Core/Chat/ChatRuntime.cs` + +当前主链路为: + +`ConversationGAgent -> IChannelLlmReplyRunDispatcher -> AgentRunDispatcher -> AgentRunGAgent -> ConversationReplyGenerator -> ChatRuntime` + +## 总结论 + +先说结论:这个分支**不是“测试不够”**,而是**测试分布不均**。 + +已经覆盖得比较强的部分: + +- `ConversationGAgent` 的 dedup、retry、reply token strip/re-enrich、streaming/text fallback +- `TurnStreamingReplySink` 的 throttle/cap/finalize/dispatch race +- `ChannelConversationTurnRunner` 的 inbound metadata、reaction、relay/direct reply、card action +- `ToolCallLoop` 的 tool round、middleware、reasoning propagation、length recovery + +相对薄弱的部分: + +- `ConversationGAgent -> AgentRunGAgent` 之间“accepted / committed / delivered”语义边界是否足够诚实 +- `AgentRunGAgent` 的 duplicate terminal signal / repeated callback / stale continuation 组合情形 +- `ConversationReplyGenerator` 与 `ToolCallLoop` 的 closeout 联动,而不只是各自单测 +- 新引入 seam `IChannelLlmReplyRunDispatcher` 的结构护栏 + +所以,`#634` 的产出不应该是“建议多补一些 happy path”,而应该是: + +1. 承认哪些地方已经很扎实,避免重复造轮子 +2. 明确指出后续只需要补少量但高价值的回归 +3. 把结构护栏缺口单独拎出来,交给 `#637` + +## 风险矩阵 + +| 风险点 | 现有覆盖 | 结论 | 缺口 / 后续动作 | 本轮优先级 | +| --- | --- | --- | --- | --- | +| `ConversationGAgent` 入站 dedup 与 retry ownership | `ConversationGAgentDedupTests` 覆盖 duplicate activity、duplicate command、retry scheduled / success / exhausted / permanent failure | 覆盖强,主不变量已锁住 | 暂不补功能测试 | P2 | +| relay reply token 不进入 committed fact | `HandleNyxRelayInboundActivityAsync_NeverPersistsReplyTokenIntoEventStore`、`StripsReplyTokenFromPersistedNeedsLlmReplyEvent_ButKeepsItOnRunCommandCopy`、`RehydratesRelayToken...`、`PrefersRunEchoedReplyToken...` | 覆盖强,边界意识明确 | 后续只需补 chain-level 诚实性,不必再补“有没有 strip”基础测试 | P1 | +| `ConversationGAgent` streaming/text/card fallback | `ConversationGAgentDedupTests` 大量覆盖 text chunk、final fallback、card create/stream/finalize | 覆盖强 | 暂不补同类 happy path | P2 | +| `AgentRunDispatcher` 创建 run actor 并发起 start | `DispatchAsync_ShouldCreateRunActorAndDispatchStartCommand` | 基本覆盖到位 | 不缺基础测试 | P2 | +| `AgentRunGAgent` duplicate start / timeout / empty / throw / missing target / missing activity / stale / missing token | `AgentRunGAgentTests` 已覆盖对应单点场景 | 覆盖中上,单点保护不少 | 还缺“terminal 后重复信号”和“组合情形”回归 | P1 | +| `AgentRunGAgent` cleanup 语义 | 已覆盖 schedule cleanup、cleanup destroy | 基础覆盖到位 | 还缺“非 terminal cleanup 请求是否 no-op / 幂等”类测试 | P2 | +| `AgentRunGAgent` streaming ready/card chunk/text-only 路径 | 已覆盖 streaming enabled/disabled、card mode、non-relay path | 覆盖中上 | 可暂缓 | P2 | +| `ChannelConversationTurnRunner` inbound metadata / relay reaction / workflow card action / reply delivery | 覆盖很广,包括 sender binding、owner prefs、reaction clear、interactive relay reply | 覆盖强 | 暂不扩 | P2 | +| `TurnStreamingReplySink` throttle/cap/finalize race | 15 个测试,覆盖 cap、throttle、timer、dispatch in flight、idempotent dispose、duplicate suppression、dispatch throw | 覆盖非常强 | 本轮不需要再做大面积补测,只需补 review 指向的新竞态时再加 | P2 | +| `ConversationReplyGenerator` placeholder / owner vs sender prefs / route fallback / approval middleware | 已覆盖主要分支 | 覆盖中等 | 缺 warning/closeout 联动测试 | P1 | +| `ToolCallLoop` tool round / middleware / request identity / reasoning propagation / length recovery | 覆盖强 | 单体循环语义稳定 | 缺和 generator / runtime 的联动 closeout 测试 | P1 | +| `ChatRuntime` 多轮流式收尾语义 | `ChatRuntimeStreamingBufferTests` 已直接覆盖 `ChatStreamAsync` 的流式 chunk、tool-call follow-up、reasoning 透传;另有 `ToolCallLoopTests` 与 AI tests 辅助覆盖 | 覆盖中等,流式主路径已有直接保护 | 仍缺更贴 reply-chain closeout 的联动表达,建议只补一到两个 closeout 级回归,不要大规模重写测试 | P1 | +| 新 seam `IChannelLlmReplyRunDispatcher` 的依赖方向 | 现有 architecture/channel guard 已限制“不要直连 NyxIdChatGAgent”,但没有直接锁住这条 seam | 结构护栏缺口明确 | 在 `#637` 增加 architecture test 或 CI guard | P0 | + +## 分文件审计 + +### 1. `AgentRunGAgentTests` + +现有覆盖亮点: + +- run actor 创建与 `AgentRunStartRequested` 投递 +- duplicate start 幂等 +- ready/drop signal not accepted 时的 retry +- unexpected exception / timeout / empty reply / generator throw +- relay token echo、missing token drop、stale request drop +- streaming text / card 路径 +- owner LLM config 与 bearer token 透传 +- terminal cleanup schedule + destroy + +结论: + +- 这组测试已经不是“空白区” +- 真正缺的不是单一错误分支,而是**terminal 之后重复信号是否还可能造成二次回传 / 二次调度** + +建议补点: + +- terminal 状态下再次收到 `AgentRunStartRequested` / cleanup / internal retry 时的 no-op 幂等 +- duplicate `LlmReplyReadyEvent` / `Dropped` / `Failed` 对 conversation 端是否仍可能造成二次 closeout + +对应后续: + +- 归入 `#635` + +### 2. `ConversationGAgentDedupTests` + +现有覆盖亮点: + +- activity / command dedup +- inbound retry 调度归 actor 所有 +- reply token strip-on-persist 与 runtime re-enrich +- 运行后 `ReplyToken` 回传优先级 +- deferred reply dispatch +- relay streaming chunk / text fallback / card mode fallback +- final edit 与 partial degradation + +结论: + +- 这是当前分支覆盖最厚的一块之一 +- 很多“边界安全”基础测试已经有了,后续不需要再从零补“有没有 strip token” + +仍有缺口: + +- 更高一层的“accepted / committed / delivered”语义诚实性还没有被单独表达出来 +- 例如:conversation 持久化了 `NeedsLlmReplyEvent`,并不等于用户已经收到 reply;目前这类语义主要靠代码结构理解,而不是显式测试名称锁住 + +对应后续: + +- 归入 `#635` 做 chain-level contract test + +### 3. `ChannelConversationTurnRunnerTests` + +现有覆盖亮点: + +- inbound metadata 组装 +- owner/sender config layering +- relay typing reaction post / clear +- slash / card action / workflow resume +- direct reply / relay reply / adapter rejection +- `OnReplyDeliveredAsync` 对 streaming path 的 reaction clear + +结论: + +- 这组测试已经像一个“适配层回归套件” +- 不建议在当前 issue 再扩 happy path + +仍有缺口: + +- 和 `AgentRunGAgent` 的 end-to-end 语义边界不是在这层表达的 +- 所以不应把后续的 actor 语义测试继续塞到 runner tests 里 + +对应后续: + +- 本轮只作为引用基线,不新增任务 + +### 4. `TurnStreamingReplySinkTests` + +现有覆盖亮点: + +- interim cap 后 stash,不提前 dispatch +- dispatch 中 stash + throttle gate + deferred timer +- finalize bypass throttle +- finalize in flight wait +- pending == last emitted duplicate suppression +- dispatch throw swallow +- dispose idempotency + +结论: + +- 这组已经相当扎实,是本链路里并发回归保护最强的一部分 +- `#636` 不应该被理解成“这里完全没测”,而是“只补后续 review 指向的新竞态” + +仍有缺口: + +- 当前没有发现明显大洞 +- 若要补,也应只补“新 code path 引入的新 race”,不要做覆盖率型补测 + +对应后续: + +- `#636` 的范围应收窄,避免过度施工 + +### 5. `ConversationReplyGeneratorTests` + +现有覆盖亮点: + +- relay callback URL 注入 +- placeholder emit / skip +- approval middleware per turn +- owner vs sender preferences layering +- route fallback / no token fallback + +结论: + +- 配置和偏好层面的覆盖不错 +- 但 generator 与 `ToolCallLoop` / `ChatRuntime` 的 closeout 联动还比较少 + +主要缺口: + +- `SkillRegistry` 存在但 `IRemoteSkillFetcher` 缺失时的 warning 行为未见直接测试 +- 还缺“tool call -> tool result -> final answer”在 generator 这一层只收尾一次的测试表达 + +对应后续: + +- 归入 `#637` + +### 6. `ToolCallLoopTests` + +现有覆盖亮点: + +- no tool / tool then follow-up +- request id 与 per-call metadata +- hook / middleware mutation 与 terminate +- max rounds exhausted +- length recovery +- reasoning content propagation +- DSML tool call 变体 + +结论: + +- `ToolCallLoop` 的单体语义已经很全 +- 目前缺的不是 loop 内部逻辑,而是它和 `ConversationReplyGenerator` / `ChatRuntime` 的交界处 + +主要缺口: + +- final answer closeout 与 streaming sink / reply ready 的整体联动没有被直接表达 +- “warning path + final content + no duplicate closeout” 这种跨层问题还未集中锁住 + +对应后续: + +- 归入 `#637` + +## 已有护栏与缺口 + +已有护栏: + +- `test/Aevatar.Architecture.Tests/Rules/ForbiddenPatternTests.cs` + - 已限制中间层 `actor/entity/run/session` ID -> context 字典事实态 +- `tools/ci/channel_relay_nyx_chat_direct_create_guard.sh` + - 已限制 channel relay/runtime 直连 `NyxIdChatGAgent` +- `test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs` + - 已限制外部入口绕过 `ConversationGAgent` + +当前缺口: + +- 还没有一个专门护栏明确锁住: + - `ConversationGAgent` 依赖的是 `IChannelLlmReplyRunDispatcher` + - 而不是重新回退为直接依赖旧 inbox runtime 或具体 NyxIdChat 实现 + +这说明: + +- `#637` 很适合新增一个非常小但高价值的 architecture test / CI guard + +## 建议执行顺序 + +基于这次审计,后续 issue 的执行顺序建议是: + +1. `#635` + - 先补 `ConversationGAgent <-> AgentRunGAgent` 的 chain-level actor 语义与 credential boundary +2. `#637` + - 再补 closeout 回归和最小结构护栏 +3. `#636` + - 最后只做 review 指向的新 sink 竞态回归,不做大面积补测 + +原因很简单: + +- `TurnStreamingReplySink` 现在不是最薄的位置 +- 真正最薄的是“跨 actor handoff 的诚实性”和“closeout 是否只发生一次” + +## 结论清单 + +可以明确认为“已足够,不需要优先再补”的: + +- `ConversationGAgent` dedup / retry 基础语义 +- relay token strip/re-enrich 基础机制 +- `TurnStreamingReplySink` 的大部分并发收尾行为 +- `ToolCallLoop` 的单体循环逻辑 +- `ChannelConversationTurnRunner` 的大部分 adapter / reaction / relay 分支 + +可以明确认为“下一步最值得补”的: + +- `ConversationGAgent -> AgentRunGAgent` handoff 的 chain-level contract tests +- `AgentRunGAgent` terminal 后重复信号 / cleanup / retry 组合幂等 +- `ConversationReplyGenerator` 与 `ToolCallLoop` / `ChatRuntime` 的 closeout 联动 +- `IChannelLlmReplyRunDispatcher` 这条新 seam 的结构护栏 + +一句话总结: + +`feature/lark-bot` 现在缺的不是“更多测试”,而是**更少但更准的回归测试与护栏**。 From 9462fa129f2885b5c8044a833b37fb5c548a1064 Mon Sep 17 00:00:00 2001 From: github-aelf Date: Wed, 13 May 2026 18:09:22 +0800 Subject: [PATCH 2/5] Add actor handoff regression tests for lark reply chain --- .../ConversationGAgentDedupTests.cs | 34 +++++++++ .../AgentRunGAgentTests.cs | 73 +++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs index 565685d11..3b5b183b2 100644 --- a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs +++ b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs @@ -356,6 +356,40 @@ public async Task HandleInboundActivityAsync_WhenRunnerRequestsDeferredReply_Per parsed.Activity.Id.ShouldBe("act-llm"); } + [Fact] + public async Task HandleInboundActivityAsync_WhenRunDispatcherAcceptsRequest_ShouldNotPersistCompletedReplyUntilReadyArrives() + { + // Accepted-for-run is weaker than committed/user-visible reply. The actor may persist + // NeedsLlmReplyEvent and dispatch it immediately, but must not emit a completed fact + // until the run actor sends LlmReplyReadyEvent (or a terminal failure) back. + var dispatcher = new RecordingRunDispatcher(); + var runner = new RecordingTurnRunner + { + InboundResultFactory = activity => ConversationTurnResult.LlmReplyRequested( + new NeedsLlmReplyEvent + { + CorrelationId = activity.Id, + TargetActorId = "conversation:actor", + RegistrationId = "reg-1", + Activity = activity.Clone(), + RequestedAtUnixMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), + }), + }; + var (agent, store) = CreateAgent(runner, "conv-accepted-not-committed", dispatcher); + + await agent.HandleInboundActivityAsync(CreateActivity("act-accepted-only", "conv:slack:C1")); + + dispatcher.Dispatched.Count.ShouldBe(1); + runner.LlmReplyCount.ShouldBe(0); + agent.State.PendingLlmReplyRequests.ShouldContain(req => req.CorrelationId == "act-accepted-only"); + + var events = await store.GetEventsAsync(agent.Id); + events.Count.ShouldBe(1); + events[0].EventType.ShouldContain(nameof(NeedsLlmReplyEvent)); + events.ShouldNotContain(record => + record.EventType.Contains(nameof(ConversationTurnCompletedEvent), StringComparison.Ordinal)); + } + [Fact] public async Task HandleLlmReplyReadyAsync_WhenDuplicateCorrelationId_CollapsesToSingleOutboundCommit() { diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs index 105740f84..c4beb003b 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs @@ -280,6 +280,44 @@ await runtime.HandleStartAsync(new NeedsLlmReplyEvent cleanupCommand.RunId.Should().Be("corr-cleanup-schedule"); } + [Fact] + public async Task HandleStartAsync_TerminalRun_ShouldNotEmitDuplicateReadyEvent() + { + var actor = Substitute.For(); + actor.Id.Returns("actor-1"); + var handled = new List(); + actor.When(x => x.HandleEventAsync(Arg.Any(), Arg.Any())) + .Do(call => handled.Add(call.Arg())); + var actorRuntime = new DispatchingActorRuntime(("actor-1", actor)); + var scheduler = new RecordingCallbackScheduler(); + var replyGenerator = new RecordingReplyGenerator(() => false) { ReplyText = "ok" }; + var runtime = CreateRunAgent( + actorRuntime, + replyGenerator, + new AsyncLocalInteractiveReplyCollector(), + new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions + { + InteractiveRepliesEnabled = true, + StreamingRepliesEnabled = false, + }, + callbackScheduler: scheduler); + var request = new NeedsLlmReplyEvent + { + CorrelationId = "corr-terminal-idempotent", + TargetActorId = "actor-1", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + ReplyToken = "relay-token-terminal-idempotent", + }; + + await runtime.HandleStartAsync(request); + await runtime.HandleStartAsync(request.Clone()); + + replyGenerator.CallCount.Should().Be(1); + handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor)); + runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced); + } + [Fact] public async Task HandleCleanupAsync_ShouldDestroyTerminalRunActor() { @@ -312,6 +350,41 @@ await runtime.HandleCleanupAsync(new AgentRunCleanupRequested actorRuntime.DestroyedIds.Should().Contain(runtime.Id); } + [Fact] + public async Task HandleStartAsync_TerminalDrop_ShouldNotDispatchDuplicateDropNotification() + { + var actor = Substitute.For(); + actor.Id.Returns("actor-1"); + var handled = new List(); + actor.When(x => x.HandleEventAsync(Arg.Any(), Arg.Any())) + .Do(call => handled.Add(call.Arg())); + var actorRuntime = new DispatchingActorRuntime(("actor-1", actor)); + var runtime = CreateRunAgent( + actorRuntime, + new RecordingReplyGenerator(() => false) { ReplyText = "ok" }, + new AsyncLocalInteractiveReplyCollector(), + new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions + { + InteractiveRepliesEnabled = true, + StreamingRepliesEnabled = false, + }); + + var request = new NeedsLlmReplyEvent + { + CorrelationId = "corr-terminal-drop-idempotent", + TargetActorId = "actor-1", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + // Relay request with no command-carried ReplyToken should drop before LLM execution. + }; + + await runtime.HandleStartAsync(request); + await runtime.HandleStartAsync(request.Clone()); + + handled.Should().ContainSingle(e => e.Payload.Is(DeferredLlmReplyDroppedEvent.Descriptor)); + runtime.State.Status.Should().Be(AgentRunStatus.Dropped); + } + [Fact] public async Task HandleStartAsync_OnOutputDispatchFailure_PersistsProducedReply_AndRetryReDispatchesWithoutRerunningLlm() { From f60337227d5008d0320ba32a0ee749418c16d640 Mon Sep 17 00:00:00 2001 From: github-aelf Date: Thu, 14 May 2026 15:14:36 +0800 Subject: [PATCH 3/5] Add lark-bot reply chain regression tests --- ...ark-bot-reply-chain-test-coverage-audit.md | 10 +- .../Rules/ChannelArchitectureTests.cs | 17 +++ .../AgentRunGAgentTests.cs | 92 ++++++++++++++++ .../ConversationReplyGeneratorTests.cs | 102 ++++++++++++++++++ .../TurnStreamingReplySinkTests.cs | 71 ++++++++++++ 5 files changed, 291 insertions(+), 1 deletion(-) diff --git a/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md index bc17d851a..c9610190d 100644 --- a/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md +++ b/docs/audit-scorecard/2026-05-13-lark-bot-reply-chain-test-coverage-audit.md @@ -89,7 +89,7 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions | `TurnStreamingReplySink` throttle/cap/finalize race | 15 个测试,覆盖 cap、throttle、timer、dispatch in flight、idempotent dispose、duplicate suppression、dispatch throw | 覆盖非常强 | 本轮不需要再做大面积补测,只需补 review 指向的新竞态时再加 | P2 | | `ConversationReplyGenerator` placeholder / owner vs sender prefs / route fallback / approval middleware | 已覆盖主要分支 | 覆盖中等 | 缺 warning/closeout 联动测试 | P1 | | `ToolCallLoop` tool round / middleware / request identity / reasoning propagation / length recovery | 覆盖强 | 单体循环语义稳定 | 缺和 generator / runtime 的联动 closeout 测试 | P1 | -| `ChatRuntime` 多轮流式收尾语义 | `ChatRuntimeStreamingBufferTests` 已直接覆盖 `ChatStreamAsync` 的流式 chunk、tool-call follow-up、reasoning 透传;另有 `ToolCallLoopTests` 与 AI tests 辅助覆盖 | 覆盖中等,流式主路径已有直接保护 | 仍缺更贴 reply-chain closeout 的联动表达,建议只补一到两个 closeout 级回归,不要大规模重写测试 | P1 | +| `ChatRuntime` 多轮流式收尾语义 | `ChatRuntimeStreamingBufferTests` 已直接覆盖 `ChatStreamAsync` 的流式 chunk、tool-call follow-up、reasoning 透传;另有 `ToolCallLoopTests` 与 AI tests 辅助覆盖 | 覆盖中等,流式主路径已有直接保护 | `outer stream` 是否应该把 per-round terminal signal 收口成 single overall terminal chunk 目前仍属目标态问题,不是已接受契约;后续应先定契约,再决定是否补 failing test 或实现改动 | P1 | | 新 seam `IChannelLlmReplyRunDispatcher` 的依赖方向 | 现有 architecture/channel guard 已限制“不要直连 NyxIdChatGAgent”,但没有直接锁住这条 seam | 结构护栏缺口明确 | 在 `#637` 增加 architecture test 或 CI guard | P0 | ## 分文件审计 @@ -193,6 +193,7 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions - 当前没有发现明显大洞 - 若要补,也应只补“新 code path 引入的新 race”,不要做覆盖率型补测 +- 本轮新增的两条回归更接近这个方向:一条锁 `FinalizeAsync` 等待 drain 时 `Dispose()` 不应再把 stashed final flush 发出去;一条锁 deferred flush dispatch 失败后,后续 delta 仍可恢复推进且不重复计数 对应后续: @@ -212,6 +213,7 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions - 配置和偏好层面的覆盖不错 - 但 generator 与 `ToolCallLoop` / `ChatRuntime` 的 closeout 联动还比较少 +- 其中 `ChatRuntime` 的 terminal chunk contract 目前还没有稳定成仓库内共识,不适合在这一层直接写死“single overall terminal chunk”断言;这更像后续需要和研发先对齐的目标态 主要缺口: @@ -259,6 +261,12 @@ branch: test/2026-05-12_lark-bot-reply-chain-regressions - `test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs` - 已限制外部入口绕过 `ConversationGAgent` +新增补强: + +- `test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs` + - 可以补一条直接锁 `ConversationGAgent -> IChannelLlmReplyRunDispatcher` seam 的最小门禁 + - 但当前更适合先作为 regex / source-text 级最小护栏看待,不应误认为已经升级为 Roslyn / 编译级依赖门禁 + 当前缺口: - 还没有一个专门护栏明确锁住: diff --git a/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs b/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs index 4620727bf..1ef115003 100644 --- a/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs +++ b/test/Aevatar.Architecture.Tests/Rules/ChannelArchitectureTests.cs @@ -354,6 +354,23 @@ public void DurableInboxImplementations_Must_DependOn_AsyncStream_ChatActivity() + string.Join("\n", violators)); } + [Fact] + public void ConversationGAgent_SourceTextGuard_ShouldReference_RunDispatcherSeam_AndAvoid_Concrete_RunOrInbox_Runtime_Types() + { + var conversationPath = ChannelSourceIndex.EnumerateProductionSourceFiles() + .Single(path => path.EndsWith( + "/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs", + System.StringComparison.Ordinal)); + var text = File.ReadAllText(conversationPath); + + // Minimal source-text guard only: keeps the intended seam visible in review/CI, + // but is not a Roslyn/compile-level dependency rule. + Assert.Matches(@"\bIChannelLlmReplyRunDispatcher\b", text); + Assert.DoesNotMatch(@"\bAgentRunDispatcher\b", text); + Assert.DoesNotMatch(@"\bDurableInboxSubscriber\b", text); + Assert.DoesNotMatch(@"\bIChannelDurableInbox\b", text); + } + private static bool IsAllowedOutboundSendCaller(string normalizedPath) { if (normalizedPath.Contains("/agents/channels/", System.StringComparison.Ordinal)) diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs index c4beb003b..4ccf482d1 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs @@ -350,6 +350,64 @@ await runtime.HandleCleanupAsync(new AgentRunCleanupRequested actorRuntime.DestroyedIds.Should().Contain(runtime.Id); } + [Fact] + public async Task HandleCleanupAsync_ShouldIgnoreNonTerminalRun() + { + var actor = Substitute.For(); + actor.Id.Returns("actor-1"); + var actorRuntime = new DispatchingActorRuntime(("actor-1", actor)); + var runtime = CreateRunAgent( + actorRuntime, + new RecordingReplyGenerator(() => false) { ReplyText = "ok" }, + new AsyncLocalInteractiveReplyCollector(), + new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions + { + InteractiveRepliesEnabled = true, + StreamingRepliesEnabled = false, + }); + + await runtime.HandleCleanupAsync(new AgentRunCleanupRequested + { + RunId = "corr-non-terminal-cleanup", + }); + + actorRuntime.DestroyedIds.Should().BeEmpty(); + } + + [Fact] + public async Task HandleCleanupAsync_ShouldIgnoreMismatchedTerminalRunId() + { + var actor = Substitute.For(); + actor.Id.Returns("actor-1"); + var actorRuntime = new DispatchingActorRuntime(("actor-1", actor)); + var runtime = CreateRunAgent( + actorRuntime, + new RecordingReplyGenerator(() => false) { ReplyText = "ok" }, + new AsyncLocalInteractiveReplyCollector(), + new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions + { + InteractiveRepliesEnabled = true, + StreamingRepliesEnabled = false, + }); + + await runtime.HandleStartAsync(new NeedsLlmReplyEvent + { + CorrelationId = "corr-cleanup-mismatch", + TargetActorId = "actor-1", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + ReplyToken = "relay-token-cleanup-mismatch", + }); + await runtime.HandleCleanupAsync(new AgentRunCleanupRequested + { + RunId = "corr-some-other-run", + }); + + actorRuntime.DestroyedIds.Should().BeEmpty(); + runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced); + runtime.State.ReplyDispatched.Should().BeTrue(); + } + [Fact] public async Task HandleStartAsync_TerminalDrop_ShouldNotDispatchDuplicateDropNotification() { @@ -385,6 +443,40 @@ public async Task HandleStartAsync_TerminalDrop_ShouldNotDispatchDuplicateDropNo runtime.State.Status.Should().Be(AgentRunStatus.Dropped); } + [Fact] + public async Task HandleStartAsync_TerminalFailure_ShouldNotDispatchDuplicateFailureReadyEvent() + { + var collector = new AsyncLocalInteractiveReplyCollector(); + var replyGenerator = new ThrowingReplyGenerator(new InvalidOperationException("boom")); + var actor = Substitute.For(); + actor.Id.Returns("channel-conversation:lark:group:oc_group_chat_1"); + var handled = new List(); + actor.When(x => x.HandleEventAsync(Arg.Any(), Arg.Any())) + .Do(call => handled.Add(call.Arg())); + var actorRuntime = new DispatchingActorRuntime(("actor-1", actor)); + var runtime = CreateRunAgent( + actorRuntime, + replyGenerator, + collector, + new Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions { InteractiveRepliesEnabled = true }); + var request = new NeedsLlmReplyEvent + { + CorrelationId = "corr-terminal-failed-idempotent", + TargetActorId = "actor-1", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + ReplyToken = "relay-token-terminal-failed-idempotent", + }; + + await runtime.HandleStartAsync(request); + await runtime.HandleStartAsync(request.Clone()); + + handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor)); + runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced); + runtime.State.ReplyDispatched.Should().BeTrue(); + runtime.State.ProducedTerminalState.Should().Be(LlmReplyTerminalState.Failed); + } + [Fact] public async Task HandleStartAsync_OnOutputDispatchFailure_PersistsProducedReply_AndRetryReDispatchesWithoutRerunningLlm() { diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs index 3bfb1d327..a804c03e3 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs @@ -8,6 +8,7 @@ using Aevatar.GAgents.Channel.NyxIdRelay; using Aevatar.GAgents.Channel.Runtime; using Aevatar.GAgents.NyxidChat; +using Microsoft.Extensions.Logging; namespace Aevatar.GAgents.ChannelRuntime.Tests; @@ -161,6 +162,75 @@ public async Task GenerateReplyAsync_CreatesApprovalMiddlewarePerTurn() approvalHandler.RequestCount.Should().Be(4); } + [Fact] + public async Task GenerateReplyAsync_WithSkillRegistryButNoRemoteFetcher_LogsWarningOnlyOnceAcrossTurns() + { + var logger = new ListLogger(); + var skillRegistry = new SkillRegistry(); + skillRegistry.Register(new SkillDefinition + { + Name = "remote-skill", + Description = "Remote skill", + Instructions = "Does remote work", + Source = SkillSource.Remote, + RemoteId = "remote-skill-id", + }); + var generator = new NyxIdConversationReplyGenerator( + new RecordingProviderFactory(), + skillRegistry: skillRegistry, + remoteSkillFetcher: null, + logger: logger); + + for (var i = 0; i < 2; i++) + { + var reply = await generator.GenerateReplyAsync( + new ChatActivity + { + Id = $"msg-warning-{i}", + Conversation = new ConversationReference { CanonicalKey = $"lark:dm:user-warning-{i}" }, + Content = new MessageContent { Text = "hello" }, + }, + new Dictionary(), + streamingSink: null, + CancellationToken.None); + + reply.Should().Be("ok"); + } + + logger.WarningMessages.Should().ContainSingle(message => + message.Contains("SkillRegistry is registered without IRemoteSkillFetcher", StringComparison.Ordinal)); + } + + [Fact] + public async Task GenerateReplyAsync_WithStreamingSink_EmitsPlaceholderThenFinalTextAcrossToolFollowUp() + { + var providerFactory = new ToolCallingProviderFactory(); + var generator = new NyxIdConversationReplyGenerator( + providerFactory, + toolSources: [new SingleToolSource(new ApprovalRequiredTool())], + relayOptions: new global::Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions + { + StreamingPlaceholderText = "…", + }); + var sink = new RecordingStreamingSink(); + + var reply = await generator.GenerateReplyAsync( + new ChatActivity + { + Id = "msg-tool-follow-up", + Conversation = new ConversationReference { CanonicalKey = "lark:dm:user-tool-follow-up" }, + Content = new MessageContent { Text = "run tool" }, + }, + new Dictionary(), + sink, + CancellationToken.None); + + reply.Should().Be("done"); + providerFactory.Requests.Should().HaveCount(2); + providerFactory.Requests[1].Messages.Should().Contain(message => message.Role == "tool"); + sink.Emissions.Should().Equal("…", "done"); + } + [Fact] public async Task GenerateReplyAsync_AppliesSenderPrefsOverChainOwnerDefault() { @@ -569,6 +639,8 @@ private sealed class ToolCallingProviderFactory : ILLMProviderFactory, ILLMProvi { public string Name => "tool-calling"; + public List Requests { get; } = []; + public ILLMProvider GetProvider(string name) => this; public ILLMProvider GetDefault() => this; @@ -582,6 +654,7 @@ public async IAsyncEnumerable ChatStreamAsync( LLMRequest request, [EnumeratorCancellation] CancellationToken ct = default) { + Requests.Add(request); if (request.Messages.Any(static message => message.Role == "tool")) { yield return new LLMStreamChunk { DeltaContent = "done" }; @@ -636,4 +709,33 @@ public Task RequestApprovalAsync(ToolApprovalRequest request return Task.FromResult(ToolApprovalResult.Denied("test denial")); } } + + private sealed class ListLogger : ILogger + { + public List WarningMessages { get; } = []; + + public IDisposable BeginScope(TState state) where TState : notnull => NullScope.Instance; + + public bool IsEnabled(LogLevel logLevel) => true; + + public void Log( + LogLevel logLevel, + EventId eventId, + TState state, + Exception? exception, + Func formatter) + { + if (logLevel == LogLevel.Warning) + WarningMessages.Add(formatter(state, exception)); + } + + private sealed class NullScope : IDisposable + { + public static readonly NullScope Instance = new(); + + public void Dispose() + { + } + } + } } diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs index 7d609bc4d..b0e5fadec 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/TurnStreamingReplySinkTests.cs @@ -233,6 +233,77 @@ public async Task FinalizeAsync_DispatchInFlight_WaitsForFinalChunkOnWire() sink.ChunksEmitted.Should().Be(2); } + [Fact] + public async Task Dispose_WhenFinalizeIsAwaitingDrain_UnblocksWithoutDispatchingStashedFinalText() + { + var firstDispatchGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var envelopes = new List(); + var dispatchCount = 0; + + var dispatchPort = Substitute.For(); + dispatchPort.DispatchAsync("target-actor", Arg.Any(), Arg.Any()) + .Returns(call => + { + envelopes.Add(call.Arg()); + dispatchCount++; + return dispatchCount == 1 ? firstDispatchGate.Task : Task.CompletedTask; + }); + + var sink = CreateSink(dispatchPort, throttleMs: 0, out _); + + var deltaTask = sink.OnDeltaAsync("first", CancellationToken.None); + var finalizeTask = sink.FinalizeAsync("first plus final", CancellationToken.None); + + deltaTask.IsCompleted.Should().BeFalse(); + finalizeTask.IsCompleted.Should().BeFalse(); + envelopes.Should().ContainSingle(); + + sink.Dispose(); + await finalizeTask; + + envelopes.Should().ContainSingle("disposing the sink must drop the stashed final flush"); + + firstDispatchGate.SetResult(); + await deltaTask; + + envelopes.Should().ContainSingle(); + sink.ChunksEmitted.Should().Be(1); + } + + [Fact] + public async Task OnDeltaAsync_WhenDeferredFlushDispatchFails_LaterDeltaStillPublishesLatestTextOnce() + { + var dispatchAttempts = 0; + var dispatchPort = Substitute.For(); + dispatchPort.DispatchAsync("target-actor", Arg.Any(), Arg.Any()) + .Returns(_ => + { + dispatchAttempts++; + if (dispatchAttempts == 2) + return Task.FromException(new InvalidOperationException("boom")); + + return Task.CompletedTask; + }); + + var sink = CreateSink(dispatchPort, throttleMs: 750, out var time); + + await sink.OnDeltaAsync("chunk 1", CancellationToken.None); + time.Advance(TimeSpan.FromMilliseconds(100)); + await sink.OnDeltaAsync("chunk 1 + 2", CancellationToken.None); + + time.Advance(TimeSpan.FromMilliseconds(800)); + + sink.ChunksEmitted.Should().Be(1, "the timer-driven dispatch failed and must not count as emitted"); + + time.Advance(TimeSpan.FromMilliseconds(100)); + await sink.OnDeltaAsync("chunk 1 + 3", CancellationToken.None); + + sink.ChunksEmitted.Should().Be(2); + dispatchAttempts.Should().Be(3); + time.Advance(TimeSpan.FromMilliseconds(2000)); + sink.ChunksEmitted.Should().Be(2); + } + [Fact] public async Task PendingTimerEqualsLastEmitted_DoesNotEmitDuplicate() { From ad5f2bbc5ccab72f4a4438cc6a83c1bcf467a484 Mon Sep 17 00:00:00 2001 From: github-aelf Date: Fri, 15 May 2026 16:37:59 +0800 Subject: [PATCH 4/5] Update lark-bot reply chain regression coverage --- .../ConversationGAgentDedupTests.cs | 71 +++++++++++++ .../AgentRunGAgentTests.cs | 100 +++++++++++++++++- 2 files changed, 166 insertions(+), 5 deletions(-) diff --git a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs index 1ab4759a8..70a1a574d 100644 --- a/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs +++ b/test/Aevatar.GAgents.Channel.Protocol.Tests/ConversationGAgentDedupTests.cs @@ -419,6 +419,77 @@ public async Task HandleLlmReplyReadyAsync_WhenDuplicateCorrelationId_CollapsesT events.Count.ShouldBe(3); events.Last().EventType.ShouldContain(nameof(ConversationTurnCompletedEvent)); events.Select(e => e.EventType).ShouldContain(s => s.Contains(nameof(LlmReplyDeliveredEvent))); + agent.State.LastReplyDelivery.RunId.ShouldBe("act-llm-ready"); + agent.State.LastReplyDelivery.OutcomeCase.ShouldBe(ReplyDeliveryStatus.OutcomeOneofCase.Delivered); + agent.State.LastReplyDelivery.Delivered.ChannelMessageId.ShouldBe(string.Empty); + } + + [Fact] + public async Task HandleLlmReplyReadyAsync_WhenDeliverySucceeds_UpdatesLastReplyDeliveryState() + { + var runner = new RecordingTurnRunner + { + LlmReplyResultFactory = reply => ConversationTurnResult.Sent( + "sent:llm:" + reply.CorrelationId, + reply.Outbound?.Clone() ?? new MessageContent { Text = "ack" }, + "bot", + new OutboundDeliveryContext + { + ReplyMessageId = "om_delivery_ok", + CorrelationId = reply.CorrelationId, + }), + }; + var (agent, store) = CreateAgent(runner, "conv-llm-delivered"); + + await agent.HandleLlmReplyReadyAsync(new LlmReplyReadyEvent + { + CorrelationId = "corr-delivered", + RegistrationId = "reg-1", + SourceActorId = "agent-run", + Activity = CreateActivity("corr-delivered", "conv:slack:C1"), + Outbound = new MessageContent { Text = "reply-from-llm" }, + TerminalState = LlmReplyTerminalState.Completed, + ReadyAtUnixMs = 43, + }); + + agent.State.LastReplyDelivery.RunId.ShouldBe("corr-delivered"); + agent.State.LastReplyDelivery.OutcomeCase.ShouldBe(ReplyDeliveryStatus.OutcomeOneofCase.Delivered); + agent.State.LastReplyDelivery.Delivered.ChannelMessageId.ShouldBe("om_delivery_ok"); + agent.State.LastReplyDelivery.Delivered.AckedAtUnixMs.ShouldBeGreaterThan(0); + + var events = await store.GetEventsAsync(agent.Id); + events.Select(e => e.EventType).ShouldContain(s => s.Contains(nameof(LlmReplyDeliveredEvent))); + } + + [Fact] + public async Task HandleLlmReplyReadyAsync_WhenDeliveryFails_UpdatesLastReplyDeliveryState() + { + var runner = new RecordingTurnRunner + { + LlmReplyResultFactory = _ => ConversationTurnResult.PermanentFailure("lark_send_failed", "lark rejected send"), + }; + var (agent, store) = CreateAgent(runner, "conv-llm-delivery-failed"); + + await agent.HandleLlmReplyReadyAsync(new LlmReplyReadyEvent + { + CorrelationId = "corr-delivery-failed", + RegistrationId = "reg-1", + SourceActorId = "agent-run", + Activity = CreateActivity("corr-delivery-failed", "conv:slack:C1"), + Outbound = new MessageContent { Text = "reply-from-llm" }, + TerminalState = LlmReplyTerminalState.Completed, + ReadyAtUnixMs = 43, + }); + + agent.State.LastReplyDelivery.RunId.ShouldBe("corr-delivery-failed"); + agent.State.LastReplyDelivery.OutcomeCase.ShouldBe(ReplyDeliveryStatus.OutcomeOneofCase.Failed); + agent.State.LastReplyDelivery.Failed.ErrorCode.ShouldBe("lark_send_failed"); + agent.State.LastReplyDelivery.Failed.ErrorMessage.ShouldBe("lark rejected send"); + agent.State.LastReplyDelivery.Failed.FailedAtUnixMs.ShouldBeGreaterThan(0); + + var events = await store.GetEventsAsync(agent.Id); + events.Select(e => e.EventType).ShouldContain(s => s.Contains(nameof(LlmReplyDeliveryFailedEvent))); + events.Last().EventType.ShouldContain(nameof(ConversationContinueFailedEvent)); } [Fact] diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs index 7349cce4f..3f28ffa27 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentRunGAgentTests.cs @@ -14,6 +14,7 @@ using Google.Protobuf.WellKnownTypes; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Time.Testing; using NSubstitute; using Xunit; @@ -50,6 +51,96 @@ await dispatcher.DispatchAsync(new NeedsLlmReplyEvent command.Request.ReplyToken.Should().Be("relay-token-dispatch"); } + [Fact] + public async Task DispatchAsync_ShouldReturnAcceptedOutcomeOnly() + { + var actorRuntime = new DispatchingActorRuntime(); + var streamProvider = new RecordingStreamProvider(); + var clock = new FakeTimeProvider(new DateTimeOffset(2026, 5, 15, 8, 0, 0, TimeSpan.Zero)); + var dispatcher = new AgentRunDispatcher( + actorRuntime, + streamProvider, + NullLogger.Instance, + clock); + + var result = await dispatcher.DispatchAsync(new NeedsLlmReplyEvent + { + CorrelationId = "corr-accepted", + TargetActorId = "conversation-actor", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + ReplyToken = "relay-token-accepted", + RequestedAtUnixMs = clock.GetUtcNow().ToUnixTimeMilliseconds(), + }, CancellationToken.None); + + var produced = streamProvider.Produced.Should().ContainSingle().Subject; + result.Phase.Should().Be(DispatchPhase.Accepted); + result.CommandId.Should().NotBeNullOrWhiteSpace(); + result.CommandId.Should().Be(produced.Envelope.Id); + result.RunActorId.Should().Be(AgentRunGAgent.BuildActorId("corr-accepted")); + result.RunActorId.Should().Be(produced.StreamId); + result.AcceptedAtUnixMs.Should().Be(clock.GetUtcNow().ToUnixTimeMilliseconds()); + } + + [Fact] + public async Task DispatchAsync_WhenRequestIsStale_ShouldRejectBeforeEnqueue() + { + var actorRuntime = new DispatchingActorRuntime(); + var streamProvider = new RecordingStreamProvider(); + var clock = new FakeTimeProvider(new DateTimeOffset(2026, 5, 15, 8, 10, 0, TimeSpan.Zero)); + var dispatcher = new AgentRunDispatcher( + actorRuntime, + streamProvider, + NullLogger.Instance, + clock); + + var result = await dispatcher.DispatchAsync(new NeedsLlmReplyEvent + { + CorrelationId = "corr-stale", + TargetActorId = "conversation-actor", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + RequestedAtUnixMs = clock.GetUtcNow().AddMinutes(-6).ToUnixTimeMilliseconds(), + }, CancellationToken.None); + + result.Phase.Should().Be(DispatchPhase.RejectedStale); + result.CommandId.Should().BeEmpty(); + result.RunActorId.Should().BeNull(); + result.AcceptedAtUnixMs.Should().Be(0); + streamProvider.Produced.Should().BeEmpty(); + } + + [Fact(Skip = "RejectedDuplicate is part of the ADR-0021 dispatcher contract but duplicate suppression is not implemented in AgentRunDispatcher yet.")] + public async Task DispatchAsync_WhenRequestIsDuplicate_ShouldReturnRejectedDuplicate_AndAvoidSecondEnqueue() + { + var actorRuntime = new DispatchingActorRuntime(); + var streamProvider = new RecordingStreamProvider(); + var clock = new FakeTimeProvider(new DateTimeOffset(2026, 5, 15, 8, 20, 0, TimeSpan.Zero)); + var dispatcher = new AgentRunDispatcher( + actorRuntime, + streamProvider, + NullLogger.Instance, + clock); + var request = new NeedsLlmReplyEvent + { + CorrelationId = "corr-duplicate-dispatch", + TargetActorId = "conversation-actor", + RegistrationId = "reg-1", + Activity = BuildRelayActivity(), + RequestedAtUnixMs = clock.GetUtcNow().ToUnixTimeMilliseconds(), + }; + + await dispatcher.DispatchAsync(request, CancellationToken.None); + var duplicate = await dispatcher.DispatchAsync(request.Clone(), CancellationToken.None); + + duplicate.Phase.Should().Be(DispatchPhase.RejectedDuplicate); + // ADR-0021 only locks the duplicate receipt phase and the fact that the + // duplicate request must not be enqueued again. The exact duplicate + // receipt payload shape (e.g. whether RunActorId/AcceptedAtUnixMs are + // populated) is intentionally left to the eventual implementation. + streamProvider.Produced.Should().ContainSingle("duplicate dispatch must not enqueue a second run start command"); + } + [Fact] public void ApplyReplyProduced_HistoricalEventWithoutReplyText_MarksAsAlreadyDispatched() { @@ -319,7 +410,7 @@ public async Task HandleStartAsync_TerminalRun_ShouldNotEmitDuplicateReadyEvent( replyGenerator.CallCount.Should().Be(1); handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor)); - runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced); + runtime.State.Status.Should().Be(AgentRunStatus.ReplyHandedOff); } [Fact] @@ -607,8 +698,8 @@ await runtime.HandleCleanupAsync(new AgentRunCleanupRequested }); actorRuntime.DestroyedIds.Should().BeEmpty(); - runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced); - runtime.State.ReplyDispatched.Should().BeTrue(); + runtime.State.Status.Should().Be(AgentRunStatus.ReplyHandedOff); + runtime.State.CleanupCompletedAtUnixMs.Should().Be(0); } [Fact] @@ -675,8 +766,7 @@ public async Task HandleStartAsync_TerminalFailure_ShouldNotDispatchDuplicateFai await runtime.HandleStartAsync(request.Clone()); handled.Should().ContainSingle(e => e.Payload.Is(LlmReplyReadyEvent.Descriptor)); - runtime.State.Status.Should().Be(AgentRunStatus.ReplyProduced); - runtime.State.ReplyDispatched.Should().BeTrue(); + runtime.State.Status.Should().Be(AgentRunStatus.ReplyHandedOff); runtime.State.ProducedTerminalState.Should().Be(LlmReplyTerminalState.Failed); } From 04a8920b71c4d49b64a52dd4ba07da3be94e6db5 Mon Sep 17 00:00:00 2001 From: github-aelf Date: Fri, 15 May 2026 18:04:33 +0800 Subject: [PATCH 5/5] Fix reply generator regression assertions --- .../ConversationReplyGeneratorTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs index cb4110681..c2f688f9b 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/ConversationReplyGeneratorTests.cs @@ -229,7 +229,7 @@ public async Task GenerateReplyAsync_WithSkillRegistryButNoRemoteFetcher_LogsWar streamingSink: null, CancellationToken.None); - reply.Should().Be("ok"); + reply.Text.Should().Be("ok"); } logger.WarningMessages.Should().ContainSingle(message => @@ -260,7 +260,7 @@ public async Task GenerateReplyAsync_WithStreamingSink_EmitsPlaceholderThenFinal sink, CancellationToken.None); - reply.Should().Be("done"); + reply.Text.Should().Be("done"); providerFactory.Requests.Should().HaveCount(2); providerFactory.Requests[1].Messages.Should().Contain(message => message.Role == "tool"); sink.Emissions.Should().Equal("…", "done");