diff --git a/.gitignore b/.gitignore index 83f3dc2dc..acf09b5e2 100644 --- a/.gitignore +++ b/.gitignore @@ -504,3 +504,6 @@ docs/.triage/ mempalace.yaml entities.json .mempalace/ + +# Paseo agent run-continuation state +.sisyphus/ diff --git a/aevatar.slnx b/aevatar.slnx index e18e03e21..83a26c678 100644 --- a/aevatar.slnx +++ b/aevatar.slnx @@ -157,6 +157,7 @@ + diff --git a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderActionIds.cs b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderActionIds.cs index 09e3f9e7b..b27d8b38b 100644 --- a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderActionIds.cs +++ b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderActionIds.cs @@ -13,11 +13,6 @@ namespace Aevatar.GAgents.Authoring.Lark; /// internal static class AgentBuilderActionIds { - public const string DailyReport = "create_daily_report"; - public const string SocialMedia = "create_social_media"; - public const string OpenDailyReportForm = "open_daily_report_form"; - public const string OpenSocialMediaForm = "open_social_media_form"; - public const string ListTemplates = "list_templates"; public const string ListAgents = "list_agents"; public const string AgentStatus = "agent_status"; public const string RunAgent = "run_agent"; diff --git a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardContent.cs b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardContent.cs index dab27fe2a..83d102689 100644 --- a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardContent.cs +++ b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardContent.cs @@ -1,7 +1,6 @@ using System.Text; using System.Text.Json; using Aevatar.GAgents.Channel.Abstractions; -using Aevatar.GAgents.Scheduled; namespace Aevatar.GAgents.Authoring.Lark; @@ -12,183 +11,7 @@ namespace Aevatar.GAgents.Authoring.Lark; /// public static class AgentBuilderCardContent { - private const string DailyReportAction = AgentBuilderActionIds.DailyReport; - private const string SocialMediaAction = 
AgentBuilderActionIds.SocialMedia; - private const string OpenDailyReportFormAction = AgentBuilderActionIds.OpenDailyReportForm; - private const string OpenSocialMediaFormAction = AgentBuilderActionIds.OpenSocialMediaForm; - private const string ListTemplatesAction = AgentBuilderActionIds.ListTemplates; private const string ListAgentsAction = AgentBuilderActionIds.ListAgents; - private const string DefaultScheduleTime = "09:00"; - - public static MessageContent BuildDailyReportForm(string? preferredGithubUsername) => - BuildDailyReportForm(preferredGithubUsername, introCard: null); - - /// - /// Builds the Daily Report creation form card. When is null the - /// default Day One description card is rendered; callers that need a different header (for - /// example, the credentials-required re-prompt) pass their own and this - /// method uses it verbatim instead. - /// - public static MessageContent BuildDailyReportForm( - string? preferredGithubUsername, - CardBlock? introCard) - { - var normalizedSaved = string.IsNullOrWhiteSpace(preferredGithubUsername) - ? null - : preferredGithubUsername!.Trim(); - - var content = new MessageContent(); - content.Cards.Add(introCard ?? BuildDefaultDailyReportIntroCard(normalizedSaved)); - - // Pre-fill the saved GitHub username into the input's default_value so users see it inline - // and can keep it with one submit click. Placeholder stays as a generic hint so the field - // does not disappear when the user clicks to edit. 
- var githubInput = BuildTextInput( - "github_username", - "GitHub Username", - placeholder: "octocat"); - if (normalizedSaved is not null) - githubInput.Value = normalizedSaved; - content.Actions.Add(githubInput); - - content.Actions.Add(BuildTextInput( - "repositories", - "Repositories (Optional)", - "owner/repo, owner/repo")); - content.Actions.Add(BuildTextInput( - "schedule_time", - "Daily Time (HH:mm)", - DefaultScheduleTime)); - content.Actions.Add(BuildTextInput( - "schedule_timezone", - "Time Zone", - SkillRunnerDefaults.DefaultTimezone)); - - var submit = BuildFormSubmit( - "submit_daily_report", - "Create Agent", - isPrimary: true); - submit.Arguments["agent_builder_action"] = DailyReportAction; - submit.Arguments["run_immediately"] = "true"; - content.Actions.Add(submit); - - return content; - } - - private static CardBlock BuildDefaultDailyReportIntroCard(string? savedGithubUsername) - { - var savedNote = savedGithubUsername is null - ? string.Empty - : $"\n\nSaved GitHub username: `{savedGithubUsername}` — it is already filled in, just press **Create Agent** to reuse it."; - - return new CardBlock - { - Kind = CardBlockKind.Section, - BlockId = "daily_report_intro", - Title = "Create Daily Report Agent", - Text = - "**Day One template:** Daily GitHub report\n" + - "Fill in the fields below. The agent will run once now and then repeat every day at your chosen local time." + - savedNote, - }; - } - - public static MessageContent BuildSocialMediaForm() - { - var content = new MessageContent(); - content.Cards.Add(new CardBlock - { - Kind = CardBlockKind.Section, - BlockId = "social_media_intro", - Title = "Create Social Media Agent", - Text = - "**Workflow-backed template:** Social media draft + approval\n" + - "Fill in the fields below. 
Each scheduled run will generate one draft and send approval instructions into this Feishu private chat.", - }); - - content.Actions.Add(BuildTextInput( - "topic", - "Topic", - "Launch update for the new workflow feature")); - content.Actions.Add(BuildTextInput( - "audience", - "Audience (Optional)", - "Developers and technical founders")); - content.Actions.Add(BuildTextInput( - "style", - "Style (Optional)", - "Confident, concise, product-focused")); - content.Actions.Add(BuildTextInput( - "schedule_time", - "Daily Time (HH:mm)", - DefaultScheduleTime)); - content.Actions.Add(BuildTextInput( - "schedule_timezone", - "Time Zone", - SkillRunnerDefaults.DefaultTimezone)); - - var submit = BuildFormSubmit( - "submit_social_media", - "Create Agent", - isPrimary: true); - submit.Arguments["agent_builder_action"] = SocialMediaAction; - submit.Arguments["run_immediately"] = "true"; - content.Actions.Add(submit); - - return content; - } - - /// - /// Builds the post-tool acknowledgment for the Day One daily report creation flow. - /// The tool response returns GitHub username, preference-save status, and run_immediately trigger - /// status, which this method folds into a short text reply that leads with "running now" when - /// the schedule fired the first report, so the user knows a report is on the way. - /// - public static MessageContent FormatDailyReportToolReply(JsonElement root) - { - if (TryReadError(root, out var error)) - return TextContent($"Create daily report agent failed: {error}"); - - var status = TryReadString(root, "status") ?? "accepted"; - if (string.Equals(status, "credentials_required", StringComparison.OrdinalIgnoreCase) || - string.Equals(status, "oauth_required", StringComparison.OrdinalIgnoreCase)) - { - return BuildDailyReportCredentialsCard(root, status); - } - - var agentId = TryReadString(root, "agent_id") ?? 
"unknown-agent"; - var githubUsername = TryReadString(root, "github_username"); - var savedPreference = TryReadBool(root, "github_username_preference_saved"); - // The tool reports whether it asked the skill-runner actor to run now, not whether the - // runner actually finished — hence "requested", not "triggered". The ack text still says - // "Running first report now" because we sent the command; if it fails downstream, the - // ground-truth status surfaces through /agent-status, not through this immediate reply. - var runImmediatelyRequested = TryReadBool(root, "run_immediately_requested"); - var nextRun = TryReadString(root, "next_scheduled_run") ?? "pending"; - - var headline = runImmediatelyRequested - ? (string.IsNullOrWhiteSpace(githubUsername) - ? "Daily report scheduled. Running first report now — I'll reply with the results shortly." - : $"Daily report scheduled for `{githubUsername}`. Running first report now — I'll reply with the results shortly.") - : (string.IsNullOrWhiteSpace(githubUsername) - ? "Daily report scheduled." - : $"Daily report scheduled for `{githubUsername}`."); - - var lines = new List { headline }; - if (savedPreference && !string.IsNullOrWhiteSpace(githubUsername)) - lines.Add($"Saved `{githubUsername}` as your default GitHub username."); - - lines.Add($"Next scheduled run: {nextRun}"); - lines.Add($"Agent ID: {agentId}"); - - var note = TryReadOptional(root, "note"); - if (note is not null) - lines.Add(note); - - lines.Add($"Next commands: /agents, /agent-status {agentId}, /run-agent {agentId}"); - - return TextContent(string.Join('\n', lines)); - } /// /// Renders /agents as a single consolidated card. The earlier design produced one @@ -222,10 +45,7 @@ public static MessageContent FormatListAgentsResult(JsonElement root, string? no emptyBody.Append(notice); emptyBody.Append("\n\n"); } - emptyBody.Append("No agents yet. 
Create one to get started:\n"); - emptyBody.Append("- `/daily` — daily GitHub report\n"); - emptyBody.Append("- `/social-media` — social-media drafter\n\n"); - emptyBody.Append("Run `/templates` to browse all available templates."); + emptyBody.Append("No agents yet."); content.Cards.Add(new CardBlock { @@ -234,9 +54,7 @@ public static MessageContent FormatListAgentsResult(JsonElement root, string? no Title = "Your Agents", Text = emptyBody.ToString(), }); - content.Actions.Add(BuildAction("Create Daily Report", OpenDailyReportFormAction, isPrimary: true)); - content.Actions.Add(BuildAction("Create Social Media", OpenSocialMediaFormAction, isPrimary: false)); - content.Actions.Add(BuildAction("Templates", ListTemplatesAction, isPrimary: false)); + content.Actions.Add(BuildAction("Refresh", ListAgentsAction, isPrimary: false)); return content; } @@ -285,14 +103,7 @@ public static MessageContent FormatListAgentsResult(JsonElement root, string? no Text = bodyBuilder.ToString(), }); - // Footer is intentionally limited to discovery / creation shortcuts. Per-agent actions - // (status, run, disable, enable, delete) deliberately stay off this card to avoid the - // visual "list + status panel" duplication called out in issue #476; the inline command - // hints in the body cover the same ground without the layout noise. 
content.Actions.Add(BuildAction("Refresh", ListAgentsAction, isPrimary: false)); - content.Actions.Add(BuildAction("Templates", ListTemplatesAction, isPrimary: false)); - content.Actions.Add(BuildAction("Create Daily Report", OpenDailyReportFormAction, isPrimary: false)); - content.Actions.Add(BuildAction("Create Social Media", OpenSocialMediaFormAction, isPrimary: false)); return content; } @@ -315,67 +126,6 @@ private static ActionElement BuildAction(string label, string agentBuilderAction return button; } - private static MessageContent BuildDailyReportCredentialsCard(JsonElement root, string status) - { - var providerId = TryReadString(root, "provider_id") ?? "unknown-provider"; - var url = TryReadString(root, "authorization_url") - ?? TryReadString(root, "auth_url") - ?? TryReadString(root, "url") - ?? TryReadString(root, "documentation_url"); - var note = TryReadString(root, "note") - ?? "Enter your GitHub username below — I'll save it as your default and run the report immediately."; - var heading = string.Equals(status, "oauth_required", StringComparison.OrdinalIgnoreCase) - ? "GitHub authorization required." - : "GitHub credentials required."; - - var descriptionLines = new List - { - $"**{heading}**", - note, - $"Provider ID: `{providerId}`", - }; - if (!string.IsNullOrWhiteSpace(url)) - descriptionLines.Add($"Open: {url}"); - descriptionLines.Add("Or just reply with `/daily ` — I'll save it and run the report now."); - - var introCard = new CardBlock - { - Kind = CardBlockKind.Section, - BlockId = "daily_report_credentials", - Title = "Create Daily Report Agent", - Text = string.Join('\n', descriptionLines), - }; - - // Echo the username the user already submitted (e.g. `/daily eanzhao`) so it pre-fills - // the form on the auth-required re-prompt — otherwise users had to retype it after the - // OAuth round-trip. 
The card body alone carries the auth instructions; setting - // content.Text in addition would double-render in Lark form mode (LarkMessageComposer's - // BuildLeadingMarkdown concatenates Text and the first card body), which is the original - // duplicate "GitHub authorization required" block users were seeing. - var submittedGithubUsername = TryReadString(root, "github_username"); - return BuildDailyReportForm( - preferredGithubUsername: submittedGithubUsername, - introCard: introCard); - } - - private static ActionElement BuildTextInput(string actionId, string label, string placeholder) => - new() - { - Kind = ActionElementKind.TextInput, - ActionId = actionId, - Label = label, - Placeholder = placeholder, - }; - - private static ActionElement BuildFormSubmit(string actionId, string label, bool isPrimary) => - new() - { - Kind = ActionElementKind.FormSubmit, - ActionId = actionId, - Label = label, - IsPrimary = isPrimary, - }; - private static MessageContent TextContent(string text) => AgentBuilderJson.TextContent(text); private static bool TryReadError(JsonElement root, out string error) => @@ -384,9 +134,6 @@ private static bool TryReadError(JsonElement root, out string error) => private static string? TryReadString(JsonElement element, string propertyName) => AgentBuilderJson.TryReadString(element, propertyName); - private static bool TryReadBool(JsonElement element, string propertyName) => - AgentBuilderJson.TryReadBool(element, propertyName); - private static string? 
TryReadOptional(JsonElement element, string propertyName) => AgentBuilderJson.TryReadOptional(element, propertyName); } diff --git a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardFlow.cs b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardFlow.cs index 11c908970..a480f23f2 100644 --- a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardFlow.cs +++ b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderCardFlow.cs @@ -1,4 +1,3 @@ -using System.Globalization; using System.Text; using System.Text.Json; using Aevatar.GAgents.Channel.Abstractions; @@ -12,11 +11,6 @@ public static class AgentBuilderCardFlow { private const string PrivateChatType = "p2p"; private const string CardActionChatType = "card_action"; - private const string OpenDailyReportFormAction = AgentBuilderActionIds.OpenDailyReportForm; - private const string OpenSocialMediaFormAction = AgentBuilderActionIds.OpenSocialMediaForm; - private const string DailyReportAction = AgentBuilderActionIds.DailyReport; - private const string SocialMediaAction = AgentBuilderActionIds.SocialMedia; - private const string ListTemplatesAction = AgentBuilderActionIds.ListTemplates; private const string ListAgentsAction = AgentBuilderActionIds.ListAgents; private const string AgentStatusAction = AgentBuilderActionIds.AgentStatus; private const string RunAgentAction = AgentBuilderActionIds.RunAgent; @@ -24,31 +18,12 @@ public static class AgentBuilderCardFlow private const string EnableAgentAction = AgentBuilderActionIds.EnableAgent; private const string ConfirmDeleteAgentAction = AgentBuilderActionIds.ConfirmDeleteAgent; private const string DeleteAgentAction = AgentBuilderActionIds.DeleteAgent; - private const string DefaultScheduleTime = "09:00"; - private const string SocialMediaCommand = "/social-media"; private const string AgentStatusCommand = "/agent-status"; private const string RunAgentCommand = "/run-agent"; private const string DisableAgentCommand = "/disable-agent"; private const string 
EnableAgentCommand = "/enable-agent"; private const string DeleteAgentCommand = "/delete-agent"; - private static readonly HashSet LaunchIntents = new(StringComparer.OrdinalIgnoreCase) - { - "/daily", - "create daily report", - "创建日报助手", - "创建日报agent", - }; - - private static readonly HashSet SocialMediaIntents = new(StringComparer.OrdinalIgnoreCase) - { - SocialMediaCommand, - "/create-social-media", - "create social media", - "创建社媒助手", - "创建社媒agent", - }; - private static readonly HashSet ListIntents = new(StringComparer.OrdinalIgnoreCase) { "/agents", @@ -56,45 +31,20 @@ public static class AgentBuilderCardFlow "我的助手", }; - private static readonly HashSet TemplateIntents = new(StringComparer.OrdinalIgnoreCase) - { - "/templates", - "/agent-templates", - "list templates", - "模板列表", - }; - public static bool TryResolve(ChannelInboundEvent evt, out AgentBuilderFlowDecision? decision) => TryResolve(evt, preferredGithubUsername: null, out decision); - public static async Task TryResolveAsync( + public static Task TryResolveAsync( ChannelInboundEvent evt, IUserConfigQueryPort? userConfigQueryPort, CancellationToken ct = default) { ArgumentNullException.ThrowIfNull(evt); + _ = userConfigQueryPort; + _ = ct; - string? preferredGithubUsername = null; - if (ShouldLoadPreferredGithubUsername(evt) && userConfigQueryPort is not null) - { - try - { - preferredGithubUsername = (await userConfigQueryPort.GetAsync( - ChannelUserConfigScope.FromInboundEvent(evt), - ct)).GithubUsername; - } - catch (OperationCanceledException) - { - throw; - } - catch - { - preferredGithubUsername = null; - } - } - - TryResolve(evt, preferredGithubUsername, out var decision); - return decision; + TryResolve(evt, preferredGithubUsername: null, out var decision); + return Task.FromResult(decision); } private static bool TryResolve( @@ -103,27 +53,12 @@ private static bool TryResolve( out AgentBuilderFlowDecision? 
decision) { ArgumentNullException.ThrowIfNull(evt); + _ = preferredGithubUsername; decision = null; if (IsPrivateChatText(evt)) { var normalized = NormalizeText(evt.Text); - if (LaunchIntents.Contains(normalized)) - { - // Direct webhook deployments hit this path (no Nyx relay in front); the pre-serialized - // Lark JSON card from BuildDailyReportCard used to land in MessageContent.Text and - // render as raw JSON. Route through the channel-neutral form builder so the composer - // emits a real interactive card. - decision = AgentBuilderFlowDecision.DirectReply( - AgentBuilderCardContent.BuildDailyReportForm(preferredGithubUsername)); - return true; - } - - if (SocialMediaIntents.Contains(normalized)) - { - decision = AgentBuilderFlowDecision.DirectReply(AgentBuilderCardContent.BuildSocialMediaForm()); - return true; - } if (ListIntents.Contains(normalized)) { @@ -131,12 +66,6 @@ private static bool TryResolve( return true; } - if (TemplateIntents.Contains(normalized)) - { - decision = AgentBuilderFlowDecision.ToolCall(ListTemplatesAction, """{"action":"list_templates"}"""); - return true; - } - if (TryResolvePrivateChatCommand(normalized, out decision)) return true; @@ -149,48 +78,14 @@ private static bool TryResolve( if (!evt.Extra.TryGetValue("agent_builder_action", out var action)) return false; + string? argumentsJson; + string? validationError; switch ((action ?? 
string.Empty).Trim()) { - case OpenDailyReportFormAction: - decision = AgentBuilderFlowDecision.DirectReply( - AgentBuilderCardContent.BuildDailyReportForm(preferredGithubUsername)); - return true; - - case OpenSocialMediaFormAction: - decision = AgentBuilderFlowDecision.DirectReply(AgentBuilderCardContent.BuildSocialMediaForm()); - return true; - - case DailyReportAction: - if (!TryBuildCreateDailyReportArguments(evt, out var argumentsJson, out var validationError)) - { - decision = AgentBuilderFlowDecision.DirectReply(validationError!); - return true; - } - - decision = AgentBuilderFlowDecision.ToolCall(DailyReportAction, argumentsJson!); - return true; - - case SocialMediaAction: - if (!TryBuildCreateSocialMediaArguments(evt, out argumentsJson, out validationError)) - { - decision = AgentBuilderFlowDecision.DirectReply(validationError!); - return true; - } - - decision = AgentBuilderFlowDecision.ToolCall(SocialMediaAction, argumentsJson!); - return true; - case ListAgentsAction: decision = AgentBuilderFlowDecision.ToolCall(ListAgentsAction, """{"action":"list_agents"}"""); return true; - case ListTemplatesAction: - // The /agents card surfaces a `Templates` button (also reachable via the - // text-flow `/templates` slash command). Without this branch, clicking the - // button leaves the user with an unhandled card action and no feedback. - decision = AgentBuilderFlowDecision.ToolCall(ListTemplatesAction, """{"action":"list_templates"}"""); - return true; - case AgentStatusAction: if (!TryBuildAgentActionArguments(evt, "agent_status", out argumentsJson, out validationError)) { @@ -238,8 +133,6 @@ private static bool TryResolve( return true; } - // Use the MessageContent overload so the relay composer renders this as a real - // Lark card instead of forwarding a JSON-as-text payload (issue #482). decision = AgentBuilderFlowDecision.DirectReply(BuildDeleteConfirmationCard( agentId, evt.Extra.TryGetValue("template", out var template) ? 
template : null)); @@ -275,21 +168,11 @@ public static MessageContent FormatToolResult(AgentBuilderFlowDecision decision, using var doc = JsonDocument.Parse(toolResultJson); return decision.ToolAction switch { - // Daily report creation uses the shared formatter so Nyx-relay slash commands and - // Feishu card-action submits render the same "running now, I'll reply when done" - // acknowledgment. - DailyReportAction => AgentBuilderCardContent.FormatDailyReportToolReply(doc.RootElement), - SocialMediaAction => FormatCreateSocialMediaResult(doc.RootElement), - ListTemplatesAction => FormatListTemplatesResult(doc.RootElement), - // Card-click "Refresh List" and the typed `/agents` command share the same - // unified renderer (issue #476). ListAgentsAction => AgentBuilderCardContent.FormatListAgentsResult(doc.RootElement), AgentStatusAction => FormatAgentStatusResult(doc.RootElement), RunAgentAction => FormatRunAgentResult(doc.RootElement), DisableAgentAction => FormatDisableAgentResult(doc.RootElement), EnableAgentAction => FormatEnableAgentResult(doc.RootElement), - // After a delete completes, surface the updated registry through the same unified - // list renderer with the delete notice prepended. DeleteAgentAction => FormatDeleteAgentResultAsList(doc.RootElement), _ => ToTextContent(toolResultJson), }; @@ -311,98 +194,6 @@ public static string ResolveToolChatType(ChannelInboundEvent evt) : evt.ChatType; } - private static bool TryBuildCreateDailyReportArguments( - ChannelInboundEvent evt, - out string? argumentsJson, - out string? validationError) - { - argumentsJson = null; - validationError = null; - var githubUsername = evt.Extra.TryGetValue("github_username", out var rawGithubUsername) - ? NormalizeOptional(rawGithubUsername) - : null; - - if (!TryBuildDailyCron(evt.Extra.TryGetValue("schedule_time", out var scheduleTime) ? 
scheduleTime : null, out var scheduleCron, out validationError)) - return false; - - var scheduleTimezone = (evt.Extra.TryGetValue("schedule_timezone", out var rawTimezone) - ? rawTimezone - : null) ?? SkillRunnerDefaults.DefaultTimezone; - scheduleTimezone = string.IsNullOrWhiteSpace(scheduleTimezone) - ? SkillRunnerDefaults.DefaultTimezone - : scheduleTimezone.Trim(); - - var repositories = evt.Extra.TryGetValue("repositories", out var rawRepositories) - ? NormalizeOptional(rawRepositories) - : null; - - var runImmediately = !evt.Extra.TryGetValue("run_immediately", out var rawRunImmediately) || - !bool.TryParse(rawRunImmediately, out var parsedRunImmediately) || - parsedRunImmediately; - - argumentsJson = JsonSerializer.Serialize(new - { - action = "create_agent", - template = "daily_report", - github_username = githubUsername, - save_github_username_preference = githubUsername is not null, - repositories, - schedule_cron = scheduleCron, - schedule_timezone = scheduleTimezone, - run_immediately = runImmediately, - }); - return true; - } - - private static bool TryBuildCreateSocialMediaArguments( - ChannelInboundEvent evt, - out string? argumentsJson, - out string? validationError) - { - argumentsJson = null; - validationError = null; - - if (!TryGetRequiredExtra(evt, "topic", out var topic)) - { - validationError = "Topic is required. Send /social-media and fill in the form again."; - return false; - } - - if (!TryBuildDailyCron(evt.Extra.TryGetValue("schedule_time", out var scheduleTime) ? scheduleTime : null, out var scheduleCron, out validationError)) - return false; - - var scheduleTimezone = (evt.Extra.TryGetValue("schedule_timezone", out var rawTimezone) - ? rawTimezone - : null) ?? SkillRunnerDefaults.DefaultTimezone; - scheduleTimezone = string.IsNullOrWhiteSpace(scheduleTimezone) - ? SkillRunnerDefaults.DefaultTimezone - : scheduleTimezone.Trim(); - - var audience = evt.Extra.TryGetValue("audience", out var rawAudience) - ? 
NormalizeOptional(rawAudience) - : null; - var style = evt.Extra.TryGetValue("style", out var rawStyle) - ? NormalizeOptional(rawStyle) - : null; - - var runImmediately = !evt.Extra.TryGetValue("run_immediately", out var rawRunImmediately) || - !bool.TryParse(rawRunImmediately, out var parsedRunImmediately) || - parsedRunImmediately; - - argumentsJson = JsonSerializer.Serialize(new - { - action = "create_agent", - template = "social_media", - topic, - audience, - style, - schedule_cron = scheduleCron, - schedule_timezone = scheduleTimezone, - run_immediately = runImmediately, - }); - return true; - } - private static bool TryBuildAgentActionArguments( ChannelInboundEvent evt, string action, @@ -591,27 +382,6 @@ private static bool TryParseAgentCommand( return true; } - private static bool TryBuildDailyCron(string? rawTime, out string? cron, out string? error) - { - cron = null; - error = null; - - var normalized = NormalizeOptional(rawTime) ?? DefaultScheduleTime; - if (!TimeOnly.TryParseExact( - normalized, - ["HH:mm", "H:mm"], - CultureInfo.InvariantCulture, - DateTimeStyles.None, - out var time)) - { - error = "schedule_time must use HH:mm, for example 09:00."; - return false; - } - - cron = $"{time.Minute} {time.Hour} * * *"; - return true; - } - private static bool TryGetRequiredExtra(ChannelInboundEvent evt, string key, out string value) { value = string.Empty; @@ -626,19 +396,6 @@ private static bool IsPrivateChatText(ChannelInboundEvent evt) => string.Equals(evt.ChatType, PrivateChatType, StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(evt.Text); - private static bool ShouldLoadPreferredGithubUsername(ChannelInboundEvent evt) - { - if (IsPrivateChatText(evt)) - { - var normalized = NormalizeText(evt.Text); - return LaunchIntents.Contains(normalized); - } - - return string.Equals(evt.ChatType, CardActionChatType, StringComparison.Ordinal) && - evt.Extra.TryGetValue("agent_builder_action", out var action) && - string.Equals(action, 
OpenDailyReportFormAction, StringComparison.Ordinal); - } - private static string NormalizeText(string? text) => (text ?? string.Empty).Trim(); private static string? NormalizeOptional(string? value) @@ -647,109 +404,6 @@ private static bool ShouldLoadPreferredGithubUsername(ChannelInboundEvent evt) return normalized.Length == 0 ? null : normalized; } - private static MessageContent FormatCreateSocialMediaResult(JsonElement root) - { - if (TryReadError(root, out var error)) - return ToTextContent($"Create social media agent failed: {error}"); - - var status = ReadString(root, "status") ?? "accepted"; - var agentId = ReadString(root, "agent_id") ?? "unknown-agent"; - var workflowId = ReadString(root, "workflow_id") ?? "pending"; - var nextRun = ReadString(root, "next_scheduled_run") ?? "pending"; - var note = NormalizeOptional(ReadString(root, "note")); - - var headline = string.Equals(status, "created", StringComparison.OrdinalIgnoreCase) - ? "Social media agent created." - : "Social media agent accepted."; - - var body = new StringBuilder(); - body.Append(headline).Append('\n'); - body.Append($"- Agent ID: `{agentId}`\n"); - body.Append($"- Workflow ID: `{workflowId}`\n"); - body.Append($"- Next scheduled run: `{nextRun}`"); - if (note is not null) - body.Append("\n\n").Append(note); - - var content = new MessageContent(); - content.Cards.Add(new CardBlock - { - Kind = CardBlockKind.Section, - BlockId = $"social_media_created:{agentId}", - Title = "Social Media Agent", - Text = body.ToString(), - }); - content.Actions.Add(BuildCardAction("View Agents", ListAgentsAction, isPrimary: true)); - content.Actions.Add(BuildCardAction("Create Another", OpenSocialMediaFormAction, isPrimary: false)); - return content; - } - - private static MessageContent FormatListTemplatesResult(JsonElement root) - { - if (TryReadError(root, out var error)) - return ToTextContent($"List templates failed: {error}"); - - var content = new MessageContent(); - - if 
(!root.TryGetProperty("templates", out var templatesElement) || - templatesElement.ValueKind != JsonValueKind.Array || - templatesElement.GetArrayLength() == 0) - { - content.Cards.Add(new CardBlock - { - Kind = CardBlockKind.Section, - BlockId = "templates_empty", - Title = "Available Templates", - Text = "No templates available right now.", - }); - content.Actions.Add(BuildCardAction("View Agents", ListAgentsAction, isPrimary: false)); - return content; - } - - var body = new StringBuilder(); - body.Append("Day One currently exposes the templates below."); - - var hasReadyDaily = false; - var hasReadySocial = false; - - foreach (var item in templatesElement.EnumerateArray()) - { - var name = ReadString(item, "name") ?? "unknown-template"; - var status = ReadString(item, "status") ?? "unknown"; - var description = ReadString(item, "description") ?? "No description."; - var requiredFields = ReadStringArray(item, "required_fields"); - var optionalFields = ReadStringArray(item, "optional_fields"); - - body.Append("\n\n"); - body.Append($"**`{name}`** · {status}\n"); - body.Append($"{description}\n"); - body.Append($"- Required: {FormatFieldList(requiredFields)}\n"); - body.Append($"- Optional: {FormatFieldList(optionalFields)}"); - - if (string.Equals(status, "ready", StringComparison.OrdinalIgnoreCase)) - { - if (string.Equals(name, "daily_report", StringComparison.OrdinalIgnoreCase)) - hasReadyDaily = true; - else if (string.Equals(name, "social_media", StringComparison.OrdinalIgnoreCase)) - hasReadySocial = true; - } - } - - content.Cards.Add(new CardBlock - { - Kind = CardBlockKind.Section, - BlockId = "templates_list", - Title = "Available Templates", - Text = body.ToString(), - }); - - if (hasReadyDaily) - content.Actions.Add(BuildCardAction("Create Daily Report", OpenDailyReportFormAction, isPrimary: true)); - if (hasReadySocial) - content.Actions.Add(BuildCardAction("Create Social Media", OpenSocialMediaFormAction, isPrimary: !hasReadyDaily)); - 
content.Actions.Add(BuildCardAction("View Agents", ListAgentsAction, isPrimary: false)); - return content; - } - private static MessageContent FormatAgentStatusResult(JsonElement root) { if (TryReadError(root, out var error)) @@ -804,9 +458,6 @@ private static MessageContent FormatAgentStatusResult(JsonElement root) } content.Actions.Add(BuildCardAction("Back to Agents", ListAgentsAction, isPrimary: false)); - // The card-flow path keeps the explicit confirmation step before deletion (vs. the typed - // /agent-status path's direct delete) so the per-agent template is carried along to the - // confirmation card. Danger styling matches Lark's red-button affordance. var deleteButton = BuildAgentScopedCardAction("Delete", ConfirmDeleteAgentAction, agentId, isPrimary: false); deleteButton.IsDanger = true; deleteButton.Arguments["template"] = template; @@ -884,27 +535,6 @@ private static bool TryReadError(JsonElement root, out string error) => private static string? ReadString(JsonElement element, string propertyName) => AgentBuilderJson.TryReadString(element, propertyName); - private static IReadOnlyList ReadStringArray(JsonElement element, string propertyName) - { - if (!element.TryGetProperty(propertyName, out var property) || - property.ValueKind != JsonValueKind.Array) - return Array.Empty(); - - var values = new List(); - foreach (var item in property.EnumerateArray()) - { - if (item.ValueKind == JsonValueKind.String && !string.IsNullOrWhiteSpace(item.GetString())) - values.Add(item.GetString()!); - } - - return values; - } - - private static string FormatFieldList(IReadOnlyList fields) => - fields.Count == 0 - ? "`None`" - : string.Join(", ", fields.Select(static field => $"`{field}`")); - private static MessageContent BuildDeleteConfirmationCard(string agentId, string? template) { var templateLabel = NormalizeOptional(template) ?? 
"unknown-template"; diff --git a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTemplates.cs b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTemplates.cs deleted file mode 100644 index 3b1441697..000000000 --- a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTemplates.cs +++ /dev/null @@ -1,314 +0,0 @@ -using System.Text; - -namespace Aevatar.GAgents.Authoring.Lark; - -public static class AgentBuilderTemplates -{ - public static IReadOnlyList ListTemplates() => - [ - new - { - name = "daily_report", - status = "ready", - description = "Generate a daily GitHub progress summary and send it back to the current Feishu private chat.", - required_fields = new[] { "schedule_cron" }, - optional_fields = new[] { "github_username", "repositories", "schedule_timezone", "run_immediately" }, - }, - new - { - name = "social_media", - status = "ready", - description = "Generate a social media draft on a schedule and send it into the current Feishu private chat for approval.", - required_fields = new[] { "topic", "schedule_cron" }, - optional_fields = new[] { "audience", "style", "schedule_timezone", "run_immediately" }, - }, - ]; - - public static bool TryBuildDailyReportSpec( - string githubUsername, - string? repositories, - out DailyReportTemplateSpec? spec, - out string? error) - { - spec = null; - error = null; - - var normalizedUser = (githubUsername ?? string.Empty).Trim(); - if (string.IsNullOrWhiteSpace(normalizedUser)) - { - error = "github_username is required for template=daily_report"; - return false; - } - - var repoList = NormalizeRepositories(repositories); - var skillPrompt = BuildDailyReportSkillPrompt(normalizedUser, repoList); - - var executionPrompt = repoList.Count == 0 - ? $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Follow the section schema in the system prompt. Return plain text only." - : $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. 
Restrict source queries to these repositories (one pass per repo, do not collapse to a global search): {string.Join(", ", repoList)}. Follow the section schema in the system prompt. Return plain text only."; - - spec = new DailyReportTemplateSpec( - "daily_report", - "daily_report", - skillPrompt, - executionPrompt, - ["api-github", "api-lark-bot"], - repoList); - return true; - } - - public static bool TryBuildSocialMediaSpec( - string agentId, - string topic, - string? audience, - string? style, - string? deliveryProviderSlug, - string? publishProviderSlug, - out SocialMediaTemplateSpec? spec, - out string? error) - { - spec = null; - error = null; - - var normalizedAgentId = (agentId ?? string.Empty).Trim(); - if (string.IsNullOrWhiteSpace(normalizedAgentId)) - { - error = "agent_id is required for template=social_media"; - return false; - } - - var normalizedTopic = (topic ?? string.Empty).Trim(); - if (string.IsNullOrWhiteSpace(normalizedTopic)) - { - error = "topic is required for template=social_media"; - return false; - } - - var normalizedAudience = NormalizeOptional(audience) ?? "general followers"; - var normalizedStyle = NormalizeOptional(style) ?? "clear, concise, and professional"; - var normalizedDeliverySlug = NormalizeOptional(deliveryProviderSlug) ?? "api-lark-bot"; - var normalizedPublishSlug = NormalizeOptional(publishProviderSlug) ?? 
"api-twitter"; - var workflowId = BuildSocialMediaWorkflowId(normalizedAgentId); - var workflowName = BuildSocialMediaWorkflowName(normalizedAgentId); - var displayName = $"Social Media Approval {normalizedAgentId}"; - var executionPrompt = $"Generate the scheduled social media draft for topic `{normalizedTopic}` and route it for approval."; - - spec = new SocialMediaTemplateSpec( - WorkflowId: workflowId, - WorkflowName: workflowName, - DisplayName: displayName, - WorkflowYaml: BuildSocialMediaWorkflowYaml( - workflowName, - normalizedAgentId, - normalizedTopic, - normalizedAudience, - normalizedStyle, - normalizedPublishSlug), - ExecutionPrompt: executionPrompt, - RequiredServiceSlugs: [normalizedDeliverySlug, normalizedPublishSlug]); - return true; - } - - // Daily report system prompt is treated as a fetch-and-summarize SPECIFICATION rather than a - // freeform creative brief: explicit section order, hard per-section line budgets, and an - // "omit if empty" rule. See issue #423 for the rationale (current single-paragraph output is - // too thin and pads when sources are silent). - private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOnlyList repoList) - { - var repoScope = repoList.Count == 0 - ? "Repository scope: not pinned. Use the global GitHub search endpoints listed below." - : $"Repository scope: {string.Join(", ", repoList)}. Run the per-repo endpoints once per repo; do NOT fold the list into a global search query (the /search/* endpoints don't filter to a repo allowlist cleanly)."; - - var prompt = new StringBuilder() - .AppendLine("You are Aevatar Daily Report Runner.") - .AppendLine("Each run produces one Feishu-ready summary of the user's recent GitHub work over the last 24 hours.") - .AppendLine("Use NyxID-backed tools only. 
Prefer nyxid_proxy with service slug `api-github` for GitHub data access.") - .AppendLine() - .AppendLine($"Primary GitHub username: {normalizedUser}") - .AppendLine(repoScope) - .AppendLine() - .AppendLine("# Output sections (emit in this exact order)") - .AppendLine() - .AppendLine("Each section has a hard line budget. If a section has zero data OR the source is unavailable, OMIT THE SECTION ENTIRELY (header and body) — do not pad with `no activity` or filler.") - .AppendLine() - .AppendLine("1. Title (1 line) — `Daily report — {username} — last 24h`.") - .AppendLine("2. Shipped (≤6 lines) — PRs merged AND commits authored by the user in the window. Format `- [owner/repo#NNN] title` for PRs, `- [owner/repo@sha7] subject` for commits.") - .AppendLine("3. In flight (≤6 lines) — open PRs authored by the user. Append `(stale)` when the PR has had no activity for >24h.") - .AppendLine("4. Reviews (≤4 lines) — PRs the user reviewed in the window. Include kind counts, e.g. `approved 2 / requested-changes 1 / commented 3`.") - .AppendLine("5. Issues (≤4 lines) — issues opened, closed, or commented on by the user.") - .AppendLine("6. CI (≤3 lines) — failing GitHub Actions runs on the tracked repos. Best-effort and only feasible in repo-allowlist mode; OMIT this section in no-repo mode (the global search endpoints do not expose Actions run conclusions).") - .AppendLine("7. Trend (1 line, optional) — running totals vs the prior 24h, e.g. `Trend: shipped 3 (+1), reviews 5 (-2)`. Omit when the prior-window data could not be fetched.") - .AppendLine("8. Blockers (1 line) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`. Position-locked at slot 8; the only section that may sit below it is the §9 Source health footer.") - .AppendLine("9. Source health (1 line, footer) — `Source health: `. Emit ONLY when at least one source returned a non-2xx / error-shaped tool result. 
When emitted, this is always the final line — below Blockers, below everything.") - .AppendLine() - .AppendLine("If EVERY source returned 2xx with no matching items (genuine empty day), return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else — do NOT emit Blockers or Source health. If ANY source failed, you are NOT on the empty-day path: emit at least the title line plus the §9 Source health footer (any other sections that have 2xx data render normally; §8 Blockers is also emitted).") - .AppendLine("Do not invent activity. Do not paraphrase issue or PR titles into different wording. Keep each line short — Feishu text messages have a body cap, prefer trimming trailing detail over exceeding it.") - .AppendLine() - .AppendLine("# Suggested GitHub proxy calls") - .AppendLine(); - - prompt - .AppendLine($"Substitution variables in the URLs below: `{{username}}` → `{normalizedUser}`; `{{iso_date}}` → start of the 24h window in ISO 8601 UTC (e.g. `2026-04-26T09:00:00Z`); `{{owner}}/{{repo}}` → each entry from the repository allowlist. 
Always substitute these literally before sending.") - .AppendLine(); - - if (repoList.Count == 0) - { - prompt - .AppendLine("Repository allowlist not provided — use the global search endpoints:") - .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs") - .AppendLine("- GET /search/commits?q=author:{username}+author-date:>={iso_date} // shipped commits") - .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:open // in flight") - .AppendLine("- GET /search/issues?q=reviewed-by:{username}+updated:>={iso_date} // reviews") - .AppendLine("- GET /search/issues?q=author:{username}+is:issue+created:>={iso_date} // issues opened") - .AppendLine("- GET /search/issues?q=author:{username}+is:issue+is:closed+closed:>={iso_date} // issues closed") - .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date} // issues commented") - .AppendLine("// CI section is omitted in no-repo mode: the global /search/* endpoints do not expose Actions run conclusions, and per-repo /actions/runs requires a known repo. 
Skip section 6 entirely."); - } - else - { - prompt - .AppendLine("Repository allowlist provided — run these per-repo (one search per allowlist entry; do NOT collapse into one global query):") - .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs (search keys on merge time + author, reliable across pagination)") - .AppendLine("- GET /search/commits?q=repo:{owner}/{repo}+author:{username}+author-date:>={iso_date} // shipped commits") - .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:open // in flight") - .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+reviewed-by:{username}+updated:>={iso_date} // reviews") - .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:issue+updated:>={iso_date} // issues authored (created/closed)") - .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+commenter:{username}+is:issue+updated:>={iso_date} // issues commented") - .AppendLine("- GET /repos/{owner}/{repo}/actions/runs?per_page=10 // CI: filter `conclusion=failure` and `created_at >= {iso_date}` client-side; do NOT add a `branch=` filter (default branch varies; trim noise client-side instead)"); - } - - prompt - .AppendLine() - .AppendLine("# Source health — when to emit the §9 footer") - .AppendLine() - .AppendLine("Do NOT collapse transport, auth, or proxy failures into the empty-day fallback. Classify every tool result before mapping it to a section:") - .AppendLine("- 2xx with an empty list / no matching items → genuine zero data; the section is omitted per the schema. Does NOT trigger §9.") - .AppendLine("- 4xx / 5xx / tool error envelope (e.g. `{\"error\": true, ...}`, revoked OAuth grant, proxy timeout) → the SOURCE is UNAVAILABLE, not zero. 
Add the source name + short reason to the §9 Source health footer.") - .AppendLine("- The empty-day fallback (`No measurable activity in the last 24h.`) is ONLY valid when EVERY source returned 2xx. If ANY source failed, you are NOT on the empty-day path — emit the title plus the §9 Source health footer at minimum. Silently masking credential expiration as `No measurable activity` is the bug we are guarding against.") - .AppendLine("- Do not retry. Do not fall back to invented data. Do not leave any literal `{username}` / `{iso_date}` / `{owner}/{repo}` placeholders in outbound URLs."); - - return prompt.ToString(); - } - - private static string BuildSocialMediaWorkflowId(string agentId) => - $"social-media-{SanitizeSegment(agentId)}"; - - private static string BuildSocialMediaWorkflowName(string agentId) => - $"social_media_{SanitizeSegment(agentId).Replace('-', '_')}"; - - private static string BuildSocialMediaWorkflowYaml( - string workflowName, - string deliveryTargetId, - string topic, - string audience, - string style, - string publishProviderSlug) - { - return $$""" - name: {{workflowName}} - description: Generate a social media draft, request human approval in Feishu, and publish the approved post to Twitter (X). - - roles: - - id: writer - name: Social Writer - provider: nyxid - system_prompt: | - You write polished short-form social media updates for professional audiences. - Keep drafts specific, concrete, and ready for human approval. - - steps: - - id: draft_post - type: llm_call - role: writer - parameters: - prompt_prefix: | - Draft one short social media post. - Topic: {{EscapeYamlBlock(topic)}} - Audience: {{EscapeYamlBlock(audience)}} - Style: {{EscapeYamlBlock(style)}} - Requirements: - - Return plain text only. - - Keep it concise and publication-ready. - - Do not add hashtags unless they are clearly justified. - next: request_approval - - - id: request_approval - type: human_approval - parameters: - prompt: "Approve this social media draft?" 
- delivery_target_id: "{{EscapeDoubleQuoted(deliveryTargetId)}}" - on_reject: skip - branches: - "true": publish_to_twitter - "false": done - - - id: publish_to_twitter - type: twitter_publish - parameters: - publish_provider_slug: "{{EscapeDoubleQuoted(publishProviderSlug)}}" - delivery_target_id: "{{EscapeDoubleQuoted(deliveryTargetId)}}" - on_error: - strategy: skip - default_output: "twitter_publish_failed" - next: done - - - id: done - type: assign - parameters: - target: "result" - value: "$input" - """; - } - - private static string EscapeDoubleQuoted(string value) => - (value ?? string.Empty) - .Replace("\\", "\\\\", StringComparison.Ordinal) - .Replace("\"", "\\\"", StringComparison.Ordinal); - - private static string EscapeYamlBlock(string value) => - (value ?? string.Empty).Replace("\r\n", "\n", StringComparison.Ordinal); - - private static IReadOnlyList NormalizeRepositories(string? repositories) => - (repositories ?? string.Empty) - .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) - .Where(static item => !string.IsNullOrWhiteSpace(item)) - .Distinct(StringComparer.OrdinalIgnoreCase) - .ToArray(); - - private static string? NormalizeOptional(string? value) - { - var normalized = (value ?? string.Empty).Trim(); - return normalized.Length == 0 ? null : normalized; - } - - private static string SanitizeSegment(string value) - { - var builder = new StringBuilder(value.Length); - foreach (var ch in value) - { - if (char.IsLetterOrDigit(ch)) - builder.Append(char.ToLowerInvariant(ch)); - else if (ch is '-' or '_') - builder.Append('-'); - } - - var sanitized = builder.ToString().Trim('-'); - return string.IsNullOrWhiteSpace(sanitized) ? 
"agent" : sanitized; - } -} - -public sealed record DailyReportTemplateSpec( - string TemplateName, - string SkillName, - string SkillContent, - string ExecutionPrompt, - IReadOnlyList RequiredServiceSlugs, - IReadOnlyList Repositories); - -public sealed record SocialMediaTemplateSpec( - string WorkflowId, - string WorkflowName, - string DisplayName, - string WorkflowYaml, - string ExecutionPrompt, - IReadOnlyList RequiredServiceSlugs); diff --git a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTool.cs b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTool.cs index 1f99d3f8c..510b71d87 100644 --- a/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTool.cs +++ b/agents/Aevatar.GAgents.Authoring.Lark/AgentBuilderTool.cs @@ -1,4 +1,3 @@ -using System.Net; using System.Text.Json; using Aevatar.AI.Abstractions.LLMProviders; using Aevatar.AI.Abstractions.ToolProviders; @@ -7,12 +6,7 @@ using Aevatar.GAgentService.Abstractions.Ports; using Aevatar.Foundation.Abstractions; using Aevatar.GAgents.Channel.Runtime; -using Aevatar.GAgents.Platform.Lark; using Aevatar.GAgents.Scheduled; -using Aevatar.Studio.Application.Studio.Abstractions; -using Aevatar.Workflow.Application.Abstractions.Runs; -using Google.Protobuf; -using Google.Protobuf.WellKnownTypes; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; @@ -22,10 +16,6 @@ public sealed class AgentBuilderTool : IAgentTool { private readonly IServiceProvider _serviceProvider; private readonly ILogger? _logger; - // Per-instance polling budget for actor -> projector -> document store - // propagation. Defaults to ProjectionWaitDefaults (15 s); tests inject - // shrunk values via the constructor instead of mutating a process-global, - // which would race other tests if the test surface ever parallelizes. 
private readonly int _projectionWaitAttempts; private readonly int _projectionWaitDelayMilliseconds; @@ -44,8 +34,9 @@ public AgentBuilderTool( public string Name => "agent_builder"; public string Description => - "Create and manage persistent user-facing automation agents for the current channel context. " + - "Actions: list_templates, create_agent, list_agents, agent_status, run_agent, disable_agent, enable_agent, delete_agent."; + "List and manage the caller's persistent automation agents. " + + "Actions: list_agents, agent_status, run_agent, disable_agent, enable_agent, delete_agent. " + + "Agent creation is not handled here — recipes for new agents live as Ornn skills."; // Note (issue #466): no `owner_nyx_user_id` parameter is exposed. The tool always // operates on the caller's own agents; the resolver derives ownership from the @@ -58,73 +49,22 @@ public AgentBuilderTool( "properties": { "action": { "type": "string", - "enum": ["list_templates", "create_agent", "list_agents", "agent_status", "run_agent", "disable_agent", "enable_agent", "delete_agent"] - }, - "template": { - "type": "string", - "description": "Template name, currently supports daily_report and social_media" + "enum": ["list_agents", "agent_status", "run_agent", "disable_agent", "enable_agent", "delete_agent"] }, "agent_id": { "type": "string", - "description": "Optional stable actor ID. Auto-generated when omitted." 
- }, - "github_username": { - "type": "string", - "description": "GitHub username for the daily_report template" - }, - "save_github_username_preference": { - "type": "boolean", - "description": "When true, save github_username as the owner-scoped default preference after a successful daily_report creation" - }, - "topic": { - "type": "string", - "description": "Primary topic or campaign focus for the social_media template" - }, - "audience": { - "type": "string", - "description": "Optional audience descriptor for the social_media template" - }, - "style": { - "type": "string", - "description": "Optional tone/style instruction for the social_media template" - }, - "repositories": { - "type": "string", - "description": "Optional comma-separated repositories to prioritize" - }, - "schedule_cron": { - "type": "string", - "description": "Cron expression for future executions" - }, - "schedule_timezone": { - "type": "string", - "description": "IANA or system timezone ID (default: UTC)" - }, - "conversation_id": { - "type": "string", - "description": "Override outbound conversation/chat ID. Defaults to current channel context." - }, - "nyx_provider_slug": { - "type": "string", - "description": "Outbound Nyx proxy slug (default: api-lark-bot)" - }, - "publish_provider_slug": { - "type": "string", - "description": "Optional Nyx proxy slug used to publish approved content (default: api-twitter for the social_media template)" - }, - "run_immediately": { - "type": "boolean", - "description": "When true, trigger one execution right after creation" + "description": "Stable actor ID. Required for every action except list_agents." }, "confirm": { "type": "boolean", - "description": "Must be true to execute delete_agent" + "description": "Must be true to execute delete_agent." }, "revision_feedback": { "type": "string", - "description": "Optional revision guidance to include in the next workflow-backed run" + "description": "Optional revision guidance to include in the next run." 
} - } + }, + "required": ["action"] } """; @@ -138,18 +78,13 @@ public async Task ExecuteAsync(string argumentsJson, CancellationToken c if (args.HasParseError) return JsonSerializer.Serialize(new { error = args.ParseError }); - var action = args.Str("action", "list_templates"); - if (string.Equals(action, "list_templates", StringComparison.Ordinal)) - return JsonSerializer.Serialize(new { templates = AgentBuilderTemplates.ListTemplates() }); - var queryPort = _serviceProvider.GetService(); var nyxClient = _serviceProvider.GetService(); var skillRunnerPort = _serviceProvider.GetService(); - var workflowAgentPort = _serviceProvider.GetService(); var catalogCommandPort = _serviceProvider.GetService(); var callerScopeResolver = _serviceProvider.GetService(); if (queryPort is null || nyxClient is null || - skillRunnerPort is null || workflowAgentPort is null || catalogCommandPort is null || + skillRunnerPort is null || catalogCommandPort is null || callerScopeResolver is null) { return """{"error":"Agent builder runtime not available. 
Required services are not registered in DI."}"""; @@ -172,412 +107,25 @@ skillRunnerPort is null || workflowAgentPort is null || catalogCommandPort is nu }); } + var action = args.Str("action", "list_agents"); return action switch { - "create_agent" => await CreateAgentAsync(args, queryPort, skillRunnerPort, workflowAgentPort, nyxClient, token, caller, ct), "list_agents" => await ListAgentsAsync(queryPort, caller, ct), "agent_status" => await GetAgentStatusAsync(args, queryPort, caller, ct), - "run_agent" => await RunAgentAsync(args, queryPort, skillRunnerPort, workflowAgentPort, caller, ct), - "disable_agent" => await DisableAgentAsync(args, queryPort, skillRunnerPort, workflowAgentPort, caller, ct), - "enable_agent" => await EnableAgentAsync(args, queryPort, skillRunnerPort, workflowAgentPort, caller, ct), - "delete_agent" => await DeleteAgentAsync(args, queryPort, catalogCommandPort, skillRunnerPort, workflowAgentPort, nyxClient, token, caller, ct), + "run_agent" => await RunAgentAsync(args, queryPort, skillRunnerPort, caller, ct), + "disable_agent" => await DisableAgentAsync(args, queryPort, skillRunnerPort, caller, ct), + "enable_agent" => await EnableAgentAsync(args, queryPort, skillRunnerPort, caller, ct), + "delete_agent" => await DeleteAgentAsync(args, queryPort, catalogCommandPort, skillRunnerPort, nyxClient, token, caller, ct), _ => JsonSerializer.Serialize(new { error = $"Unsupported action '{action}'" }), }; } - private async Task CreateAgentAsync( - BuilderArgs args, - IUserAgentCatalogQueryPort queryPort, - ISkillRunnerCommandPort skillRunnerPort, - IWorkflowAgentCommandPort workflowAgentPort, - NyxIdApiClient nyxClient, - string token, - OwnerScope caller, - CancellationToken ct) - { - var chatType = AgentToolRequestContext.TryGet(ChannelMetadataKeys.ChatType); - if (!string.IsNullOrWhiteSpace(chatType) && - !string.Equals(chatType, "p2p", StringComparison.OrdinalIgnoreCase)) - { - return """{"error":"Day One agent creation only supports private 
chat (chat_type=p2p)."}"""; - } - - var template = (args.Str("template") ?? string.Empty).Trim(); - return template.ToLowerInvariant() switch - { - "daily_report" => await CreateDailyReportAgentAsync(args, queryPort, skillRunnerPort, nyxClient, token, caller, ct), - "social_media" => await CreateSocialMediaAgentAsync(args, queryPort, workflowAgentPort, nyxClient, token, caller, ct), - _ => JsonSerializer.Serialize(new { error = $"Unsupported template '{template}'. Supported templates: daily_report, social_media." }), - }; - } - - private async Task CreateDailyReportAgentAsync( - BuilderArgs args, - IUserAgentCatalogQueryPort queryPort, - ISkillRunnerCommandPort skillRunnerPort, - NyxIdApiClient nyxClient, - string token, - OwnerScope caller, - CancellationToken ct) - { - var rawScopeId = NormalizeOptional(AgentToolRequestContext.TryGet(ChannelMetadataKeys.RegistrationScopeId)); - var configScopeId = NormalizeScopeId(rawScopeId); - // Bot's RegistrationScopeId is per-NyxID-account (one bot = one scope), so multiple - // Lark users sharing one bot would otherwise share a single UserConfigGAgent and - // overwrite each other's saved github_username (issue #436). Compose a per-end-user - // scope from the channel sender for personal-preference reads/writes only; - // SkillRunner.ScopeId stays bot-scoped for downstream NyxID-tenant tools. - var userConfigScopeId = ChannelUserConfigScope.FromMetadata(AgentToolRequestContext.CurrentMetadata); - var githubUsernameResolution = await ResolveDailyReportGithubUsernameAsync( - args, - nyxClient, - token, - userConfigScopeId, - ct); - if (githubUsernameResolution.ErrorResponse is not null) - return githubUsernameResolution.ErrorResponse; - - if (!AgentBuilderTemplates.TryBuildDailyReportSpec( - githubUsernameResolution.GithubUsername ?? 
string.Empty, - args.Str("repositories"), - out var templateSpec, - out var templateError)) - { - return JsonSerializer.Serialize(new { error = templateError }); - } - - var scheduleCron = args.Str("schedule_cron"); - if (string.IsNullOrWhiteSpace(scheduleCron)) - return """{"error":"schedule_cron is required for create_agent"}"""; - - var scheduleTimezone = args.Str("schedule_timezone") ?? SkillRunnerDefaults.DefaultTimezone; - if (!ChannelScheduleCalculator.TryGetNextOccurrence(scheduleCron, scheduleTimezone, DateTimeOffset.UtcNow, out var nextRunAtUtc, out var cronError)) - return JsonSerializer.Serialize(new { error = $"Invalid schedule: {cronError}" }); - - var conversationId = args.Str("conversation_id") - ?? AgentToolRequestContext.TryGet(ChannelMetadataKeys.ConversationId); - if (string.IsNullOrWhiteSpace(conversationId)) - return """{"error":"conversation_id is required when no current channel conversation is available"}"""; - - var ownerNyxUserId = caller.NyxUserId; - - var gitHubAuthorizationResponse = await BuildGitHubAuthorizationResponseAsync( - nyxClient, - token, - ct, - submittedGithubUsername: githubUsernameResolution.GithubUsername); - if (!string.IsNullOrWhiteSpace(gitHubAuthorizationResponse)) - return gitHubAuthorizationResponse; - - var providerSlug = (args.Str("nyx_provider_slug") ?? "api-lark-bot").Trim(); - var serviceResolution = await ResolveProxyServiceIdsAsync(nyxClient, token, templateSpec!.RequiredServiceSlugs, ct); - if (serviceResolution.ErrorJson != null) - return serviceResolution.ErrorJson; - - // Issue #423 §C — capture the inbound channel-bot slug as a failure-notification - // fallback. By definition the user can be reached through the bot they just - // messaged, so when a primary outbound delivery is rejected (e.g. cross-tenant - // Lark `99992364`) the failure-notification message can still land if the agent's - // API key is allowed to route through the inbound bot. 
Optional: if the inbound - // slug is not registered as a per-user UserService row (or equals the primary, - // in which case the fallback would just hit the same proxy), we leave the field - // empty and TrySendFailureAsync degrades to the current single-attempt behavior. - var failureNotificationContext = ResolveFailureNotificationContext( - providerSlug, - serviceResolution.RequiredIds!, - serviceResolution.EligibleIdBySlug); - - var agentId = string.IsNullOrWhiteSpace(args.Str("agent_id")) - ? SkillRunnerDefaults.GenerateActorId() - : args.Str("agent_id")!.Trim(); - - var createKeyResponse = await nyxClient.CreateApiKeyAsync( - token, - BuildCreateApiKeyPayload(agentId, failureNotificationContext.AllowedServiceIds), - ct); - - if (IsErrorPayload(createKeyResponse)) - return createKeyResponse; - - if (!TryParseApiKeyCreateResponse(createKeyResponse, out var apiKeyId, out var apiKeyValue, out var apiKeyError)) - return JsonSerializer.Serialize(new { error = apiKeyError }); - - // Issue aevatarAI/aevatar#411 / #417 follow-up: catch in-flight GitHub-side issues. - // The earlier `BuildGitHubAuthorizationResponseAsync` check covers the "no provider - // token at all" case; this preflight catches misconfigurations that only surface at - // request time (the original case under #421 was a missing `User-Agent` header that - // GitHub rejects with 403; OAuth grant revocation is the other one). - // - // PR #418 review r3141846175: revoke the freshly-minted key on preflight failure so - // each `/daily` retry doesn't leave another orphan proxy-scoped key behind in the - // user's NyxID account. The revoke is best-effort cleanup, not a safety claim about - // the key's correctness. - var preflight = await PreflightGitHubProxyAsync( - nyxClient, - apiKeyValue!, - githubUsernameResolution.GithubUsername ?? 
string.Empty, - templateSpec!.Repositories, - providerSlug, - ct); - if (preflight is not null) - { - await BestEffortRevokeApiKeyAsync(nyxClient, token, apiKeyId!, "github_preflight_failed", ct); - return preflight; - } - - // Pre-create version baseline. Use the caller-scoped version probe — for an agent - // the caller is about to own (not yet existing), the probe returns null so - // versionBefore stays at -1, which is what the create-confirmation wait expects. - var versionBefore = await queryPort.GetStateVersionForCallerAsync(agentId, caller, ct) ?? -1; - - var deliveryTarget = ResolveDeliveryTarget(conversationId, agentId); -#pragma warning disable CS0612 // legacy fields written for rollback safety during owner_scope migration - var outboundConfig = new SkillRunnerOutboundConfig - { - ConversationId = conversationId.Trim(), - NyxProviderSlug = providerSlug, - NyxApiKey = apiKeyValue!, - OwnerNyxUserId = ownerNyxUserId!, - Platform = caller.Platform, - ApiKeyId = apiKeyId!, - LarkReceiveId = deliveryTarget.Primary.ReceiveId, - LarkReceiveIdType = deliveryTarget.Primary.ReceiveIdType, - LarkReceiveIdFallback = deliveryTarget.Fallback?.ReceiveId ?? string.Empty, - LarkReceiveIdTypeFallback = deliveryTarget.Fallback?.ReceiveIdType ?? string.Empty, - OwnerScope = caller.Clone(), - FailureNotificationProviderSlug = failureNotificationContext.FailureSlug ?? 
string.Empty, - }; -#pragma warning restore CS0612 - - var initialize = new InitializeSkillRunnerCommand - { - SkillName = templateSpec.SkillName, - TemplateName = templateSpec.TemplateName, - SkillContent = templateSpec.SkillContent, - ExecutionPrompt = templateSpec.ExecutionPrompt, - ScheduleCron = scheduleCron.Trim(), - ScheduleTimezone = scheduleTimezone.Trim(), - Enabled = true, - ScopeId = configScopeId, - ProviderName = SkillRunnerDefaults.DefaultProviderName, - MaxToolRounds = SkillRunnerDefaults.DefaultMaxToolRounds, - MaxHistoryMessages = SkillRunnerDefaults.DefaultMaxHistoryMessages, - OutboundConfig = outboundConfig, - }; - - var runImmediatelyRequested = args.Bool("run_immediately") == true; - await skillRunnerPort.InitializeAsync(agentId, initialize, runImmediatelyRequested, ct); - - var confirmed = await WaitForCreatedAgentAsync( - queryPort, - agentId, - caller, - versionBefore, - entry => string.Equals(entry.AgentType, SkillRunnerDefaults.AgentType, StringComparison.Ordinal) && - string.Equals(entry.TemplateName, templateSpec.TemplateName, StringComparison.Ordinal), - ct, - maxAttempts: runImmediatelyRequested ? 20 : 10); - - var savePreferenceRequested = args.Bool("save_github_username_preference") == true; - var preferenceSaved = await SaveGithubUsernamePreferenceIfRequestedAsync( - userConfigScopeId, - githubUsernameResolution.GithubUsername ?? string.Empty, - savePreferenceRequested, - ct); - - return JsonSerializer.Serialize(new - { - status = confirmed ? "created" : "accepted", - agent_id = agentId, - agent_type = SkillRunnerDefaults.AgentType, - template = templateSpec.TemplateName, - github_username = githubUsernameResolution.GithubUsername, - github_username_preference_saved = preferenceSaved, - run_immediately_requested = runImmediatelyRequested, - next_scheduled_run = nextRunAtUtc, - conversation_id = conversationId, - api_key_id = apiKeyId, - note = confirmed ? 
"" : "Agent initialization accepted but registry projection is not yet confirmed.", - }); - } - - private async Task CreateSocialMediaAgentAsync( - BuilderArgs args, - IUserAgentCatalogQueryPort queryPort, - IWorkflowAgentCommandPort workflowAgentPort, - NyxIdApiClient nyxClient, - string token, - OwnerScope caller, - CancellationToken ct) - { - var scopeId = AgentToolRequestContext.TryGet(ChannelMetadataKeys.RegistrationScopeId); - if (string.IsNullOrWhiteSpace(scopeId)) - return """{"error":"scope_id is required for the social_media template"}"""; - - var workflowCommandPort = _serviceProvider.GetService(); - if (workflowCommandPort is null) - return """{"error":"Scope workflow command port is not registered."}"""; - - var scheduleCron = args.Str("schedule_cron"); - if (string.IsNullOrWhiteSpace(scheduleCron)) - return """{"error":"schedule_cron is required for create_agent"}"""; - - var scheduleTimezone = args.Str("schedule_timezone") ?? WorkflowAgentDefaults.DefaultTimezone; - if (!ChannelScheduleCalculator.TryGetNextOccurrence(scheduleCron, scheduleTimezone, DateTimeOffset.UtcNow, out var nextRunAtUtc, out var cronError)) - return JsonSerializer.Serialize(new { error = $"Invalid schedule: {cronError}" }); - - var conversationId = args.Str("conversation_id") - ?? AgentToolRequestContext.TryGet(ChannelMetadataKeys.ConversationId); - if (string.IsNullOrWhiteSpace(conversationId)) - return """{"error":"conversation_id is required when no current channel conversation is available"}"""; - - var ownerNyxUserId = caller.NyxUserId; - - var providerSlug = (args.Str("nyx_provider_slug") ?? "api-lark-bot").Trim(); - // The social_media template now publishes the approved post to Twitter (X) via the - // api-twitter NyxID proxy in addition to delivering the approval card via api-lark-bot - // (issue #216). 
Mint the agent api-key with both slugs so a single key carries both - // entitlements; without api-twitter here, NyxID's `allowed_service_ids` enforcement - // (api_keys.rs / proxy.rs) would 403 every publish call regardless of OAuth scope. - var publishProviderSlug = (args.Str("publish_provider_slug") ?? "api-twitter").Trim(); - - var agentId = string.IsNullOrWhiteSpace(args.Str("agent_id")) - ? WorkflowAgentDefaults.GenerateActorId() - : args.Str("agent_id")!.Trim(); - - if (!AgentBuilderTemplates.TryBuildSocialMediaSpec( - agentId, - args.Str("topic") ?? string.Empty, - args.Str("audience"), - args.Str("style"), - providerSlug, - publishProviderSlug, - out var templateSpec, - out var templateError)) - { - return JsonSerializer.Serialize(new { error = templateError }); - } - - // Resolve service IDs from the spec's authoritative slug list (parity with - // daily_report's TemplateSpec.RequiredServiceSlugs — PR #461 review item #6). Inlined - // hardcoded `[providerSlug, publishProviderSlug]` was fine for two slugs but would - // drift if a third slug were ever added; route through the spec so the source of - // truth lives next to the workflow YAML. - var serviceResolution = await ResolveProxyServiceIdsAsync( - nyxClient, - token, - templateSpec!.RequiredServiceSlugs, - ct); - if (serviceResolution.ErrorJson != null) - return serviceResolution.ErrorJson; - - var createKeyResponse = await nyxClient.CreateApiKeyAsync( - token, - BuildCreateApiKeyPayload(agentId, serviceResolution.RequiredIds!), - ct); - - if (IsErrorPayload(createKeyResponse)) - return createKeyResponse; - - if (!TryParseApiKeyCreateResponse(createKeyResponse, out var apiKeyId, out var apiKeyValue, out var apiKeyError)) - return JsonSerializer.Serialize(new { error = apiKeyError }); - - // Mirror the daily_report preflight (#411 / #418) for Twitter: the user may not have - // connected Twitter at NyxID yet, or may have revoked the OAuth grant at x.com between - // connect-time and create-time. 
Surfacing 401/403 here keeps us from persisting a - // social_media agent whose every approved post would fail at publish time. Best-effort - // revoke the freshly minted key on failure so retries don't accumulate orphan keys. - var preflight = await PreflightTwitterProxyAsync(nyxClient, apiKeyValue!, publishProviderSlug, ct); - if (preflight is not null) - { - await BestEffortRevokeApiKeyAsync(nyxClient, token, apiKeyId!, "twitter_preflight_failed", ct); - return preflight; - } - - var workflowUpsert = await workflowCommandPort.UpsertAsync( - new ScopeWorkflowUpsertRequest( - scopeId.Trim(), - templateSpec!.WorkflowId, - templateSpec.WorkflowYaml, - templateSpec.WorkflowName, - templateSpec.DisplayName), - ct); - - var versionBefore = await queryPort.GetStateVersionForCallerAsync(agentId, caller, ct) ?? -1; - - var deliveryTarget = ResolveDeliveryTarget(conversationId, agentId); -#pragma warning disable CS0612 // legacy fields written for rollback safety during owner_scope migration - var initialize = new InitializeWorkflowAgentCommand - { - WorkflowId = workflowUpsert.Workflow.WorkflowId, - WorkflowName = templateSpec.WorkflowName, - WorkflowActorId = workflowUpsert.Workflow.ActorId, - ExecutionPrompt = templateSpec.ExecutionPrompt, - ScheduleCron = scheduleCron.Trim(), - ScheduleTimezone = scheduleTimezone.Trim(), - ConversationId = conversationId.Trim(), - NyxProviderSlug = providerSlug, - NyxApiKey = apiKeyValue!, - OwnerNyxUserId = ownerNyxUserId!, - Platform = caller.Platform, - ApiKeyId = apiKeyId!, - Enabled = true, - ScopeId = scopeId.Trim(), - LarkReceiveId = deliveryTarget.Primary.ReceiveId, - LarkReceiveIdType = deliveryTarget.Primary.ReceiveIdType, - LarkReceiveIdFallback = deliveryTarget.Fallback?.ReceiveId ?? string.Empty, - LarkReceiveIdTypeFallback = deliveryTarget.Fallback?.ReceiveIdType ?? 
string.Empty, - OwnerScope = caller.Clone(), - }; -#pragma warning restore CS0612 - - // Initialize via the workflow-agent command port; observation lives in - // the polling loop below since it crosses actors (Workflow → catalog). - // We split run-immediately into a follow-up TriggerAsync so the trigger - // fires only after the catalog projection confirms creation. - await workflowAgentPort.InitializeAsync(agentId, initialize, runImmediately: false, ct); - - var confirmed = await WaitForCreatedAgentAsync( - queryPort, - agentId, - caller, - versionBefore, - entry => string.Equals(entry.AgentType, WorkflowAgentDefaults.AgentType, StringComparison.Ordinal) && - string.Equals(entry.TemplateName, WorkflowAgentDefaults.TemplateName, StringComparison.Ordinal), - ct, - maxAttempts: args.Bool("run_immediately") == true ? 20 : 10); - - if (args.Bool("run_immediately") == true && confirmed) - { - await workflowAgentPort.TriggerAsync(agentId, "create_agent", revisionFeedback: null, ct); - } - - return JsonSerializer.Serialize(new - { - status = confirmed ? "created" : "accepted", - agent_id = agentId, - agent_type = WorkflowAgentDefaults.AgentType, - template = WorkflowAgentDefaults.TemplateName, - next_scheduled_run = nextRunAtUtc, - conversation_id = conversationId, - workflow_id = workflowUpsert.Workflow.WorkflowId, - workflow_actor_id = workflowUpsert.Workflow.ActorId, - api_key_id = apiKeyId, - note = confirmed - ? string.Empty - : args.Bool("run_immediately") == true - ? "Agent initialization accepted but registry projection is not yet confirmed, so the immediate run was not triggered. Use Run Now after the agent appears." 
- : "Agent initialization accepted but registry projection is not yet confirmed.", - }); - } - private async Task ListAgentsAsync( IUserAgentCatalogQueryPort queryPort, OwnerScope caller, CancellationToken ct) { var agents = await QueryAgentsForCallerAsync(queryPort, caller, ct); - return JsonSerializer.Serialize(new { agents, total = agents.Length }); } @@ -603,7 +151,6 @@ private async Task DeleteAgentAsync( IUserAgentCatalogQueryPort queryPort, IUserAgentCatalogCommandPort catalogCommandPort, ISkillRunnerCommandPort skillRunnerPort, - IWorkflowAgentCommandPort workflowAgentPort, NyxIdApiClient nyxClient, string token, OwnerScope caller, @@ -628,19 +175,15 @@ private async Task DeleteAgentAsync( }); } - // Disable via the typed lifecycle port (dispatch + projection priming happen there); - // skip if the agent type isn't managed. var disableResult = await TryDispatchLifecycleAsync( entry, "delete_agent", LifecycleAction.Disable, revisionFeedback: null, - skillRunnerPort, workflowAgentPort, ct); + skillRunnerPort, ct); if (disableResult.error != null) return disableResult.error; if (!string.IsNullOrWhiteSpace(entry.ApiKeyId)) await nyxClient.DeleteApiKeyAsync(token, entry.ApiKeyId, ct); - // Tombstone via UserAgentCatalogCommandPort; port owns priming + - // version observation and returns an honest accepted/observed status. 
var tombstoneResult = await catalogCommandPort.TombstoneAsync(entry.AgentId, ct); var deleted = tombstoneResult.Outcome == CatalogCommandOutcome.Observed; @@ -675,7 +218,6 @@ private async Task RunAgentAsync( BuilderArgs args, IUserAgentCatalogQueryPort queryPort, ISkillRunnerCommandPort skillRunnerPort, - IWorkflowAgentCommandPort workflowAgentPort, OwnerScope caller, CancellationToken ct) { @@ -690,12 +232,11 @@ private async Task RunAgentAsync( if (!SupportsManagedLifecycle(entry.AgentType)) return JsonSerializer.Serialize(new { error = $"Agent '{entry.AgentId}' does not support run_agent" }); - if (string.Equals(entry.Status, SkillRunnerDefaults.StatusDisabled, StringComparison.Ordinal) || - string.Equals(entry.Status, WorkflowAgentDefaults.StatusDisabled, StringComparison.Ordinal)) + if (string.Equals(entry.Status, SkillRunnerDefaults.StatusDisabled, StringComparison.Ordinal)) return JsonSerializer.Serialize(new { error = $"Agent '{entry.AgentId}' is disabled. Enable it before running." 
}); var revisionFeedback = NormalizeOptional(args.Str("revision_feedback")); - var dispatch = await TryDispatchLifecycleAsync(entry, "run_agent", LifecycleAction.Run, revisionFeedback, skillRunnerPort, workflowAgentPort, ct); + var dispatch = await TryDispatchLifecycleAsync(entry, "run_agent", LifecycleAction.Run, revisionFeedback, skillRunnerPort, ct); if (dispatch.error != null) return dispatch.error; @@ -714,7 +255,6 @@ private async Task DisableAgentAsync( BuilderArgs args, IUserAgentCatalogQueryPort queryPort, ISkillRunnerCommandPort skillRunnerPort, - IWorkflowAgentCommandPort workflowAgentPort, OwnerScope caller, CancellationToken ct) { @@ -722,8 +262,7 @@ private async Task DisableAgentAsync( if (entry.error != null) return entry.error; - if (string.Equals(entry.value!.Status, SkillRunnerDefaults.StatusDisabled, StringComparison.Ordinal) || - string.Equals(entry.value.Status, WorkflowAgentDefaults.StatusDisabled, StringComparison.Ordinal)) + if (string.Equals(entry.value!.Status, SkillRunnerDefaults.StatusDisabled, StringComparison.Ordinal)) return SerializeAgentStatus(entry.value, "Agent is already disabled."); // Capture baseline version BEFORE dispatch so the wait can distinguish @@ -733,7 +272,7 @@ private async Task DisableAgentAsync( // against a fast projection that already advanced the version. var versionBefore = await queryPort.GetStateVersionForCallerAsync(entry.value.AgentId, caller, ct) ?? -1; - var dispatch = await TryDispatchLifecycleAsync(entry.value, "disable_agent", LifecycleAction.Disable, null, skillRunnerPort, workflowAgentPort, ct); + var dispatch = await TryDispatchLifecycleAsync(entry.value, "disable_agent", LifecycleAction.Disable, null, skillRunnerPort, ct); if (dispatch.error != null) return dispatch.error; @@ -741,10 +280,6 @@ private async Task DisableAgentAsync( if (observation.Confirmed) return SerializeAgentStatus(observation.Entry!, "Agent disabled. 
Scheduling paused."); - // Dual gate never passed — the disable was dispatched but the read - // model has not confirmed the lifecycle change within the wait - // budget. Surface the pre-dispatch entry with an honest propagating - // note so the caller (LLM/user) does not assume the agent is paused. return SerializeAgentStatus(entry.value, "Disable submitted. Run /agent-status in a few seconds to confirm the agent is paused."); } @@ -752,7 +287,6 @@ private async Task EnableAgentAsync( BuilderArgs args, IUserAgentCatalogQueryPort queryPort, ISkillRunnerCommandPort skillRunnerPort, - IWorkflowAgentCommandPort workflowAgentPort, OwnerScope caller, CancellationToken ct) { @@ -760,15 +294,12 @@ private async Task EnableAgentAsync( if (entry.error != null) return entry.error; - if (string.Equals(entry.value!.Status, SkillRunnerDefaults.StatusRunning, StringComparison.Ordinal) || - string.Equals(entry.value.Status, WorkflowAgentDefaults.StatusRunning, StringComparison.Ordinal)) + if (string.Equals(entry.value!.Status, SkillRunnerDefaults.StatusRunning, StringComparison.Ordinal)) return SerializeAgentStatus(entry.value, "Agent is already enabled."); - // See DisableAgentAsync for why versionBefore is captured here (before - // any dispatch) and not inside WaitForAgentStatusAsync. var versionBefore = await queryPort.GetStateVersionForCallerAsync(entry.value.AgentId, caller, ct) ?? -1; - var dispatch = await TryDispatchLifecycleAsync(entry.value, "enable_agent", LifecycleAction.Enable, null, skillRunnerPort, workflowAgentPort, ct); + var dispatch = await TryDispatchLifecycleAsync(entry.value, "enable_agent", LifecycleAction.Enable, null, skillRunnerPort, ct); if (dispatch.error != null) return dispatch.error; @@ -776,54 +307,9 @@ private async Task EnableAgentAsync( if (observation.Confirmed) return SerializeAgentStatus(observation.Entry!, "Agent enabled. Scheduling resumed."); - // See DisableAgentAsync for the rationale on the un-confirmed branch. 
return SerializeAgentStatus(entry.value, "Enable submitted. Run /agent-status in a few seconds to confirm the agent is running."); } - /// - /// Builds the JSON body for POST /api/v1/api-keys when the agent-builder mints a - /// scoped child key for a new agent. Pins allow_all_services = false alongside the - /// resolved allowed_service_ids so the agent's proxy reach is bounded to exactly the - /// catalog slugs the template requires. - /// - /// - /// PR #418 review (4175529548): NyxID's CreateApiKeyRequest.allow_all_services - /// (backend/src/handlers/api_keys.rs:105) is #[serde(default = "default_true")], - /// and proxy enforcement (backend/src/handlers/proxy.rs:1030) only checks - /// allowed_service_ids when !auth_user.allow_all_services. Omitting the field - /// means NyxID stores true, the resolved UserService.id list is persisted but - /// never consulted, and the key has broad proxy reach across every service the parent token - /// can see. Setting false explicitly: - /// - /// activates the enforcement path #417 was written to satisfy, - /// makes the narrow-scope intent first-class instead of relying on the parent - /// delegation token's setting (which is what surfaced the bug in production), and - /// triggers validate_service_ids at create-time - /// (backend/src/services/key_service.rs:183), so a malformed - /// UserService.id fails fast at POST /api-keys instead of silently passing - /// through and 403'ing on every later proxy call. - /// - /// allow_all_nodes stays at the NyxID default — this flow does not restrict node - /// routing, and pinning it would surface a separate boundary that has nothing to do with - /// the agent's service reach. 
- /// - private static string BuildCreateApiKeyPayload(string agentId, IReadOnlyList requiredServiceIds) - { - if (requiredServiceIds.Count == 0) - throw new InvalidOperationException("requiredServiceIds must not be empty."); - - var payload = new Dictionary - { - ["name"] = $"aevatar-agent-{agentId}", - ["scopes"] = "proxy", - ["platform"] = "generic", - ["allowed_service_ids"] = requiredServiceIds, - ["allow_all_services"] = false, - }; - - return JsonSerializer.Serialize(payload); - } - private static string SerializeAgentStatus(UserAgentCatalogEntry entry, string? note = null) { return JsonSerializer.Serialize(new @@ -888,34 +374,6 @@ private async Task QueryAgentsForCallerAsync( return (entry, null); } - private async Task WaitForCreatedAgentAsync( - IUserAgentCatalogQueryPort queryPort, - string agentId, - OwnerScope caller, - long versionBefore, - Func predicate, - CancellationToken ct, - int maxAttempts = 10, - int delayMilliseconds = 500) - { - for (var attempt = 0; attempt < maxAttempts; attempt++) - { - if (attempt > 0) - await Task.Delay(delayMilliseconds, ct); - - var versionAfter = await queryPort.GetStateVersionForCallerAsync(agentId, caller, ct) ?? -1; - if (versionAfter <= versionBefore) - continue; - - var entry = await queryPort.GetForCallerAsync(agentId, caller, ct); - if (entry != null && predicate(entry)) - return true; - } - - return false; - } - - private async Task<(bool Confirmed, UserAgentCatalogEntry? Entry)> WaitForAgentStatusAsync( IUserAgentCatalogQueryPort queryPort, string agentId, @@ -924,22 +382,14 @@ private async Task WaitForCreatedAgentAsync( string expectedStatus, CancellationToken ct) { - // Status + version dual-condition (mirrors WaitForCreatedAgentAsync): - // wait until the read model both advances past the caller-captured - // baseline AND surfaces the expected status. 
Status alone is not - // enough — a stale replica can hold an expected-looking historical - // status (e.g., a previous disable→enable→disable cycle) and pass a - // status-only check while the actor has not yet processed *this* - // dispatch. Conversely, version alone is not enough either — an - // unrelated state event could advance the version without changing - // status. Both conditions together pin "this specific lifecycle - // event has materialized in the read model". Caller must capture - // versionBefore *before* dispatch, otherwise a fast projection that - // already advanced the version would make versionAfter == versionBefore - // and burn the entire budget. Projection scope priming also happens - // in the caller before dispatch (see DisableAgentAsync / - // EnableAgentAsync) — a late prime here cannot recover an event the - // projector already missed. + // Status + version dual-condition: wait until the read model both advances past the + // caller-captured baseline AND surfaces the expected status. Status alone is not + // enough — a stale replica can hold an expected-looking historical status (e.g., a + // previous disable→enable→disable cycle) and pass a status-only check while the + // actor has not yet processed *this* dispatch. Conversely, version alone is not + // enough either — an unrelated state event could advance the version without + // changing status. Both conditions together pin "this specific lifecycle event has + // materialized in the read model". for (var attempt = 0; attempt < _projectionWaitAttempts; attempt++) { if (attempt > 0) @@ -954,11 +404,6 @@ private async Task WaitForCreatedAgentAsync( return (Confirmed: true, Entry: entry); } - // Budget exhausted: the dual gate never passed. Do NOT fall back to an - // un-gated GetAsync read — that would surface a stale-but-expected- - // looking entry and let callers report success despite the contract - // not being satisfied. 
Callers must surface honest "submitted / - // propagating" copy when Confirmed is false. return (Confirmed: false, Entry: null); } @@ -968,829 +413,33 @@ private async Task WaitForCreatedAgentAsync( LifecycleAction action, string? revisionFeedback, ISkillRunnerCommandPort skillRunnerPort, - IWorkflowAgentCommandPort workflowAgentPort, CancellationToken ct) { - if (string.Equals(entry.AgentType, SkillRunnerDefaults.AgentType, StringComparison.Ordinal)) + if (!string.Equals(entry.AgentType, SkillRunnerDefaults.AgentType, StringComparison.Ordinal)) { - switch (action) - { - case LifecycleAction.Run: - await skillRunnerPort.TriggerAsync(entry.AgentId, reason, ct); - break; - case LifecycleAction.Disable: - await skillRunnerPort.DisableAsync(entry.AgentId, reason, ct); - break; - case LifecycleAction.Enable: - await skillRunnerPort.EnableAsync(entry.AgentId, reason, ct); - break; - default: - throw new ArgumentOutOfRangeException(nameof(action), action, null); - } - return (true, null); + return (false, JsonSerializer.Serialize(new { error = $"Agent '{entry.AgentId}' does not support {action.ToString().ToLowerInvariant()}." 
})); } - if (string.Equals(entry.AgentType, WorkflowAgentDefaults.AgentType, StringComparison.Ordinal)) + switch (action) { - switch (action) - { - case LifecycleAction.Run: - await workflowAgentPort.TriggerAsync(entry.AgentId, reason, revisionFeedback?.Trim(), ct); - break; - case LifecycleAction.Disable: - await workflowAgentPort.DisableAsync(entry.AgentId, reason, ct); - break; - case LifecycleAction.Enable: - await workflowAgentPort.EnableAsync(entry.AgentId, reason, ct); - break; - default: - throw new ArgumentOutOfRangeException(nameof(action), action, null); - } - return (true, null); + case LifecycleAction.Run: + await skillRunnerPort.TriggerAsync(entry.AgentId, reason, ct); + break; + case LifecycleAction.Disable: + await skillRunnerPort.DisableAsync(entry.AgentId, reason, ct); + break; + case LifecycleAction.Enable: + await skillRunnerPort.EnableAsync(entry.AgentId, reason, ct); + break; + default: + throw new ArgumentOutOfRangeException(nameof(action), action, null); } - - return (false, JsonSerializer.Serialize(new { error = $"Agent '{entry.AgentId}' does not support {action.ToString().ToLowerInvariant()}." })); + _ = revisionFeedback; // SkillRunner doesn't accept revision feedback today; reserved for future surfaces. + return (true, null); } private static bool SupportsManagedLifecycle(string? 
agentType) => - string.Equals(agentType, SkillRunnerDefaults.AgentType, StringComparison.Ordinal) || - string.Equals(agentType, WorkflowAgentDefaults.AgentType, StringComparison.Ordinal); - - private async Task ResolveCurrentUserIdAsync(NyxIdApiClient client, string token, CancellationToken ct) - { - var response = await client.GetCurrentUserAsync(token, ct); - if (IsErrorPayload(response)) - return null; - - try - { - using var doc = JsonDocument.Parse(response); - if (doc.RootElement.TryGetProperty("user", out var user)) - return ReadString(user, "id", "user_id", "sub"); - - return ReadString(doc.RootElement, "id", "user_id", "sub"); - } - catch (JsonException) - { - return null; - } - } - - /// - /// Resolves the per-user UserService.id values that the new agent's API key needs in - /// allowed_service_ids to reach each required catalog slug through the NyxID proxy. - /// - /// - /// Issue aevatarAI/aevatar#417. The previous implementation called - /// GET /api/v1/proxy/services (the catalog list) and pulled out each row's - /// id, which is a DownstreamService.id — a global catalog UUID shared across - /// all users. NyxID's proxy enforcement (backend/src/handlers/proxy.rs:1030) checks the - /// API key's allowed_service_ids against the per-user UserService.id, not the - /// catalog id. The mismatch silently passed at POST /api-keys creation time, then - /// surfaced as 403 ApiKeyScopeForbidden on every proxy call. - /// Why the old code looked correct in development: allow_all_services=true - /// short-circuits the enforcement check (NyxID proxy.rs:1030). Session-token-minted - /// API keys default to true, so a developer reproducing the create-key + proxy-call - /// dance from a CLI never tripped the bug. The agent path mints child keys via the - /// channel-relay delegation token; NyxID forces those children to inherit - /// allow_all_services=false from the parent, which is when enforcement kicks in. 
- /// The BuildCreateApiKeyPayload change in PR #418 (review 4175529548) makes the - /// narrow-scope intent first-class by setting allow_all_services=false explicitly, - /// so this resolver's output is consulted regardless of the parent's setting. - /// The fix: use GET /api/v1/user-services, which lists this user's - /// UserService instances. For each instance the response carries the per-user - /// id (what enforcement actually checks) plus slug, is_active, and a - /// credential_source envelope. We filter to active rows whose slug matches a required - /// slug, and skip org-shared rows the caller cannot use as a proxy target — those would later - /// surface as a less-actionable org_role_insufficient error. - /// - /// - /// Result of . / - /// are mutually exclusive (success vs. blocking error). Even on - /// success, callers can use to look up optional - /// slugs that were not in requiredSlugs — e.g. the inbound channel-bot slug for - /// SkillRunner's failure-notification fallback (issue #423 §C). Optional lookups must - /// not block agent creation, so they go through this map instead of being added to - /// requiredSlugs (which would cause to - /// return a service_not_connected error if the slug is missing). - /// - private readonly record struct ProxyServiceResolutionResult( - IReadOnlyList? RequiredIds, - string? 
ErrorJson, - IReadOnlyDictionary EligibleIdBySlug); - - private async Task ResolveProxyServiceIdsAsync( - NyxIdApiClient client, - string token, - IReadOnlyList requiredSlugs, - CancellationToken ct) - { - var emptyEligible = new Dictionary(StringComparer.OrdinalIgnoreCase); - if (requiredSlugs.Count == 0) - { - return new ProxyServiceResolutionResult(null, JsonSerializer.Serialize(new - { - error = "no_required_slugs", - hint = "At least one required Nyx proxy service slug must be provided.", - }), emptyEligible); - } - - var response = await client.ListUserServicesAsync(token, ct); - if (IsErrorPayload(response)) - { - return new ProxyServiceResolutionResult(null, JsonSerializer.Serialize(new - { - error = "user_services_unavailable", - hint = "Could not list connected Nyx user-services. Try again or check NyxID availability.", - }), emptyEligible); - } - - try - { - using var doc = JsonDocument.Parse(response); - // List response shape: { "services": [ {id, slug, is_active, credential_source: {...}}, ... ] } - // The catalog response also nests under "services" (and additionally "custom_services"), - // so reusing EnumerateProxyServiceItems is safe — but we accept *only* rows that look - // like UserService instances by checking presence of `slug`. - // - // Codex review (PR #418 r3141846173): users with mixed bindings can have multiple - // rows for the same slug (e.g. an org-shared `allowed:false` row alongside a personal - // active row). NyxID does not guarantee any ordering, so the resolver must keep the - // *most eligible* row per slug rather than the first one seen. We track the first - // ineligible row anyway so that when no eligible row exists we can still emit a - // specific error (`service_inactive` / `service_org_viewer_only`) instead of a - // generic miss. 
- var bestBySlug = new Dictionary(StringComparer.OrdinalIgnoreCase); - foreach (var svc in EnumerateProxyServiceItems(doc.RootElement)) - { - var slug = ReadString(svc, "slug"); - if (string.IsNullOrWhiteSpace(slug)) - continue; - - var id = ReadString(svc, "id"); - if (string.IsNullOrWhiteSpace(id)) - continue; - - var isActive = TryReadBool(svc, "is_active") ?? true; - var credentialSource = svc.TryGetProperty("credential_source", out var cs) ? cs : default; - var sourceType = credentialSource.ValueKind == JsonValueKind.Object - ? ReadString(credentialSource, "type") - : null; - var orgAllowed = credentialSource.ValueKind == JsonValueKind.Object - ? TryReadBool(credentialSource, "allowed") - : null; - - var candidate = new ServiceResolution( - Id: id!, - IsActive: isActive, - CredentialSourceType: sourceType, - OrgAllowed: orgAllowed); - - if (bestBySlug.TryGetValue(slug, out var existing)) - { - // Already have an eligible row → never downgrade. - if (existing.IsEligible) - continue; - // Existing is ineligible; only replace with another ineligible row if we - // would otherwise lose information. Replace iff candidate is eligible. - if (!candidate.IsEligible) - continue; - } - - bestBySlug[slug] = candidate; - } - - // Snapshot the eligible (slug → id) map before the per-required-slug check so - // callers can look up optional slugs (e.g. inbound channel-bot for failure- - // notification fallback) without re-listing user-services. Ineligible rows are - // intentionally excluded — including them would let optional lookups silently - // pick up an inactive or org-viewer-only service the API key cannot route through. 
- var eligibleBySlug = bestBySlug - .Where(static pair => pair.Value.IsEligible) - .ToDictionary( - pair => pair.Key, - pair => pair.Value.Id, - StringComparer.OrdinalIgnoreCase); - - var ids = new List(requiredSlugs.Count); - foreach (var slug in requiredSlugs.Distinct(StringComparer.OrdinalIgnoreCase)) - { - if (!bestBySlug.TryGetValue(slug, out var resolution)) - { - return new ProxyServiceResolutionResult(null, JsonSerializer.Serialize(new - { - error = "service_not_connected", - slug, - hint = $"NyxID has no connected user-service for slug `{slug}`. Connect the provider at NyxID before creating this agent.", - }), emptyEligible); - } - - if (resolution.IsEligible) - { - ids.Add(resolution.Id); - continue; - } - - if (string.Equals(resolution.CredentialSourceType, "org", StringComparison.OrdinalIgnoreCase) && - resolution.OrgAllowed != true) - { - return new ProxyServiceResolutionResult(null, JsonSerializer.Serialize(new - { - error = "service_org_viewer_only", - slug, - hint = $"NyxID user-service for slug `{slug}` is shared by your org but your role does not permit using it as a proxy target. Ask an admin to widen the org role scope, or connect a personal credential.", - }), emptyEligible); - } - - // Remaining ineligible reason: !is_active. - return new ProxyServiceResolutionResult(null, JsonSerializer.Serialize(new - { - error = "service_inactive", - slug, - hint = $"NyxID user-service for slug `{slug}` is inactive. Re-activate it at NyxID before creating this agent.", - }), emptyEligible); - } - - return new ProxyServiceResolutionResult( - ids.Distinct(StringComparer.Ordinal).ToArray(), - null, - eligibleBySlug); - } - catch (JsonException) - { - return new ProxyServiceResolutionResult(null, JsonSerializer.Serialize(new - { - error = "user_services_parse_failed", - hint = "NyxID user-services response was not valid JSON.", - }), emptyEligible); - } - } - - private readonly record struct ServiceResolution( - string Id, - bool IsActive, - string? 
CredentialSourceType, - bool? OrgAllowed) - { - public bool IsEligible => - IsActive && - !(string.Equals(CredentialSourceType, "org", StringComparison.OrdinalIgnoreCase) && OrgAllowed != true); - } - - /// - /// Result of resolving the inbound channel-bot fallback used by SkillRunner's - /// failure-notification path (issue #423 §C). When the inbound slug is reachable - /// (registered + eligible + distinct from the primary), - /// is set and its corresponding UserService.id is appended to - /// so the agent's API key can route through it - /// at runtime. Otherwise is null and the agent - /// degrades to the existing single-attempt failure notification. - /// - private readonly record struct FailureNotificationContext( - string? FailureSlug, - IReadOnlyList AllowedServiceIds); - - private FailureNotificationContext ResolveFailureNotificationContext( - string primarySlug, - IReadOnlyList requiredIds, - IReadOnlyDictionary eligibleIdBySlug) - { - var inboundSlug = AgentToolRequestContext.TryGet(ChannelMetadataKeys.InboundChannelBotProxySlug)?.Trim(); - if (string.IsNullOrWhiteSpace(inboundSlug)) - return new FailureNotificationContext(null, requiredIds); - - // Same-proxy fallback gives no recovery benefit — a primary rejection at - // `slug=X` would also fail at `slug=X`. Skip the capture so TrySendFailureAsync - // doesn't pay the wasted POST and doesn't double-log the same rejection. - if (string.Equals(inboundSlug, primarySlug, StringComparison.Ordinal)) - return new FailureNotificationContext(null, requiredIds); - - // Optional slug must be a connected, eligible user-service for the API key to - // route through it. If it's not, leaving the failure-notification field empty - // keeps the runtime on the existing single-attempt path — better than persisting - // a slug whose every send would 403 at proxy enforcement time. 
- if (!eligibleIdBySlug.TryGetValue(inboundSlug, out var inboundId)) - return new FailureNotificationContext(null, requiredIds); - - // Dedupe — if the inbound slug's UserService.id is already in requiredIds the - // expanded list is identical, but we still surface the slug on OutboundConfig so - // the runtime knows to use it for failure notifications. - var allowed = requiredIds.Contains(inboundId, StringComparer.Ordinal) - ? requiredIds - : requiredIds.Append(inboundId).ToArray(); - - return new FailureNotificationContext(inboundSlug, allowed); - } - - private async Task BuildGitHubAuthorizationResponseAsync( - NyxIdApiClient client, - string token, - CancellationToken ct, - bool preferCredentialsRequiredStatus = false, - string? submittedGithubUsername = null) - { - var providerTokensResponse = await client.ListProviderTokensAsync(token, ct); - if (IsErrorPayload(providerTokensResponse)) - { - return JsonSerializer.Serialize(new - { - error = "Could not verify GitHub authorization status from NyxID providers.", - }); - } - - if (HasConnectedGitHubProvider(providerTokensResponse)) - return null; - - var catalogResponse = await client.GetCatalogEntryAsync(token, "api-github", ct); - if (IsErrorPayload(catalogResponse)) - { - return JsonSerializer.Serialize(new - { - error = "GitHub provider configuration is not available in the NyxID catalog.", - }); - } - - if (!TryParseGitHubCatalogEntry( - catalogResponse, - out var providerId, - out var providerType, - out var credentialMode, - out var documentationUrl, - out var catalogError)) - return JsonSerializer.Serialize(new { error = catalogError }); - - if (!string.Equals(providerType, "oauth2", StringComparison.OrdinalIgnoreCase)) - { - return JsonSerializer.Serialize(new - { - error = $"GitHub provider requires unsupported connection mode '{providerType ?? 
"unknown"}'.", - }); - } - - if (string.Equals(credentialMode, "user", StringComparison.OrdinalIgnoreCase)) - { - var credentialsResponse = await client.GetUserCredentialsAsync(token, providerId!, ct); - if (IsErrorPayload(credentialsResponse)) - return credentialsResponse; - - if (!TryParseUserCredentialsStatus(credentialsResponse, out var hasCredentials, out var credentialsError)) - return JsonSerializer.Serialize(new { error = credentialsError }); - - if (!hasCredentials) - { - return JsonSerializer.Serialize(new - { - status = "credentials_required", - template = "daily_report", - provider = "GitHub", - provider_id = providerId, - documentation_url = documentationUrl, - github_username = submittedGithubUsername, - note = "GitHub in NyxID uses user-managed OAuth app credentials. Set your GitHub OAuth app client_id/client_secret in NyxID first, then submit the daily report form again.", - }); - } - } - - var connectResponse = await client.InitiateOAuthConnectAsync(token, providerId!, ct); - if (IsErrorPayload(connectResponse)) - { - return JsonSerializer.Serialize(new - { - error = "Could not initiate GitHub OAuth connect in NyxID.", - }); - } - - if (!TryParseAuthorizationUrl(connectResponse, out var authorizationUrl, out var authError)) - return JsonSerializer.Serialize(new { error = authError }); - - return JsonSerializer.Serialize(new - { - status = preferCredentialsRequiredStatus ? "credentials_required" : "oauth_required", - template = "daily_report", - provider = "GitHub", - provider_id = providerId, - authorization_url = authorizationUrl, - documentation_url = documentationUrl, - github_username = submittedGithubUsername, - note = preferCredentialsRequiredStatus - ? "Connect GitHub in NyxID, then run /daily again." - : "Connect GitHub in NyxID, then return to Feishu and submit the daily report form again.", - }); - } - - private async Task<(string? GithubUsername, string? 
ErrorResponse)> ResolveDailyReportGithubUsernameAsync( - BuilderArgs args, - NyxIdApiClient nyxClient, - string token, - string scopeId, - CancellationToken ct) - { - var explicitGithubUsername = NormalizeOptional(args.Str("github_username")); - if (explicitGithubUsername is not null) - return (explicitGithubUsername, null); - - var preferredGithubUsername = await TryResolvePreferredGithubUsernameAsync(scopeId, ct); - if (preferredGithubUsername is not null) - return (preferredGithubUsername, null); - - var derivedGithubUsername = await TryResolveGitHubUsernameFromNyxAsync(nyxClient, token, ct); - if (derivedGithubUsername is not null) - return (derivedGithubUsername, null); - - var authorizationResponse = await BuildGitHubAuthorizationResponseAsync( - nyxClient, - token, - ct, - preferCredentialsRequiredStatus: true); - if (authorizationResponse is not null) - return (null, authorizationResponse); - - return (null, JsonSerializer.Serialize(new - { - status = "credentials_required", - template = "daily_report", - provider = "GitHub", - note = "Could not resolve github_username. Provide github_username explicitly, save a default preference, or reconnect GitHub in NyxID.", - })); - } - - private static bool TryParseApiKeyCreateResponse( - string response, - out string? apiKeyId, - out string? apiKeyValue, - out string? 
error) - { - apiKeyId = null; - apiKeyValue = null; - error = null; - - try - { - using var doc = JsonDocument.Parse(response); - var root = doc.RootElement; - apiKeyId = ReadString(root, "id", "api_key_id"); - apiKeyValue = ReadString(root, "full_key", "api_key", "token"); - - if ((string.IsNullOrWhiteSpace(apiKeyId) || string.IsNullOrWhiteSpace(apiKeyValue)) && - root.TryGetProperty("api_key", out var nested)) - { - apiKeyId ??= ReadString(nested, "id", "api_key_id"); - apiKeyValue ??= ReadString(nested, "full_key", "token", "value"); - } - - if (string.IsNullOrWhiteSpace(apiKeyId) || string.IsNullOrWhiteSpace(apiKeyValue)) - { - error = "NyxID API key response did not include both id and full_key."; - return false; - } - - return true; - } - catch (JsonException ex) - { - error = ex.Message; - return false; - } - } - - private static bool IsErrorPayload(string payload) - { - try - { - using var doc = JsonDocument.Parse(payload); - if (doc.RootElement.ValueKind != JsonValueKind.Object) - return false; - - return doc.RootElement.TryGetProperty("error", out var errorProp) && - errorProp.ValueKind == JsonValueKind.True; - } - catch (JsonException) - { - return false; - } - } - - private static bool HasConnectedGitHubProvider(string response) - { - try - { - using var doc = JsonDocument.Parse(response); - if (!doc.RootElement.TryGetProperty("tokens", out var tokens) || tokens.ValueKind != JsonValueKind.Array) - return false; - - foreach (var element in tokens.EnumerateArray()) - { - if (!LooksLikeGitHubProvider(element)) - continue; - - return string.Equals( - NormalizeOptional(ReadString(element, "status")), - "active", - StringComparison.OrdinalIgnoreCase); - } - } - catch (JsonException) - { - } - - return false; - } - - private static bool TryParseGitHubCatalogEntry( - string response, - out string? providerId, - out string? providerType, - out string? credentialMode, - out string? documentationUrl, - out string? 
error) - { - providerId = null; - providerType = null; - credentialMode = null; - documentationUrl = null; - error = null; - - try - { - using var doc = JsonDocument.Parse(response); - providerId = ReadStringDeep(doc.RootElement, 3, "provider_config_id", "provider_id"); - providerType = ReadStringDeep(doc.RootElement, 3, "provider_type"); - credentialMode = ReadStringDeep(doc.RootElement, 3, "credential_mode"); - documentationUrl = ReadStringDeep(doc.RootElement, 3, "documentation_url"); - - if (string.IsNullOrWhiteSpace(providerId)) - { - error = "GitHub catalog entry did not include provider_config_id."; - return false; - } - - return true; - } - catch (JsonException ex) - { - error = ex.Message; - return false; - } - } - - private static bool TryParseUserCredentialsStatus( - string response, - out bool hasCredentials, - out string? error) - { - hasCredentials = false; - error = null; - - try - { - using var doc = JsonDocument.Parse(response); - if (doc.RootElement.TryGetProperty("has_credentials", out var property)) - { - if (property.ValueKind == JsonValueKind.True) - { - hasCredentials = true; - return true; - } - - if (property.ValueKind == JsonValueKind.False) - { - hasCredentials = false; - return true; - } - } - - error = "NyxID user credentials response did not include has_credentials."; - return false; - } - catch (JsonException ex) - { - error = ex.Message; - return false; - } - } - - private static bool TryParseAuthorizationUrl( - string response, - out string? authorizationUrl, - out string? 
error) - { - authorizationUrl = null; - error = null; - - try - { - using var doc = JsonDocument.Parse(response); - authorizationUrl = ReadStringDeep(doc.RootElement, 3, "authorization_url", "auth_url", "url"); - if (string.IsNullOrWhiteSpace(authorizationUrl)) - { - error = "NyxID OAuth connect response did not include an authorization URL."; - return false; - } - - return true; - } - catch (JsonException ex) - { - error = ex.Message; - return false; - } - } - - private async Task TryResolvePreferredGithubUsernameAsync(string scopeId, CancellationToken ct) - { - var queryPort = _serviceProvider.GetService(); - if (queryPort is null) - return null; - - try - { - var config = await queryPort.GetAsync(scopeId, ct); - return NormalizeOptional(config.GithubUsername); - } - catch (OperationCanceledException) - { - throw; - } - catch - { - return null; - } - } - - private async Task TryResolveGitHubUsernameFromNyxAsync( - NyxIdApiClient client, - string token, - CancellationToken ct) - { - try - { - var response = await client.ProxyRequestAsync( - token, - "api-github", - "user", - "GET", - null, - null, - ct); - if (IsErrorPayload(response)) - return null; - - return TryParseGitHubUserLogin(response, out var login) - ? login - : null; - } - catch (OperationCanceledException) - { - throw; - } - catch - { - return null; - } - } - - private async Task SaveGithubUsernamePreferenceIfRequestedAsync( - string scopeId, - string githubUsername, - bool shouldSave, - CancellationToken ct) - { - if (!shouldSave || string.IsNullOrWhiteSpace(githubUsername)) - return false; - - var commandService = _serviceProvider.GetService(); - if (commandService is null) - return false; - - try - { - await commandService.SaveGithubUsernameAsync(scopeId, githubUsername, ct); - return true; - } - catch (OperationCanceledException) - { - throw; - } - catch - { - return false; - } - } - - private static bool TryParseGitHubUserLogin( - string response, - out string? 
login) - { - login = null; - - try - { - using var doc = JsonDocument.Parse(response); - login = NormalizeOptional(ReadStringDeep(doc.RootElement, 2, "login", "username")); - return login is not null; - } - catch (JsonException) - { - return false; - } - } - - private static string? ReadString(JsonElement element, params string[] names) - { - if (element.ValueKind != JsonValueKind.Object) - return null; - - foreach (var name in names) - { - if (!element.TryGetProperty(name, out var property)) - continue; - - if (property.ValueKind == JsonValueKind.String) - return property.GetString(); - - if (property.ValueKind == JsonValueKind.Number) - return property.GetRawText(); - } - - return null; - } - - private static string? ReadStringDeep(JsonElement element, int maxDepth, params string[] names) - { - var direct = ReadString(element, names); - if (!string.IsNullOrWhiteSpace(direct) || maxDepth <= 0) - return direct; - - if (element.ValueKind == JsonValueKind.Object) - { - foreach (var property in element.EnumerateObject()) - { - var nested = ReadStringDeep(property.Value, maxDepth - 1, names); - if (!string.IsNullOrWhiteSpace(nested)) - return nested; - } - } - else if (element.ValueKind == JsonValueKind.Array) - { - foreach (var item in element.EnumerateArray()) - { - var nested = ReadStringDeep(item, maxDepth - 1, names); - if (!string.IsNullOrWhiteSpace(nested)) - return nested; - } - } - - return null; - } - - private static bool LooksLikeGitHubProvider(JsonElement element) - { - foreach (var value in EnumerateStrings( - ReadStringDeep(element, 2, "provider_name", "name", "display_name", "slug", "provider", "service_slug"))) - { - if (value.Contains("github", StringComparison.OrdinalIgnoreCase)) - return true; - } - - return false; - } - - private static IEnumerable EnumerateStrings(params string?[] values) - { - foreach (var value in values) - { - if (!string.IsNullOrWhiteSpace(value)) - yield return value; - } - } - - private static IEnumerable 
EnumerateProxyServiceItems(JsonElement root) - { - if (root.ValueKind == JsonValueKind.Array) - { - foreach (var item in root.EnumerateArray()) - yield return item; - yield break; - } - - if (root.ValueKind != JsonValueKind.Object) - yield break; - - foreach (var propertyName in new[] { "services", "custom_services", "data" }) - { - if (!root.TryGetProperty(propertyName, out var items) || - items.ValueKind != JsonValueKind.Array) - { - continue; - } - - foreach (var item in items.EnumerateArray()) - yield return item; - } - } - - private static string NormalizeScopeId(string? value) => - NormalizeOptional(value) ?? "default"; + string.Equals(agentType, SkillRunnerDefaults.AgentType, StringComparison.Ordinal); private static string? NormalizeOptional(string? value) { @@ -1798,519 +447,6 @@ private static string NormalizeScopeId(string? value) => return normalized.Length == 0 ? null : normalized; } - /// - /// Builds the typed Lark delivery target (primary + optional fallback) from the current - /// AgentToolRequestContext, and emits a LogDebug breadcrumb when the primary fell back from - /// the cross-app safe pair (chat_id / union_id) to the legacy open_id / conversation_id - /// path. The primary is what - /// returns; the fallback (when the primary is a DM chat_id and we also have a union_id at - /// ingress) is captured so the runtime can retry once on a Lark - /// 230002 bot not in chat rejection — the failure mode for cross-app same-tenant - /// deployments where the outbound app is not in the inbound DM. Operators correlating Lark - /// 99992361 open_id cross app rejections need the log line to confirm whether the - /// relay surfaced union_id at agent-create time. - /// - /// - /// Preflights GitHub proxy access using the newly created agent API key. 
Three-step probe: - /// first /rate_limit (catches token-level OAuth-grant revocation as 401/403), then - /// global /search/issues + /search/commits with the bound github_username - /// (catches scope insufficiency for global search), then per-repo - /// /search/{issues,commits}?q=repo:{owner}/{repo}+author:{username} for every - /// repository in the configured allowlist (catches the case where global public search - /// works but a specific repo in the allowlist is private and the token lacks repo - /// scope — codex review PR #479 r3152148327). - /// - /// Returns a structured error JSON suitable for returning verbatim from the tool on - /// hard-fail shapes; returns null on success or on probe shapes we don't classify - /// as "fundamentally broken" (rate limits, 5xx). - /// - /// - /// Issue aevatarAI/aevatar#411 added the original /rate_limit step to fail fast on - /// a misdiagnosed root cause (we thought the api-key was missing a GitHub binding). Issue - /// #417 fixed that real cause — the api-key now carries the right per-user - /// UserService.ids. The probe was retained because the OAuth grant can still be - /// revoked outside our control. Issue #474 widens the probe surface to /search/* - /// because /rate_limit is scope-light (succeeds with any valid token) and never - /// caught the production failure mode where /search/* 422s every call — agents got - /// persisted but every scheduled run produced an empty report. The freshly minted api-key - /// is best-effort revoked at the call site on any preflight failure so retries don't - /// accumulate orphan proxy-scoped keys. - /// - private async Task PreflightGitHubProxyAsync( - NyxIdApiClient nyxClient, - string apiKey, - string githubUsername, - IReadOnlyList repositories, - string nyxProviderSlug, - CancellationToken ct) - { - // Step 1: cheap read-only endpoint; succeeds even with a rate-limited token, fails with - // 401/403 when the proxy can't resolve a bound GitHub credential. 
- var rateLimitProbe = await nyxClient.ProxyRequestAsync( - apiKey, - "api-github", - "/rate_limit", - "GET", - body: null, - extraHeaders: null, - ct); - - var rateLimitFailure = ClassifyRateLimitProbeFailure(rateLimitProbe, nyxProviderSlug); - if (rateLimitFailure is not null) - return rateLimitFailure; - - // Step 2: global search-API probes. /rate_limit is scope-light — it returns 200 even - // with a token that GitHub's search engine will reject. Issue #474: all of - // /search/issues and /search/commits return 422 "invalid user/permission" when the - // bound OAuth grant lacks public_repo/repo or the username is unreachable, and the - // daily report is useless if those endpoints don't work. Probe both with per_page=1 so - // we exercise the same auth surface the runtime will hit, without paying for full - // result pages. Skip when no username is bound — the rate_limit step is the only - // signal we have in that case (and CreateDailyReportAgentAsync rejects empty - // github_username earlier, so this guard is defensive only). - var normalizedUser = (githubUsername ?? string.Empty).Trim(); - if (string.IsNullOrEmpty(normalizedUser)) - return null; - - var encodedUser = Uri.EscapeDataString(normalizedUser); - var globalSearchPaths = new (string Path, string Label)[] - { - ($"/search/issues?q=author:{encodedUser}&per_page=1", "/search/issues"), - ($"/search/commits?q=author:{encodedUser}&per_page=1", "/search/commits"), - }; - foreach (var (path, label) in globalSearchPaths) - { - var searchProbe = await nyxClient.ProxyRequestAsync( - apiKey, - "api-github", - path, - "GET", - body: null, - extraHeaders: null, - ct); - - var searchFailure = ClassifySearchProbeFailure(searchProbe, label, normalizedUser, nyxProviderSlug); - if (searchFailure is not null) - return searchFailure; - } - - // Step 3: per-repo search-API probes when a repository allowlist is configured. 
The - // runtime daily report runs `repo:{owner}/{repo}+author:{username}` queries (see - // AgentBuilderTemplates.cs repo-mode URL list) — different auth surface from the - // global search above, because GitHub enforces per-repo visibility. A token with - // public_repo can pass global search yet 422 every repo-scoped call when one of the - // listed repos is private. Codex review PR #479 r3152148327: probing only global - // queries leaves that case persisting broken agents, so loop the repos here. - if (repositories is null || repositories.Count == 0) - return null; - - foreach (var repoEntry in repositories) - { - var trimmedRepo = (repoEntry ?? string.Empty).Trim(); - if (string.IsNullOrEmpty(trimmedRepo)) - continue; - - // GitHub usernames and repo names are restricted to [a-zA-Z0-9-._] per the - // github.com identifier rules — none of which need percent-encoding. The slash - // separator must be preserved literally (Uri.EscapeDataString would emit %2F, - // which GitHub's q= parser does not consistently accept). Pass repoEntry through - // unescaped; defense-in-depth escaping happens on the username segment. - var repoSearchPaths = new (string Path, string Label)[] - { - ($"/search/issues?q=repo:{trimmedRepo}+author:{encodedUser}&per_page=1", $"/search/issues (repo={trimmedRepo})"), - ($"/search/commits?q=repo:{trimmedRepo}+author:{encodedUser}&per_page=1", $"/search/commits (repo={trimmedRepo})"), - }; - foreach (var (path, label) in repoSearchPaths) - { - var searchProbe = await nyxClient.ProxyRequestAsync( - apiKey, - "api-github", - path, - "GET", - body: null, - extraHeaders: null, - ct); - - var searchFailure = ClassifySearchProbeFailure(searchProbe, label, normalizedUser, nyxProviderSlug); - if (searchFailure is not null) - return searchFailure; - } - } - - return null; - } - - /// - /// Maps a /rate_limit probe response onto a fail-fast structured error or null. 
- /// Only 401/403 are fail-fast; all other shapes (200, 5xx, transient errors, malformed - /// JSON) flow through so creation can proceed and the operator can debug from logs. - /// - /// - /// `NyxIdApiClient.SendAsync` (NyxIdApiClient.cs:710) wraps HTTP non-2xx as - /// {"error": true, "status": <http>, "body": "<raw downstream body>"} — - /// status, not code. Reviewer (PR #412 r3141699476): the previous parser only - /// read code, so for the actual #411 production failures (HTTP 403 from - /// /api/v1/proxy/s/api-github/rate_limit) it set status=0, returned null, and - /// persisted a daily_report agent that would fail at runtime. Read both status (the - /// SendAsync envelope) AND code (any future inverted-naming envelope or top-level - /// Lark code). - /// - private static string? ClassifyRateLimitProbeFailure(string probe, string nyxProviderSlug) - { - if (string.IsNullOrWhiteSpace(probe)) - return null; - - try - { - using var doc = JsonDocument.Parse(probe); - var root = doc.RootElement; - // `envelopeMessage` is the proxy envelope's `message` field; named to avoid - // shadowing the anonymous-type `detail` property below (codex review PR #479). - if (!IsErrorEnvelope(root, out var status, out var envelopeMessage, out var body)) - return null; - - if (status != (int)HttpStatusCode.Unauthorized && status != (int)HttpStatusCode.Forbidden) - return null; - - return JsonSerializer.Serialize(new - { - error = "github_proxy_access_denied", - detail = string.IsNullOrWhiteSpace(envelopeMessage) ? "GitHub proxy returned 401/403 for the new agent API key." : envelopeMessage, - http_status = status, - proxy_body = string.IsNullOrWhiteSpace(body) ? null : body, - hint = "GitHub returned 401/403 through the NyxID proxy. 
Common causes: (a) the OAuth grant for GitHub was revoked at github.com/settings/applications or its scopes were downgraded — re-authorize the GitHub provider at NyxID; (b) the request reached GitHub without a User-Agent header (NyxIdApiClient now sends a default; if you see this, check that the deployed binary includes that fix). The agent will not produce a useful daily report until proxy access succeeds.", - nyx_provider_slug = nyxProviderSlug, - }); - } - catch (JsonException) - { - // Non-JSON probe response: don't pretend we know what's going on; let creation - // proceed so the agent can at least be created (operator can debug from logs). - return null; - } - } - - /// - /// Maps a /search/{issues,commits} probe response onto a fail-fast structured - /// error or null. Only 422 is fail-fast (the documented "invalid user/permission" / - /// "validation failed" surface); all other shapes (200 with empty results, 200 with - /// items, transient 5xx, secondary rate limits) flow through. - /// - /// - /// Sub-reason classification reads the upstream GitHub error body, since GitHub does not - /// give different status codes for the four cases the user-facing report needs to - /// distinguish (issue #473's expected behavior): user-not-exist, scope-insufficient, - /// search rate-limited, query-invalid. The first two share a body - /// ("...cannot be searched either because the resources do not exist or you do not - /// have permission to view them..."), so we collapse them into one - /// scope_insufficient_or_user_not_found reason — they're both actionable in the - /// same way (re-authorize GitHub at NyxID with broader scope, then retry; if that still - /// fails, verify the username is reachable). Other 422 bodies fall through as - /// validation_failed. - /// - private static string? 
ClassifySearchProbeFailure( - string probe, - string githubPath, - string githubUsername, - string nyxProviderSlug) - { - if (string.IsNullOrWhiteSpace(probe)) - return null; - - try - { - using var doc = JsonDocument.Parse(probe); - var root = doc.RootElement; - // `envelopeMessage` is the proxy envelope's `message` field; named to avoid - // shadowing the anonymous-type `detail` property below (codex review PR #479). - if (!IsErrorEnvelope(root, out var status, out var envelopeMessage, out var body)) - return null; - - if (status != (int)HttpStatusCode.UnprocessableEntity) - return null; - - var reason = ClassifyGitHubSearch422Body(body); - return JsonSerializer.Serialize(new - { - error = "github_search_unauthorized", - detail = string.IsNullOrWhiteSpace(envelopeMessage) - ? $"GitHub {githubPath} returned 422 for github_username `{githubUsername}` with the new agent API key. The /rate_limit probe succeeded, so the api-key itself is valid; the failure is specific to GitHub's search API." - : envelopeMessage, - http_status = status, - github_path = githubPath, - github_username = githubUsername, - reason_code = reason, - proxy_body = string.IsNullOrWhiteSpace(body) ? null : body, - // Hint references the `github_username` field above instead of inlining it - // a second time; codex review PR #479 caught a stray `{username}` literal in - // an earlier draft. - hint = "GitHub returned 422 from /search/* with the bound username. /search/commits and /search/issues enforce stricter scope than /rate_limit (which succeeded), so a token that passes /rate_limit can still fail every search call. 
Most common causes: (a) the OAuth grant for GitHub at NyxID is missing the scope GitHub's search engine requires (need `public_repo` to search public commits/issues, `repo` for private) — re-authorize the GitHub provider at NyxID with appropriate scopes; (b) the bound github_username (see field above) does not exist, was renamed, or has been restricted — verify it resolves at https://github.com/. The agent will not produce a useful daily report until /search/* succeeds.", - nyx_provider_slug = nyxProviderSlug, - }); - } - catch (JsonException) - { - return null; - } - } - - /// - /// Preflights Twitter (X) proxy access using the newly created agent API key against - /// Twitter's /users/me — a cheap read-only endpoint that returns 401 when NyxID has - /// no OAuth grant for the user (or the grant was revoked) and 403 when the bound token - /// lacks tweet.write scope. Returns a structured error JSON suitable for returning - /// verbatim from the tool when access is denied; returns null on success or on - /// probe shapes we don't classify as "fundamentally broken" (rate limits, 5xx). - /// - /// - /// Mirrors (issue aevatarAI/aevatar#216 / #418). - /// Two error codes instead of one because 401 and 403 lead to different user actions: - /// 401 means "go connect Twitter at NyxID" (or re-authorize a revoked grant); 403 means - /// "the bound token is missing tweet.write — operator/seed bug, not user fixable". - /// The freshly minted api-key is best-effort revoked at the call site so retries don't - /// accumulate orphan proxy-scoped keys. - /// - private async Task PreflightTwitterProxyAsync( - NyxIdApiClient nyxClient, - string apiKey, - string nyxProviderSlug, - CancellationToken ct) - { - // Cheap read-only endpoint; succeeds with the default `users.read` scope, fails with - // 401 when no OAuth grant is bound to the user behind the api-key, and 403 when the - // bound token's scope set is too narrow. 
- // - // PR #461 review (commit d9f6df81 follow-up): probe the *configured* publish slug so - // a caller-overridden `publish_provider_slug` is the slug we actually validate. The - // earlier hardcoded `"api-twitter"` would silently green-light a custom slug at - // create-time only to surface a runtime 4xx on the first publish. - var probe = await nyxClient.ProxyRequestAsync( - apiKey, - nyxProviderSlug, - "/users/me", - "GET", - body: null, - extraHeaders: null, - ct); - - if (string.IsNullOrWhiteSpace(probe)) - return null; - - try - { - using var doc = JsonDocument.Parse(probe); - var root = doc.RootElement; - if (root.ValueKind != JsonValueKind.Object) - return null; - - if (!root.TryGetProperty("error", out var errorProp)) - return null; - if (errorProp.ValueKind != JsonValueKind.True && errorProp.ValueKind != JsonValueKind.String) - return null; - - var status = TryReadInt32Property(root, "status") - ?? TryReadInt32Property(root, "code") - ?? 0; - if (status != (int)HttpStatusCode.Unauthorized && status != (int)HttpStatusCode.Forbidden) - return null; - - var detail = root.TryGetProperty("message", out var msgProp) && msgProp.ValueKind == JsonValueKind.String - ? msgProp.GetString() - : null; - var body = root.TryGetProperty("body", out var bodyProp) && bodyProp.ValueKind == JsonValueKind.String - ? bodyProp.GetString() - : null; - - // 401 vs 403 distinction is the actionable difference for the user. NyxID seeds - // `tweet.write` into the default scope set (provider_service.rs:405-450), so the - // realistic 401 path is "user has not connected Twitter yet at NyxID" or "the - // user revoked the grant at x.com/settings". A 403 here would mean either the - // seed regressed (ops escalation) or x.com itself denied the request body — keep - // both paths separate so the hint copy steers the right person. 
- if (status == (int)HttpStatusCode.Unauthorized) - { - return JsonSerializer.Serialize(new - { - error = "twitter_oauth_required", - detail = string.IsNullOrWhiteSpace(detail) ? "Twitter proxy returned 401 for the new agent API key." : detail, - http_status = status, - proxy_body = string.IsNullOrWhiteSpace(body) ? null : body, - hint = "Twitter (X) returned 401 through the NyxID proxy. The user has not connected Twitter at NyxID, or the OAuth grant was revoked at x.com/settings/connected_apps. Re-authorize the Twitter provider at NyxID before retrying agent creation.", - nyx_provider_slug = nyxProviderSlug, - }); - } - - return JsonSerializer.Serialize(new - { - error = "twitter_proxy_access_denied", - detail = string.IsNullOrWhiteSpace(detail) ? "Twitter proxy returned 403 for the new agent API key." : detail, - http_status = status, - proxy_body = string.IsNullOrWhiteSpace(body) ? null : body, - hint = "Twitter (X) returned 403 through the NyxID proxy. Default provider scope includes `tweet.write`; a 403 here usually means the seeded provider scope was downgraded or the bound token was issued before the scope was widened. Re-authorize at NyxID; if it still fails, ask ops to verify the Twitter provider seed includes `tweet.write`.", - nyx_provider_slug = nyxProviderSlug, - }); - } - catch (JsonException) - { - return null; - } - } - - /// - /// Reads the standard NyxIdApiClient.SendAsync error envelope shape. Returns - /// true when the response is an error envelope (with error: true or - /// error: "...") and extracts status (or code), message, and - /// body for downstream classification. Used by both rate-limit and search probe - /// classifiers so they parse the envelope identically. - /// - private static bool IsErrorEnvelope( - JsonElement root, - out int status, - out string? detail, - out string? 
body) - { - status = 0; - detail = null; - body = null; - - if (root.ValueKind != JsonValueKind.Object) - return false; - - if (!root.TryGetProperty("error", out var errorProp)) - return false; - if (errorProp.ValueKind != JsonValueKind.True && errorProp.ValueKind != JsonValueKind.String) - return false; - - status = TryReadInt32Property(root, "status") - ?? TryReadInt32Property(root, "code") - ?? 0; - detail = root.TryGetProperty("message", out var msgProp) && msgProp.ValueKind == JsonValueKind.String - ? msgProp.GetString() - : null; - body = root.TryGetProperty("body", out var bodyProp) && bodyProp.ValueKind == JsonValueKind.String - ? bodyProp.GetString() - : null; - return true; - } - - /// - /// Best-effort sub-reason classification for a GitHub 422 search response body. Returns a - /// short stable code so callers / operators can distinguish actionable cases without - /// regex'ing the body themselves. The detection is conservative — when the body doesn't - /// match a known pattern we fall through to validation_failed rather than guessing. - /// - private static string ClassifyGitHubSearch422Body(string? body) - { - if (string.IsNullOrWhiteSpace(body)) - return "validation_failed"; - - // GitHub returns the same body for "user does not exist" and "scope insufficient": - // the search engine refuses to enumerate the user's items in either case. Operators - // distinguish them by checking https://github.com/{username} out of band. - if (body.Contains("cannot be searched", StringComparison.OrdinalIgnoreCase) || - body.Contains("do not have permission to view", StringComparison.OrdinalIgnoreCase)) - { - return "scope_insufficient_or_user_not_found"; - } - - return "validation_failed"; - } - - private static int? 
TryReadInt32Property(JsonElement element, string propertyName) - { - if (!element.TryGetProperty(propertyName, out var property) || - property.ValueKind != JsonValueKind.Number || - !property.TryGetInt32(out var value)) - { - return null; - } - return value; - } - - private static bool? TryReadBool(JsonElement element, string propertyName) - { - if (element.ValueKind != JsonValueKind.Object || - !element.TryGetProperty(propertyName, out var property)) - { - return null; - } - - return property.ValueKind switch - { - JsonValueKind.True => true, - JsonValueKind.False => false, - _ => null, - }; - } - - /// - /// Best-effort revoke of an API key minted earlier in the create flow. Used when GitHub - /// preflight fails so retries of /daily don't accumulate orphan proxy-scoped keys - /// in the user's NyxID account (codex review #418 r3141846175). Failures here are logged - /// at Warning but do NOT propagate — the structured create-time error is the user-facing - /// signal; an orphan key is an ops cleanup concern, not a hard failure. 
- /// - private async Task BestEffortRevokeApiKeyAsync( - NyxIdApiClient nyxClient, - string sessionToken, - string apiKeyId, - string reason, - CancellationToken ct) - { - if (string.IsNullOrWhiteSpace(apiKeyId)) - return; - - try - { - var response = await nyxClient.DeleteApiKeyAsync(sessionToken, apiKeyId, ct); - if (LarkProxyResponse.TryGetError(response, out _, out var detail)) - { - _logger?.LogWarning( - "Failed to revoke orphan agent API key {ApiKeyId} after {Reason}: {Detail}", - apiKeyId, - reason, - detail); - } - } - catch (Exception ex) - { - _logger?.LogWarning( - ex, - "Exception revoking orphan agent API key {ApiKeyId} after {Reason}", - apiKeyId, - reason); - } - } - - private LarkReceiveTargetWithFallback ResolveDeliveryTarget(string conversationId, string agentId) - { - var chatType = AgentToolRequestContext.TryGet(ChannelMetadataKeys.ChatType); - var senderId = AgentToolRequestContext.TryGet(ChannelMetadataKeys.SenderId); - var unionId = AgentToolRequestContext.TryGet(ChannelMetadataKeys.LarkUnionId); - var chatId = AgentToolRequestContext.TryGet(ChannelMetadataKeys.LarkChatId); - - var target = LarkConversationTargets.BuildFromInboundWithFallback( - chatType, - conversationId, - senderId, - unionId, - chatId); - - if (target.Primary.FellBackToPrefixInference) - { - _logger?.LogDebug( - "Agent builder fell back to legacy delivery target inference for {AgentId}: chatType={ChatType}, hasUnionId={HasUnionId}, hasLarkChatId={HasLarkChatId}, hasSenderId={HasSenderId}, resolvedReceiveIdType={ReceiveIdType}. Cross-app outbound (e.g. customer api-lark-bot) may surface Lark `99992361 open_id cross app` until the relay propagates union_id.", - agentId, - chatType ?? 
string.Empty, - !string.IsNullOrWhiteSpace(unionId), - !string.IsNullOrWhiteSpace(chatId), - !string.IsNullOrWhiteSpace(senderId), - target.Primary.ReceiveIdType); - } - - return target; - } - private sealed class BuilderArgs { private readonly Dictionary _properties; diff --git a/agents/Aevatar.GAgents.Authoring.Lark/FeishuCardHumanInteractionPort.cs b/agents/Aevatar.GAgents.Authoring.Lark/FeishuCardHumanInteractionPort.cs index 5163f345a..958d8d2a4 100644 --- a/agents/Aevatar.GAgents.Authoring.Lark/FeishuCardHumanInteractionPort.cs +++ b/agents/Aevatar.GAgents.Authoring.Lark/FeishuCardHumanInteractionPort.cs @@ -72,16 +72,6 @@ await SendTextMessageAsync( "Feishu approval resolution delivery failed", cancellationToken); - if (ShouldSendApprovedContent(target, resolution)) - { - await SendTextMessageAsync( - target, - resolution.ResolvedContent!, - "Feishu approved-content delivery returned empty response.", - "Feishu approved-content delivery failed", - cancellationToken); - } - _logger.LogInformation( "Delivered human approval resolution text: target={DeliveryTargetId}, run={RunId}, step={StepId}, approved={Approved}", deliveryTargetId, @@ -143,14 +133,6 @@ internal static string BuildApprovalResolutionText( if (!string.IsNullOrWhiteSpace(resolution.Feedback)) lines.Add($"Feedback: {resolution.Feedback}"); - if (!resolution.Approved && target is not null && - string.Equals(target.TemplateName, WorkflowAgentDefaults.TemplateName, StringComparison.OrdinalIgnoreCase)) - { - lines.Add(string.Empty); - lines.Add($"Run again: /run-agent {target.AgentId}"); - lines.Add("View agents: /agents"); - } - return string.Join('\n', lines); } @@ -281,13 +263,6 @@ private async Task ResolveTargetAsync( return target; } - private static bool ShouldSendApprovedContent( - UserAgentDeliveryTarget target, - HumanApprovalResolution resolution) => - resolution.Approved && - !string.IsNullOrWhiteSpace(resolution.ResolvedContent) && - string.Equals(target.TemplateName, 
WorkflowAgentDefaults.TemplateName, StringComparison.OrdinalIgnoreCase); - private async Task SendTextMessageAsync( UserAgentDeliveryTarget target, string text, @@ -436,8 +411,8 @@ private static string BuildLarkRejectionMessage(string failurePrefix, int? larkC // instead of the cryptic Lark `99992361 open_id cross app`. return $"{failurePrefix} (code={larkCode}): {detail}. " + - "This workflow agent was created before cross-app union_id ingress existed; " + - "delete and recreate it (`/agents` → Delete → `/social-media`) to pick up the cross-app safe target."; + "This agent was created before cross-app union_id ingress existed; " + + "delete it (`/agents` → Delete) and recreate it to pick up the cross-app safe target."; } if (larkCode == LarkBotErrorCodes.UserIdCrossTenant) @@ -449,10 +424,9 @@ private static string BuildLarkRejectionMessage(string failurePrefix, int? larkC return $"{failurePrefix} (code={larkCode}): {detail}. " + "The outbound Lark app is in a different tenant than the inbound app, so " + - "user-id translation is impossible. Delete and recreate the workflow agent " + - "(`/agents` → Delete → `/social-media`) so the new chat_id-preferred outbound " + - "path takes effect, or align the NyxID `s/api-lark-bot` proxy with the channel-bot " + - "that received the inbound event."; + "user-id translation is impossible. 
Delete the agent (`/agents` → Delete) and recreate " + + "it so the new chat_id-preferred outbound path takes effect, or align the NyxID " + + "`s/api-lark-bot` proxy with the channel-bot that received the inbound event."; } return larkCode is { } code diff --git a/agents/Aevatar.GAgents.Authoring.Lark/NyxRelayAgentBuilderFlow.cs b/agents/Aevatar.GAgents.Authoring.Lark/NyxRelayAgentBuilderFlow.cs index 4653a19b7..732972da3 100644 --- a/agents/Aevatar.GAgents.Authoring.Lark/NyxRelayAgentBuilderFlow.cs +++ b/agents/Aevatar.GAgents.Authoring.Lark/NyxRelayAgentBuilderFlow.cs @@ -1,4 +1,3 @@ -using System.Globalization; using System.Text; using System.Text.Json; using Aevatar.GAgents.Channel.Abstractions; @@ -11,17 +10,13 @@ namespace Aevatar.GAgents.Authoring.Lark; public static class NyxRelayAgentBuilderFlow { private const string PrivateChatType = "p2p"; - private const string DailyCommand = "/daily"; - private const string SocialMediaCommand = "/social-media"; - private const string SocialMediaAlias = "/create-social-media"; - private const string ListTemplatesCommand = "/templates"; private const string ListAgentsCommand = "/agents"; private const string AgentStatusCommand = "/agent-status"; private const string RunAgentCommand = "/run-agent"; private const string DisableAgentCommand = "/disable-agent"; private const string EnableAgentCommand = "/enable-agent"; private const string DeleteAgentCommand = "/delete-agent"; - private const string DefaultScheduleTime = "09:00"; + private const string DailySkillCommand = "/daily"; public static bool TryResolve( ChannelInboundEvent evt, @@ -43,6 +38,9 @@ public static bool TryResolve( return false; var command = tokens[0]; + if (IsOrnnSkillShortcut(command)) + return false; + if (!IsKnownCommand(command)) { decision = AgentBuilderFlowDecision.DirectReply(BuildUnknownCommandReply(command, slashCommandRegistry)); @@ -55,7 +53,7 @@ public static bool TryResolve( return true; } - return TryResolveKnownCommand(command, tokens, 
evt.ConversationId, out decision); + return TryResolveKnownCommand(command, tokens, out decision); } public static MessageContent FormatToolResult(AgentBuilderFlowDecision decision, string toolResultJson) @@ -67,9 +65,6 @@ public static MessageContent FormatToolResult(AgentBuilderFlowDecision decision, using var doc = JsonDocument.Parse(toolResultJson); return decision.ToolAction switch { - "create_daily_report" => FormatCreateDailyReportResult(doc.RootElement), - "create_social_media" => TextContent(FormatCreateSocialMediaResult(doc.RootElement)), - "list_templates" => TextContent(FormatListTemplatesResult(doc.RootElement)), "list_agents" => AgentBuilderCardContent.FormatListAgentsResult(doc.RootElement), "agent_status" => FormatAgentStatusCard(doc.RootElement), "run_agent" => TextContent(FormatRunAgentResult(doc.RootElement)), @@ -88,38 +83,26 @@ public static MessageContent FormatToolResult(AgentBuilderFlowDecision decision, private static MessageContent TextContent(string text) => AgentBuilderJson.TextContent(text); private static bool IsKnownCommand(string command) => - command is DailyCommand - or SocialMediaCommand or SocialMediaAlias - or ListTemplatesCommand - or ListAgentsCommand + command is ListAgentsCommand or AgentStatusCommand or RunAgentCommand or DisableAgentCommand or EnableAgentCommand or DeleteAgentCommand; + private static bool IsOrnnSkillShortcut(string command) => + string.Equals(command, DailySkillCommand, StringComparison.OrdinalIgnoreCase); + private static bool IsPrivateChat(string? chatType) => string.Equals(chatType, PrivateChatType, StringComparison.OrdinalIgnoreCase); private static bool TryResolveKnownCommand( string command, IReadOnlyList tokens, - string? conversationId, out AgentBuilderFlowDecision? 
decision) { switch (command) { - case DailyCommand: - return TryResolveDailyReport(tokens, conversationId, out decision); - - case SocialMediaCommand: - case SocialMediaAlias: - return TryResolveSocialMedia(tokens, conversationId, out decision); - - case ListTemplatesCommand: - decision = AgentBuilderFlowDecision.ToolCall("list_templates", """{"action":"list_templates"}"""); - return true; - case ListAgentsCommand: decision = AgentBuilderFlowDecision.ToolCall("list_agents", """{"action":"list_agents"}"""); return true; @@ -145,102 +128,6 @@ private static bool TryResolveKnownCommand( } } - private static bool TryResolveDailyReport( - IReadOnlyList tokens, - string? conversationId, - out AgentBuilderFlowDecision? decision) - { - decision = null; - var args = ChannelTextCommandParser.ParseNamedArguments(tokens); - var githubUsername = NormalizeOptional( - GetOptional(args, "github_username") ?? FirstPositionalArgument(tokens)); - - if (!TryResolveSchedule(args, out var scheduleCron, out var scheduleTimezone, out var error)) - { - decision = AgentBuilderFlowDecision.DirectReply(error! + "\n\n" + BuildDailyReportHelpText()); - return true; - } - - var repositories = GetOptional(args, "repositories"); - var runImmediately = ResolveRunImmediately(args); - // When the user typed a positional username we persist it as their default so the next /daily - // call auto-resolves via the saved preference fallback inside AgentBuilderTool. 
- var savePreference = githubUsername is not null; - decision = AgentBuilderFlowDecision.ToolCall( - "create_daily_report", - JsonSerializer.Serialize(new - { - action = "create_agent", - template = "daily_report", - github_username = githubUsername, - save_github_username_preference = savePreference, - repositories, - schedule_cron = scheduleCron, - schedule_timezone = scheduleTimezone, - run_immediately = runImmediately, - conversation_id = NormalizeOptional(conversationId), - })); - return true; - } - - private static bool TryResolveSocialMedia( - IReadOnlyList tokens, - string? conversationId, - out AgentBuilderFlowDecision? decision) - { - decision = null; - if (tokens.Count == 1) - { - decision = AgentBuilderFlowDecision.DirectReply(BuildSocialMediaHelpText()); - return true; - } - - var args = ChannelTextCommandParser.ParseNamedArguments(tokens); - var topic = GetOptional(args, "topic") ?? FirstPositionalArgument(tokens); - if (string.IsNullOrWhiteSpace(topic)) - { - decision = AgentBuilderFlowDecision.DirectReply( - "topic is required.\n\n" + BuildSocialMediaHelpText()); - return true; - } - - if (!TryResolveSchedule(args, out var scheduleCron, out var scheduleTimezone, out var error)) - { - decision = AgentBuilderFlowDecision.DirectReply(error! + "\n\n" + BuildSocialMediaHelpText()); - return true; - } - - decision = AgentBuilderFlowDecision.ToolCall( - "create_social_media", - JsonSerializer.Serialize(new - { - action = "create_agent", - template = "social_media", - topic, - audience = GetOptional(args, "audience"), - style = GetOptional(args, "style"), - schedule_cron = scheduleCron, - schedule_timezone = scheduleTimezone, - run_immediately = ResolveRunImmediately(args), - conversation_id = NormalizeOptional(conversationId), - })); - return true; - } - - private static string? 
FirstPositionalArgument(IReadOnlyList tokens) - { - for (var i = 1; i < tokens.Count; i++) - { - var token = tokens[i]; - if (string.IsNullOrWhiteSpace(token)) - continue; - if (token.IndexOf('=', StringComparison.Ordinal) >= 0) - continue; - return token.Trim(); - } - return null; - } - private static bool TryResolveSimpleAgentAction( IReadOnlyList tokens, string action, @@ -296,58 +183,12 @@ private static bool TryResolveDeleteAgent( return true; } - private static MessageContent FormatCreateDailyReportResult(JsonElement root) => - AgentBuilderCardContent.FormatDailyReportToolReply(root); - - private static string FormatCreateSocialMediaResult(JsonElement root) - { - if (TryReadError(root, out var error)) - return $"Create social media agent failed: {error}"; - - return BuildTextBlock( - "Social media agent registered.", - $"Agent ID: {ReadString(root, "agent_id") ?? "unknown-agent"}", - $"Workflow ID: {ReadString(root, "workflow_id") ?? "pending"}", - $"Next scheduled run: {ReadString(root, "next_scheduled_run") ?? "pending"}", - NormalizeOptional(ReadString(root, "note")), - "Approvals will arrive as interactive cards in this chat. Text commands such as /approve and /reject still work as fallback.", - "Next commands: /agents, /agent-status , /run-agent "); - } - - private static string FormatListTemplatesResult(JsonElement root) - { - if (TryReadError(root, out var error)) - return $"List templates failed: {error}"; - - if (!root.TryGetProperty("templates", out var templatesElement) || - templatesElement.ValueKind != JsonValueKind.Array || - templatesElement.GetArrayLength() == 0) - { - return "No templates available."; - } - - var lines = new List { "Available templates:" }; - foreach (var item in templatesElement.EnumerateArray()) - { - var name = ReadString(item, "name") ?? "unknown-template"; - var description = ReadString(item, "description") ?? 
"No description."; - lines.Add($"- {name}: {description}"); - } - - lines.Add(string.Empty); - lines.Add("Examples:"); - lines.Add(BuildDailyReportCommandExample()); - lines.Add(BuildSocialMediaCommandExample()); - return string.Join('\n', lines); - } - /// /// Renders /agent-status <agent_id> as an interactive card with action buttons /// (Run, Disable, Enable, Delete). Each button submits the corresponding /// agent_builder_action with the agent_id as an argument so /// can route the click to the existing tool action without - /// the user having to retype the id. Mirrors the card produced by the card-flow path so the - /// text-command and card-flow surfaces stay visually consistent. + /// the user having to retype the id. /// private static MessageContent FormatAgentStatusCard(JsonElement root) { @@ -386,10 +227,6 @@ private static MessageContent FormatAgentStatusCard(JsonElement root) Text = string.Join("\n", bodyLines), }); - // Lifecycle buttons mirror the legacy text "Next commands: ..." line. Disable and Enable - // are both shown so the user can flip status either direction without typing; the click - // handler enforces the invariants. Delete is marked danger so Lark renders it red and the - // user has a final visual confirm before submitting. var isRunning = string.Equals(status, SkillRunnerDefaults.StatusRunning, StringComparison.OrdinalIgnoreCase) || string.Equals(status, SkillRunnerDefaults.StatusError, StringComparison.OrdinalIgnoreCase); content.Actions.Add(BuildAgentScopedButton("Run Now", "run_agent", agentId, isPrimary: isRunning)); @@ -469,81 +306,12 @@ private static string FormatDeleteAgentResult(JsonElement root) "Run /agents to refresh the registry view."); } - private static bool TryResolveSchedule( - IReadOnlyDictionary args, - out string? scheduleCron, - out string scheduleTimezone, - out string? error) - { - scheduleCron = null; - error = null; - - scheduleTimezone = GetOptional(args, "schedule_timezone") ?? 
SkillRunnerDefaults.DefaultTimezone; - var rawCron = GetOptional(args, "schedule_cron"); - if (!string.IsNullOrWhiteSpace(rawCron)) - { - scheduleCron = rawCron; - return true; - } - - var rawTime = GetOptional(args, "schedule_time"); - var normalized = rawTime ?? DefaultScheduleTime; - if (!TimeOnly.TryParseExact( - normalized, - ["HH:mm", "H:mm"], - CultureInfo.InvariantCulture, - DateTimeStyles.None, - out var time)) - { - error = "schedule_time must use HH:mm, for example 09:00."; - return false; - } - - scheduleCron = $"{time.Minute} {time.Hour} * * *"; - return true; - } - - private static bool ResolveRunImmediately(IReadOnlyDictionary args) - { - var raw = GetOptional(args, "run_immediately"); - return !bool.TryParse(raw, out var parsed) || parsed; - } - - private static string? GetOptional(IReadOnlyDictionary args, string key) - { - if (!args.TryGetValue(key, out var raw)) - return null; - - return NormalizeOptional(raw); - } - private static bool TryReadError(JsonElement root, out string error) => AgentBuilderJson.TryReadError(root, out error); private static string? 
ReadString(JsonElement element, string propertyName) => AgentBuilderJson.TryReadString(element, propertyName); - private static string BuildDailyReportHelpText() => - BuildTextBlock( - "Daily report agent command", - "GitHub username can be passed explicitly, or omitted to reuse a saved preference when available.", - "Schedule defaults to 09:00 if schedule_time and schedule_cron are both omitted.", - $"Example: {BuildDailyReportCommandExample()}", - "Optional: github_username (otherwise uses your saved preference or connected GitHub login), repositories=owner/repo,owner/repo schedule_timezone=Asia/Singapore run_immediately=false"); - - private static string BuildSocialMediaHelpText() => - BuildTextBlock( - "Social media agent command", - "Required: topic plus either schedule_time or schedule_cron.", - $"Example: {BuildSocialMediaCommandExample()}", - "Optional: audience=\"Developers\" style=\"Confident and concise\" schedule_timezone=Asia/Singapore run_immediately=false"); - - private static string BuildDailyReportCommandExample() => - "/daily [github_username] schedule_time=09:00 repositories=owner/repo"; - - private static string BuildSocialMediaCommandExample() => - "/social-media topic=\"Launch update\" schedule_time=10:30 audience=\"Developers\" style=\"Confident and concise\""; - private static string BuildUnknownCommandReply( string command, ChannelSlashCommandRegistry? 
slashCommandRegistry) => @@ -552,9 +320,6 @@ private static string BuildUnknownCommandReply( { $"Unknown command: {command}", "Supported commands:", - BuildDailyReportCommandExample(), - BuildSocialMediaCommandExample(), - "/templates", "/agents", "/agent-status ", "/run-agent ", diff --git a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingNotFoundException.cs b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingNotFoundException.cs index 66b63dba9..31e0f05d1 100644 --- a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingNotFoundException.cs +++ b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingNotFoundException.cs @@ -12,9 +12,9 @@ namespace Aevatar.GAgents.Channel.Identity.Abstractions; /// /// Caller behaviour: /// -/// Outbound / turn path: prompt the sender to run /init. -/// Do NOT fall back to bot-owner credentials or any cached token -/// (ADR-0018 §Implementation Notes #4). +/// Binding-required commands: prompt the sender to run /init. +/// Normal LLM turns: treat the sender config as unavailable and fall +/// back to the bot owner's LLM credentials. /// /// public sealed class BindingNotFoundException : Exception diff --git a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingRevokedException.cs b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingRevokedException.cs index b5c7b4fc8..a0738c8f2 100644 --- a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingRevokedException.cs +++ b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingRevokedException.cs @@ -5,8 +5,8 @@ namespace Aevatar.GAgents.Channel.Identity.Abstractions; /// /// Thrown by when /// NyxID reports the binding as revoked (HTTP 400 invalid_grant). -/// Callers MUST event-source revoke the local binding actor and prompt the -/// sender to run /init again. See ADR-0018 Decision §invalid_grant. 
+/// Binding-required callers should prompt the sender to run /init +/// again; normal LLM turns may fall back to the bot owner's LLM credentials. /// public sealed class BindingRevokedException : Exception { diff --git a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingScopeMismatchException.cs b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingScopeMismatchException.cs index db2e99f12..a0416f16d 100644 --- a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingScopeMismatchException.cs +++ b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/BindingScopeMismatchException.cs @@ -5,8 +5,9 @@ namespace Aevatar.GAgents.Channel.Identity.Abstractions; /// /// Thrown by when /// NyxID reports that the existing binding cannot mint the requested scope -/// (HTTP 400 invalid_scope). The user must re-run /init so the -/// binding is recreated against the current OAuth client scopes. +/// (HTTP 400 invalid_scope). Binding-required callers should ask the +/// user to re-run /init; normal LLM turns may fall back to the bot +/// owner's LLM credentials. /// public sealed class BindingScopeMismatchException : Exception { diff --git a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/IExternalIdentityBindingQueryPort.cs b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/IExternalIdentityBindingQueryPort.cs index d1ec57e84..61f311202 100644 --- a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/IExternalIdentityBindingQueryPort.cs +++ b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/IExternalIdentityBindingQueryPort.cs @@ -13,8 +13,9 @@ public interface IExternalIdentityBindingQueryPort /// /// Returns the active for the given external subject, /// or null when no active binding is materialized in the readmodel. - /// A miss MUST drive the caller to prompt the sender to /init; - /// callers MUST NOT fall back to bot-owner credentials or any cached token. + /// A miss means the sender has no usable per-user NyxID context. 
Callers + /// that require per-user state may prompt /init; normal LLM turns + /// may continue with bot-owner fallback credentials. /// Task ResolveAsync( ExternalSubjectRef externalSubject, diff --git a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/INyxIdCapabilityBroker.cs b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/INyxIdCapabilityBroker.cs index 92746a029..e7b16063f 100644 --- a/agents/Aevatar.GAgents.Channel.Identity.Abstractions/INyxIdCapabilityBroker.cs +++ b/agents/Aevatar.GAgents.Channel.Identity.Abstractions/INyxIdCapabilityBroker.cs @@ -40,9 +40,9 @@ Task RevokeBindingAsync( /// when NyxID reports /// invalid_grant on a previously-bound subject; throws /// when NyxID reports - /// invalid_scope for an existing binding. Callers MUST event-source - /// revoke the local binding actor on invalid_grant and prompt the sender - /// to re-run /init for both user-remediable cases. + /// invalid_scope for an existing binding. Binding-required callers + /// can prompt the sender to re-run /init; normal LLM turns can + /// continue with bot-owner fallback credentials. /// /// /// No active binding exists for the subject (never bound, or readmodel diff --git a/agents/Aevatar.GAgents.Channel.Identity/DependencyInjection/IdentityServiceCollectionExtensions.cs b/agents/Aevatar.GAgents.Channel.Identity/DependencyInjection/IdentityServiceCollectionExtensions.cs index a6892bcb1..bbab5d536 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/DependencyInjection/IdentityServiceCollectionExtensions.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/DependencyInjection/IdentityServiceCollectionExtensions.cs @@ -84,6 +84,8 @@ public static IServiceCollection AddChannelIdentity( // inbound message's gate keeps re-sending the binding card. See // issue #549 follow-up observed 2026-05-01. 
services.TryAddSingleton(); + services.TryAddSingleton( + sp => sp.GetRequiredService()); // ─── Cluster-singleton OAuth client projection ─── services.AddProjectionMaterializationRuntimeCore< @@ -112,6 +114,19 @@ public static IServiceCollection AddChannelIdentity( // (production regression observed 2026-04-30 in aismart-app-mainnet). services.TryAddSingleton(); + // Endpoint filter for the operator /rebuild path — rejects unauthenticated + // callers before model binding/DI resolution kicks in. + services.TryAddTransient(); + services.TryAddSingleton(); + + // ─── Operator admin surface (rebuild endpoint, issue #549) ─── + // Bound from configuration when present; absence keeps the rebuild + // endpoint fail-secure (503 with "rebuild not configured"). Production + // sets the token via env var ChannelIdentity__Admin__RebuildToken. + var adminOptions = services.AddOptions(); + if (configuration is not null) + adminOptions.Bind(configuration.GetSection(AevatarOAuthAdminOptions.SectionName)); + // ─── Broker (self-bootstrapping, no appsettings dependency) ─── // Register broker as a *singleton* and inject IHttpClientFactory so // each call resolves a fresh HttpClient backed by the factory's diff --git a/agents/Aevatar.GAgents.Channel.Identity/Endpoints/IdentityOAuthEndpoints.cs b/agents/Aevatar.GAgents.Channel.Identity/Endpoints/IdentityOAuthEndpoints.cs index 4900c6392..5c8198a59 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Endpoints/IdentityOAuthEndpoints.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Endpoints/IdentityOAuthEndpoints.cs @@ -1,3 +1,5 @@ +using System.Security.Cryptography; +using System.Text; using Aevatar.Foundation.Abstractions; using Aevatar.Foundation.Core; using Aevatar.GAgents.Channel.Abstractions; @@ -8,7 +10,9 @@ using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Routing; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using 
Microsoft.Extensions.Options; namespace Aevatar.GAgents.Channel.Identity.Endpoints; @@ -19,7 +23,46 @@ namespace Aevatar.GAgents.Channel.Identity.Endpoints; public static class IdentityOAuthEndpoints { private static readonly TimeSpan ProjectionWaitTimeout = TimeSpan.FromSeconds(3); + // 15s leaves comfortable margin under typical reverse-proxy idle-timeout + // budgets (Cloudflare 100s, AWS ALB 60s default, stricter corporate + // proxies 30s) so the operator does not hit a 504 race on the happy path + // even when the readmodel takes a few seconds to materialize. Callers + // that hit the timeout still get a 202 with a poll URL — see issue #549 + // PR #570 review (mimo-v2.5-pro / glm-5.1). + private static readonly TimeSpan RebuildObservationTimeout = TimeSpan.FromSeconds(15); + private static readonly TimeSpan RebuildObservationPollDelay = TimeSpan.FromMilliseconds(250); private const int MaxWebhookBodyBytes = 64 * 1024; + private const string OAuthCallbackPublisherActorId = "channel-identity.oauth-callback"; + private const string OAuthRebuildPublisherActorId = "channel-identity.oauth-rebuild"; + private const string BrokerRevocationPublisherActorId = "channel-identity.broker-revocation"; + + /// + /// Same-host admission gate for the break-glass OAuth client rebuild endpoint. + /// The actor is still the authoritative serializer; this gate prevents two + /// operator HTTP calls on one host from dispatching competing rebuild commands + /// and then racing each other through the readmodel observation loop. 
+ /// + public sealed class AevatarOAuthClientRebuildCoordinator + { + private readonly SemaphoreSlim _gate = new(1, 1); + + public async ValueTask TryEnterAsync(CancellationToken ct) + { + if (!await _gate.WaitAsync(millisecondsTimeout: 0, ct).ConfigureAwait(false)) + return null; + + return new Lease(_gate); + } + + private sealed class Lease(SemaphoreSlim gate) : IAsyncDisposable + { + public ValueTask DisposeAsync() + { + gate.Release(); + return ValueTask.CompletedTask; + } + } + } public static IEndpointRouteBuilder MapIdentityOAuthEndpoints(this IEndpointRouteBuilder app) { @@ -34,6 +77,18 @@ public static IEndpointRouteBuilder MapIdentityOAuthEndpoints(this IEndpointRout app.MapGet("/api/oauth/aevatar-client/status", HandleAevatarOAuthClientStatusAsync) .WithTags("ChannelIdentity") .AllowAnonymous(); + // Operator-only: rebuild the cluster-singleton OAuth client snapshot + // to point at an admin-supplied client_id (issue #549 production + // unblock). Auth is by static admin token header — see + // AevatarOAuthAdminOptions. AllowAnonymous because the auth check is + // done inline; no ASP.NET auth handler is wired for this module. The + // RebuildAuthEndpointFilter rejects unauthenticated callers BEFORE + // model binding / DI resolution so a flooded admin-token-less request + // does not run through deserialization and DI on every call. + app.MapPost("/api/oauth/aevatar-client/rebuild", HandleAevatarOAuthClientRebuildAsync) + .WithTags("ChannelIdentity") + .AddEndpointFilter() + .AllowAnonymous(); return app; } @@ -44,11 +99,13 @@ internal static async Task HandleNyxIdOAuthCallbackAsync( [FromQuery] string? code, [FromQuery] string? state, [FromQuery] string? error, + [FromQuery] string? 
format, [FromServices] INyxIdBrokerCallbackClient brokerCallback, [FromServices] IExternalIdentityBindingQueryPort queryPort, [FromServices] IActorRuntime actorRuntime, + [FromServices] IActorDispatchPort actorDispatchPort, [FromServices] IProjectionReadinessPort projectionReadiness, - [FromServices] ExternalIdentityBindingProjectionPort bindingProjectionPort, + [FromServices] IExternalIdentityBindingProjectionPort bindingProjectionPort, [FromServices] ILoggerFactory loggerFactory, CancellationToken ct) { @@ -159,7 +216,7 @@ internal static async Task HandleNyxIdOAuthCallbackAsync( // orphan. Best-effort revoke at NyxID before responding so the // orphan does not accumulate at NyxID with no local reference. await TryRevokeOrphanBindingAsync(brokerCallback, exchange.BindingId, logger, ct).ConfigureAwait(false); - return Results.Ok(new { status = "already_bound", detail = "已绑定 NyxID 账号,可以回到 Lark 继续对话" }); + return RenderBoundSuccess(displayName: null, alreadyBound: true, format: format); } var actor = await TryActivateActorAsync(actorRuntime, actorId, logger, ct).ConfigureAwait(false); @@ -187,12 +244,9 @@ internal static async Task HandleNyxIdOAuthCallbackAsync( ExternalSubject = subject.Clone(), BindingId = exchange.BindingId, }), - Route = new EnvelopeRoute - { - Direct = new DirectRoute { TargetActorId = actorId }, - }, + Route = EnvelopeRouteSemantics.CreateDirect(OAuthCallbackPublisherActorId, actorId), }; - await actor.HandleEventAsync(commitEnvelope, ct).ConfigureAwait(false); + await actorDispatchPort.DispatchAsync(actor.Id, commitEnvelope, ct).ConfigureAwait(false); // Observe broker capability on the cluster client (idempotent) — first // successful binding_id is proof that NyxID admin enabled the flag. 
@@ -201,15 +255,14 @@ internal static async Task HandleNyxIdOAuthCallbackAsync( var clientActor = await actorRuntime .CreateAsync(AevatarOAuthClientGAgent.WellKnownId, ct) .ConfigureAwait(false); - await clientActor.HandleEventAsync(new EventEnvelope + await actorDispatchPort.DispatchAsync(clientActor.Id, new EventEnvelope { Id = Guid.NewGuid().ToString("N"), Timestamp = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), Payload = Any.Pack(new ObserveBrokerCapabilityCommand()), - Route = new EnvelopeRoute - { - Direct = new DirectRoute { TargetActorId = AevatarOAuthClientGAgent.WellKnownId }, - }, + Route = EnvelopeRouteSemantics.CreateDirect( + OAuthCallbackPublisherActorId, + AevatarOAuthClientGAgent.WellKnownId), }, ct).ConfigureAwait(false); } catch (Exception ex) @@ -252,7 +305,7 @@ await projectionReadiness resolvedAfterTimeout.Value, exchange.BindingId); await TryRevokeOrphanBindingAsync(brokerCallback, exchange.BindingId, logger, ct).ConfigureAwait(false); - return Results.Ok(new { status = "already_bound", detail = "已绑定 NyxID 账号,可以回到 Lark 继续对话" }); + return RenderBoundSuccess(displayName: null, alreadyBound: true, format: format); } logger.LogWarning( @@ -271,13 +324,7 @@ await projectionReadiness "Bound external identity {Platform}:{Tenant}:{User} -> binding_id={BindingId}", subject.Platform, subject.Tenant, subject.ExternalUserId, exchange.BindingId); - return Results.Ok(new - { - status = "bound", - detail = displayName is null - ? "已绑定 NyxID 账号,可以回到 Lark 继续对话" - : $"已绑定 NyxID 账号({displayName}),可以回到 Lark 继续对话", - }); + return RenderBoundSuccess(displayName, alreadyBound: false, format: format); } // ─── Status endpoint ─── @@ -328,12 +375,337 @@ internal static async Task HandleAevatarOAuthClientStatusAsync( } } + // ─── Operator rebuild ─── + + /// + /// Body for POST /api/oauth/aevatar-client/rebuild. 
The operator + /// supplies a fresh client_id (typically created via NyxID admin + /// after a wedge — see issue #549) and the actor pins its snapshot to + /// it. redirect_uri and oauth_scope are NOT operator- + /// supplied fields: the endpoint always uses + /// and + /// respectively, + /// otherwise the next bootstrap pass would observe drift and re-DCR + /// away the freshly-pinned client (PR #570 review consensus on the + /// drift bug + URL-validation surface). + /// + public sealed record RebuildAevatarOAuthClientRequest( + string? client_id, + long? client_id_issued_at_unix); + + internal static Task HandleAevatarOAuthClientRebuildAsync( + HttpContext http, + [FromBody] RebuildAevatarOAuthClientRequest? body, + [FromServices] IOptionsMonitor adminOptions, + [FromServices] IAevatarOAuthClientProvider provider, + [FromServices] AevatarOAuthClientProjectionPort projectionPort, + [FromServices] IActorRuntime actorRuntime, + [FromServices] IActorDispatchPort actorDispatchPort, + [FromServices] AevatarOAuthClientRebuildCoordinator rebuildCoordinator, + [FromServices] ILoggerFactory loggerFactory, + CancellationToken ct) => + HandleAevatarOAuthClientRebuildCoreAsync( + http, + body, + adminOptions, + provider, + projectionPort, + actorRuntime, + actorDispatchPort, + rebuildCoordinator, + loggerFactory, + observationTimeout: RebuildObservationTimeout, + observationPollDelay: RebuildObservationPollDelay, + ct); + + /// + /// Implementation seam exposed for tests so the readmodel-propagation + /// timeout can be tightened without waiting the full operator-grade + /// 30-second budget on every assertion. Production routes call the + /// thin overload above with the canonical defaults. + /// + internal static async Task HandleAevatarOAuthClientRebuildCoreAsync( + HttpContext http, + RebuildAevatarOAuthClientRequest? 
body, + IOptionsMonitor adminOptions, + IAevatarOAuthClientProvider provider, + AevatarOAuthClientProjectionPort projectionPort, + IActorRuntime actorRuntime, + IActorDispatchPort actorDispatchPort, + AevatarOAuthClientRebuildCoordinator? rebuildCoordinator, + ILoggerFactory loggerFactory, + TimeSpan observationTimeout, + TimeSpan observationPollDelay, + CancellationToken ct) + { + var logger = loggerFactory.CreateLogger("Aevatar.Channel.Identity.OAuthRebuild"); + + var configuredToken = adminOptions.CurrentValue.RebuildToken; + if (string.IsNullOrEmpty(configuredToken)) + { + logger.LogWarning( + "Rebuild endpoint invoked but ChannelIdentity:Admin:RebuildToken is unset; refusing fail-secure."); + return Results.Json(new + { + error = "rebuild_not_configured", + detail = "ChannelIdentity:Admin:RebuildToken is unset. Configure it (env var ChannelIdentity__Admin__RebuildToken) and redeploy before retrying.", + }, statusCode: StatusCodes.Status503ServiceUnavailable); + } + + if (!http.Request.Headers.TryGetValue(AevatarOAuthAdminOptions.RebuildTokenHeader, out var presented) + || !ConstantTimeEquals(configuredToken, presented.ToString())) + { + logger.LogWarning( + "Rebuild endpoint rejected: missing or invalid {Header}.", + AevatarOAuthAdminOptions.RebuildTokenHeader); + return Results.Unauthorized(); + } + + if (body is null || string.IsNullOrWhiteSpace(body.client_id)) + { + return Results.BadRequest(new + { + error = "client_id_required", + detail = "Body must include client_id (the NyxID-issued OAuth client_id this cluster should pin to).", + }); + } + + var authority = NyxIdAuthorityResolver.Resolve(logger); + var redirectUri = NyxIdRedirectUriResolver.Resolve(logger); + var oauthScope = AevatarOAuthClientScopes.AuthorizationScope; + + // Validate Unix-seconds before dispatching: AevatarOAuthClient + // ProjectionProvider later calls DateTimeOffset.FromUnixTimeSeconds + // on the persisted value, which throws ArgumentOutOfRangeException + // for values like 
long.MaxValue. Surface the bad input as a 400 + // here instead of letting the read path crash on the next status + // poll (codex P1 on PR #570). + long issuedAtUnix; + if (body.client_id_issued_at_unix is { } supplied) + { + try + { + _ = DateTimeOffset.FromUnixTimeSeconds(supplied); + } + catch (ArgumentOutOfRangeException) + { + return Results.BadRequest(new + { + error = "client_id_issued_at_unix_invalid", + detail = "client_id_issued_at_unix must be a Unix-seconds value within DateTimeOffset range.", + }); + } + issuedAtUnix = supplied; + } + else + { + issuedAtUnix = DateTimeOffset.UtcNow.ToUnixTimeSeconds(); + } + + await using var rebuildLease = rebuildCoordinator is null + ? null + : await rebuildCoordinator.TryEnterAsync(ct).ConfigureAwait(false); + if (rebuildCoordinator is not null && rebuildLease is null) + { + return Results.Json(new + { + error = "rebuild_in_progress", + detail = "Another OAuth client rebuild request is already dispatching or waiting for readmodel observation. Retry after it completes.", + }, statusCode: StatusCodes.Status409Conflict); + } + + // Activate the projection scope first so the projector subscribes to + // the actor's committed events before we dispatch the provision + // command — same pattern as AevatarOAuthClientBootstrapService. + // Without this the readmodel never updates and the wait loop below + // times out even though the actor committed correctly. + await projectionPort + .EnsureProjectionForActorAsync(AevatarOAuthClientGAgent.WellKnownId, ct) + .ConfigureAwait(false); + + // Dispatch through IActorDispatchPort to match /unbind and the rest of the + // codebase. CLAUDE.md "Runtime 与 Dispatch 分责" forbids inline + // actor.HandleEventAsync from app/host code — that bypasses the inbox + // serialization guarantees and any middleware/logging the dispatch port + // owns. 
The rebuild path deliberately skips DCR mediation (operator + // already holds the client_id), so we publish the provision command + // directly to the cluster-singleton actor and let the inbox process it. + var provisionEnvelope = new EventEnvelope + { + Id = Guid.NewGuid().ToString("N"), + Timestamp = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), + Payload = Any.Pack(new ProvisionAevatarOAuthClientCommand + { + ClientId = body.client_id!.Trim(), + ClientIdIssuedAtUnix = issuedAtUnix, + NyxidAuthority = authority, + OauthScope = oauthScope, + RedirectUri = redirectUri, + }), + Route = EnvelopeRouteSemantics.CreateDirect( + OAuthRebuildPublisherActorId, + AevatarOAuthClientGAgent.WellKnownId), + }; + try + { + await actorDispatchPort + .DispatchAsync(AevatarOAuthClientGAgent.WellKnownId, provisionEnvelope, ct) + .ConfigureAwait(false); + } + catch (Exception ex) + { + logger.LogError(ex, "Rebuild endpoint failed to dispatch ProvisionAevatarOAuthClientCommand."); + return Results.Json(new + { + error = "actor_dispatch_failed", + detail = "Failed to dispatch the provision command to the OAuth client actor. Check silo logs.", + }, statusCode: StatusCodes.Status503ServiceUnavailable); + } + + logger.LogWarning( + "Operator rebuild dispatched for AevatarOAuthClientGAgent: client_id={ClientId}, authority={Authority}, redirect_uri={RedirectUri}.", + body.client_id, + authority, + redirectUri); + + var observed = await WaitForRebuildObservedAsync( + provider, + expectedClientId: body.client_id!.Trim(), + expectedAuthority: authority, + expectedRedirectUri: redirectUri, + expectedOauthScope: oauthScope, + timeout: observationTimeout, + pollDelay: observationPollDelay, + ct) + .ConfigureAwait(false); + if (observed is null) + { + return Results.Json(new + { + status = "rebuild_pending_propagation", + detail = $"Provision command dispatched but readmodel has not yet caught up within {observationTimeout.TotalSeconds:n0}s. 
Re-poll /api/oauth/aevatar-client/status; it will reflect the new client_id once the projection materializes.", + }, statusCode: StatusCodes.Status202Accepted); + } + + return Results.Ok(new + { + status = "rebuilt", + client_id = observed.ClientId, + client_id_issued_at = observed.ClientIdIssuedAt, + nyxid_authority = observed.NyxIdAuthority, + redirect_uri_registered = observed.RedirectUri, + oauth_scope_registered = observed.OauthScope, + broker_capability_observed = observed.BrokerCapabilityObserved, + detail = "OAuth client rebuilt. New /init flows will use the supplied client_id; the previous client_id is now an orphan at NyxID — delete it via NyxID admin to keep the registration list clean.", + }); + } + + private static async Task WaitForRebuildObservedAsync( + IAevatarOAuthClientProvider provider, + string expectedClientId, + string expectedAuthority, + string expectedRedirectUri, + string expectedOauthScope, + TimeSpan timeout, + TimeSpan pollDelay, + CancellationToken ct) + { + var deadline = DateTimeOffset.UtcNow.Add(timeout); + while (DateTimeOffset.UtcNow < deadline) + { + ct.ThrowIfCancellationRequested(); + + try + { + var snapshot = await provider.GetAsync(ct).ConfigureAwait(false); + if (string.Equals(snapshot.ClientId, expectedClientId, StringComparison.Ordinal) + && string.Equals(snapshot.NyxIdAuthority, expectedAuthority, StringComparison.Ordinal) + && string.Equals(snapshot.RedirectUri, expectedRedirectUri, StringComparison.Ordinal) + && string.Equals(snapshot.OauthScope, expectedOauthScope, StringComparison.Ordinal)) + { + return snapshot; + } + } + catch (AevatarOAuthClientNotProvisionedException) + { + // Projection has not yet materialized the very first state + // root for this actor — possible on a brand-new cluster + // where rebuild is the first provisioning event. 
+ } + + await Task.Delay(pollDelay, ct).ConfigureAwait(false); + pollDelay = TimeSpan.FromMilliseconds(Math.Min(pollDelay.TotalMilliseconds * 2, 1000)); + } + return null; + } + + /// + /// Length-tolerant constant-time string compare. FixedTimeEquals + /// itself returns false on length mismatch in O(1), which leaks the + /// configured token's length to a timing observer — for an admin + /// break-glass surface keyed on a high-entropy token this residual leak + /// is acceptable (the attacker still has to brute-force the content). + /// The earlier shape returned early on right is null; the call + /// site short-circuits via TryGetValue so right is never null in + /// practice, but we still treat null as empty to keep the helper's + /// signature constant-time-uniform (PR #570 review, 4-model consensus). + /// + /// + /// SCOPE: this helper is intentionally private static and tied to + /// the rebuild admin-token check. It is NOT for general callers — if a new + /// caller needs constant-time string compare for a lower-entropy secret, + /// the length leak above becomes material; do not promote this to + /// internal/public without first replacing it with a length-padding scheme. + /// + private static bool ConstantTimeEquals(string left, string? right) + { + var leftBytes = Encoding.UTF8.GetBytes(left); + var rightBytes = Encoding.UTF8.GetBytes(right ?? string.Empty); + return CryptographicOperations.FixedTimeEquals(leftBytes, rightBytes); + } + + /// + /// Endpoint filter that performs the rebuild admin-token check before model binding + /// and per-request DI activation kick in. Without this filter the handler method + /// still rejects unauthenticated callers (it re-runs the same check inline), but + /// every unauthenticated POST would needlessly deserialize the body and resolve + /// IActorRuntime / IActorDispatchPort etc. — a small but real DoS amplifier on a + /// /rebuild that is supposed to be operator-only break-glass. 
+ /// + internal sealed class RebuildAuthEndpointFilter : IEndpointFilter + { + public async ValueTask InvokeAsync(EndpointFilterInvocationContext context, EndpointFilterDelegate next) + { + var http = context.HttpContext; + var adminOptions = http.RequestServices + .GetRequiredService>() + .CurrentValue; + var configuredToken = adminOptions.RebuildToken; + if (string.IsNullOrEmpty(configuredToken)) + { + // Fall through to the handler so it can return the standard + // "rebuild_not_configured" 503; we don't want this filter to short-circuit + // and bypass that explicit operator-facing error. + return await next(context).ConfigureAwait(false); + } + + if (!http.Request.Headers.TryGetValue(AevatarOAuthAdminOptions.RebuildTokenHeader, out var presented) + || !ConstantTimeEquals(configuredToken, presented.ToString())) + { + return Results.Unauthorized(); + } + + return await next(context).ConfigureAwait(false); + } + } + // ─── Broker revocation webhook ─── internal static async Task HandleBrokerRevocationWebhookAsync( HttpContext http, [FromServices] BrokerRevocationWebhookValidator webhookValidator, [FromServices] IActorRuntime actorRuntime, + [FromServices] IActorDispatchPort actorDispatchPort, [FromServices] ILoggerFactory loggerFactory, CancellationToken ct) { @@ -386,12 +758,9 @@ internal static async Task HandleBrokerRevocationWebhookAsync( ? 
"nyxid_cae_revocation" : notification.Reason, }), - Route = new EnvelopeRoute - { - Direct = new DirectRoute { TargetActorId = actorId }, - }, + Route = EnvelopeRouteSemantics.CreateDirect(BrokerRevocationPublisherActorId, actorId), }; - await actor.HandleEventAsync(revokeEnvelope, ct).ConfigureAwait(false); + await actorDispatchPort.DispatchAsync(actor.Id, revokeEnvelope, ct).ConfigureAwait(false); } catch (Exception ex) { @@ -486,4 +855,85 @@ private static byte[] Base64UrlDecode(string value) } return Convert.FromBase64String(padded); } + + /// + /// Render the user-facing success page returned in the OAuth-callback + /// response. Issue #513 phase 1 asked for a "callback success → please pick + /// a model" prompt. The full version is a card update pushed back into + /// Lark, which requires capturing the /init card's adapter-owned message + /// id and passing it through the OAuth state token — substantial new + /// design surface left as a follow-up. This page is the browser-side + /// substitute the user sees immediately after the OAuth redirect, and it + /// names the next-step commands (/model, /whoami) explicitly + /// so the user is not left guessing what to type back in Lark. + /// + /// + /// Display name comes from the id_token "name" / sub claim; HTML-encoded + /// before interpolation so a malicious id_token cannot inject markup. + /// Other error paths in the callback intentionally keep returning JSON for + /// ops/programmatic consumers. + /// + internal static IResult RenderBoundSuccessHtml(string? displayName, bool alreadyBound) => + RenderBoundSuccess(displayName, alreadyBound, format: null); + + /// + /// Render the post-binding success response. Default is the HTML browser page that + /// users land on after clicking the OAuth approve button. 
Programmatic consumers + /// (CLI, SDK, integration tests) opt into a JSON envelope by passing + /// ?format=json on the callback URL — the same shape the endpoint returned + /// before the HTML render landed (PR #570 review #24). + /// + internal static IResult RenderBoundSuccess(string? displayName, bool alreadyBound, string? format) + { + if (string.Equals(format, "json", StringComparison.OrdinalIgnoreCase)) + { + return Results.Json(new + { + status = "bound", + already_bound = alreadyBound, + display_name = string.IsNullOrWhiteSpace(displayName) ? null : displayName, + }); + } + + return RenderBoundSuccessHtmlInternal(displayName, alreadyBound); + } + + internal static IResult RenderBoundSuccessHtmlInternal(string? displayName, bool alreadyBound) + { + var badge = alreadyBound ? "已绑定" : "绑定成功"; + var heading = alreadyBound ? "NyxID 账号已绑定" : "已绑定 NyxID 账号"; + var displayLine = string.IsNullOrWhiteSpace(displayName) + ? string.Empty + : $"

账号:{System.Net.WebUtility.HtmlEncode(displayName)}

"; + var body = alreadyBound + ? "

当前账号已经完成绑定,无需重复操作。可以关闭此页,回到 Lark 继续对话。

" + : "

可以关闭此页,回到 Lark 继续对话。

"; + + var html = $@" + + + + +NyxID 绑定 — {badge} + + + +{badge} +

{heading}

+{displayLine} +{body} +
+下一步
+回到 Lark 后,发送 /model 选择想用的模型,或 /whoami 查看当前绑定状态。 +
+ +"; + return Results.Content(html, "text/html; charset=utf-8"); + } } diff --git a/agents/Aevatar.GAgents.Channel.Identity/ExternalIdentityBindingGAgent.cs b/agents/Aevatar.GAgents.Channel.Identity/ExternalIdentityBindingGAgent.cs index 2fbbe4a21..cbfea5d09 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/ExternalIdentityBindingGAgent.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/ExternalIdentityBindingGAgent.cs @@ -83,8 +83,8 @@ public async Task HandleCommitBinding(CommitBindingCommand cmd) // was never activated (issue #549 follow-up: the binding scope // missed an EnsureProjectionForActorAsync wiring while every // other GAgent had one) leaves the readmodel empty, the OAuth - // callback's readiness wait times out, and the next inbound - // message's binding gate keeps re-sending the user back to /init. + // callback's readiness wait times out, and binding-required + // commands keep re-sending the user back to /init. // Apply is identity, so the binding facts are not mutated by // this event. await PersistDomainEventAsync(new ExternalIdentityBindingProjectionRebuildRequestedEvent @@ -118,11 +118,13 @@ await PersistDomainEventAsync(new ExternalIdentityBoundEvent } /// - /// Revokes the active binding. NO-OP when state has no active binding - /// (e.g. concurrent /unbind, or revoke-after-revoke from invalid_grant - /// retry). Caller must have already invoked the NyxID-side revoke - /// (or observed invalid_grant) — this command only transitions - /// local state. + /// Revokes the active binding. When state has no active binding (for + /// example concurrent /unbind, revoke-after-revoke from + /// invalid_grant, or remote-side self-heal after projection drift), + /// emits a no-op rebuild event so the readmodel is overwritten from the + /// actor's authoritative empty state. Caller must have already invoked + /// the NyxID-side revoke (or observed invalid_grant) — this command + /// only transitions local state. 
/// [EventHandler] public async Task HandleRevokeBinding(RevokeBindingCommand cmd) @@ -138,26 +140,37 @@ public async Task HandleRevokeBinding(RevokeBindingCommand cmd) if (!IsCommandSubjectMatchingActor(cmd.ExternalSubject)) return; + // Use the explicit "unspecified" sentinel so the persisted audit + // trail distinguishes "caller did not supply a reason" from a + // missing/empty value. The event Reason field is non-nullable in + // proto3 (defaults to ""), so the sentinel substitution lives at + // the boundary here rather than relying on per-call interpretation + // (kimi-k2p6 L109 / L124 5/5 consensus). + var reason = string.IsNullOrWhiteSpace(cmd.Reason) ? "unspecified" : cmd.Reason; + if (string.IsNullOrEmpty(State.BindingId)) { + // Remote revocation self-heal can land here when the actor state + // is already empty but the readmodel still contains an old active + // binding. Persisting an identity event republishes the committed + // state root, allowing the projector to overwrite that stale + // document without inventing query-time repair logic. + await PersistDomainEventAsync(new ExternalIdentityBindingProjectionRebuildRequestedEvent + { + Reason = $"revoke_without_active_binding:{reason}", + RequestedAt = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), + }); Logger.LogInformation( - "RevokeBinding skipped: no active binding for {Platform}:{Tenant}:{User}", + "RevokeBinding found no active binding for {Platform}:{Tenant}:{User}; rebuild requested so the projector materializes the authoritative empty state (reason={Reason})", cmd.ExternalSubject.Platform, cmd.ExternalSubject.Tenant, - cmd.ExternalSubject.ExternalUserId); + cmd.ExternalSubject.ExternalUserId, + reason); return; } var revokedBindingId = State.BindingId; - // Use the explicit "unspecified" sentinel so the persisted audit - // trail distinguishes "caller did not supply a reason" from a - // missing/empty value. 
The event Reason field is non-nullable in - // proto3 (defaults to ""), so the sentinel substitution lives at - // the boundary here rather than relying on per-call interpretation - // (kimi-k2p6 L109 / L124 5/5 consensus). - var reason = string.IsNullOrWhiteSpace(cmd.Reason) ? "unspecified" : cmd.Reason; - await PersistDomainEventAsync(new ExternalIdentityBindingRevokedEvent { ExternalSubject = cmd.ExternalSubject.Clone(), diff --git a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionPort.cs b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionPort.cs index 789cf2653..f530659a4 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionPort.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionPort.cs @@ -17,13 +17,14 @@ namespace Aevatar.GAgents.Channel.Identity; /// Pre-this-port, the binding scope was never activated for any actor and /// every legacy cluster's binding readmodel was empty even when the /// actor's State held an active binding — the OAuth callback's readiness -/// wait would time out, and the next inbound message's binding gate would -/// keep sending the user back to /init forever (issue #549 follow-up +/// wait would time out, and binding-required commands would keep sending +/// the user back to /init forever (issue #549 follow-up /// observed 2026-05-01: CommitBinding discarded: already bound /// without a corresponding readmodel materialization). 
/// public sealed class ExternalIdentityBindingProjectionPort - : MaterializationProjectionPortBase + : MaterializationProjectionPortBase, + IExternalIdentityBindingProjectionPort { public const string ProjectionKind = "external-identity-binding"; diff --git a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionQueryPort.cs b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionQueryPort.cs index 17c099775..dc737a605 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionQueryPort.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionQueryPort.cs @@ -8,8 +8,9 @@ namespace Aevatar.GAgents.Channel.Identity; /// Reads through the projection /// document reader (Elasticsearch / in-memory provider). No event-store replay, /// no actor state mirror, no query-time priming — see ADR-0018 §Projection -/// Readiness. A miss returns null; callers MUST drive the sender to -/// /init rather than fall back to bot-owner credentials. +/// Readiness. A miss returns null; binding-required command handlers can +/// prompt /init, while normal LLM turns may fall back to bot-owner +/// credentials. /// public sealed class ExternalIdentityBindingProjectionQueryPort : IExternalIdentityBindingQueryPort diff --git a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionReadinessPort.cs b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionReadinessPort.cs index 7ad020a53..102103012 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionReadinessPort.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjectionReadinessPort.cs @@ -57,6 +57,8 @@ expectedBindingId is null private static bool Matches(ExternalIdentityBindingDocument? document, string? 
expectedBindingId) { + if (expectedBindingId is null && document is null) + return true; if (document is null) return false; if (expectedBindingId is null) diff --git a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjector.cs b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjector.cs index bb3b77112..3247aa73e 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjector.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Projection/ExternalIdentityBindingProjector.cs @@ -3,6 +3,8 @@ using Aevatar.CQRS.Projection.Runtime.Abstractions; using Aevatar.CQRS.Projection.Stores.Abstractions; using Aevatar.Foundation.Abstractions; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; namespace Aevatar.GAgents.Channel.Identity; @@ -14,18 +16,31 @@ namespace Aevatar.GAgents.Channel.Identity; /// the write dispatcher. Read side (`IExternalIdentityBindingQueryPort`) /// reads the same documents — see ADR-0018 §Projection Readiness. /// +/// +/// READMODEL CONTRACT: when state.BindingId is empty (revoked / never bound), +/// the projector DELETES the document rather than upserting an inactive record. This +/// is a deliberate semantic change from earlier builds that left an inactive document +/// behind: IExternalIdentityBindingQueryPort.ResolveAsync returns null +/// for revoked bindings now, which lets ExternalIdentityBindingProjectionReadinessPort.Matches +/// match the (null, null) tuple cleanly. Downstream consumers that want the +/// audit history (e.g. admin dashboards) must consume the committed-event log directly +/// — they cannot rely on a tombstone in the readmodel. 
+/// public sealed class ExternalIdentityBindingProjector : ICurrentStateProjectionMaterializer { private readonly IProjectionWriteDispatcher _writeDispatcher; private readonly IProjectionClock _clock; + private readonly ILogger _logger; public ExternalIdentityBindingProjector( IProjectionWriteDispatcher writeDispatcher, - IProjectionClock clock) + IProjectionClock clock, + ILogger? logger = null) { _writeDispatcher = writeDispatcher ?? throw new ArgumentNullException(nameof(writeDispatcher)); _clock = clock ?? throw new ArgumentNullException(nameof(clock)); + _logger = logger ?? NullLogger.Instance; } public async ValueTask ProjectAsync( @@ -56,6 +71,17 @@ public async ValueTask ProjectAsync( UpdatedAt = CommittedStateEventEnvelope.ResolveTimestamp(envelope, _clock.UtcNow), }; + if (string.IsNullOrEmpty(document.BindingId)) + { + _logger.LogWarning( + "Deleting external identity binding document {DocumentId} because projected BindingId is empty. event={EventId}, version={Version}", + document.Id, + document.LastEventId, + document.StateVersion); + await _writeDispatcher.DeleteAsync(document.Id, ct); + return; + } + await _writeDispatcher.UpsertAsync(document, ct); } } diff --git a/agents/Aevatar.GAgents.Channel.Identity/Projection/IExternalIdentityBindingProjectionPort.cs b/agents/Aevatar.GAgents.Channel.Identity/Projection/IExternalIdentityBindingProjectionPort.cs new file mode 100644 index 000000000..ddcf8bb7a --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Identity/Projection/IExternalIdentityBindingProjectionPort.cs @@ -0,0 +1,19 @@ +using Aevatar.CQRS.Projection.Core.Orchestration; + +namespace Aevatar.GAgents.Channel.Identity; + +/// +/// Abstraction for activating the projection materialization scope for a per-(platform, +/// tenant, external_user_id) . 
Consumers +/// (OAuth endpoints, identity slash-command self-heal) must depend on this interface +/// per CLAUDE.md "依赖反转" rather than the concrete +/// — that gives the host a seam to +/// swap implementations (e.g. fire-and-forget self-heal in tests vs. a real activation +/// service in production). +/// +public interface IExternalIdentityBindingProjectionPort +{ + Task EnsureProjectionForActorAsync( + string actorId, + CancellationToken ct = default); +} diff --git a/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthAdminOptions.cs b/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthAdminOptions.cs new file mode 100644 index 000000000..1b4d4b8fb --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthAdminOptions.cs @@ -0,0 +1,35 @@ +namespace Aevatar.GAgents.Channel.Identity; + +/// +/// Operator credentials for the cluster-singleton OAuth client admin +/// surface. Currently only protects the rebuild endpoint +/// (POST /api/oauth/aevatar-client/rebuild) — see issue #549 for the +/// production wedge that motivated it. +/// +/// +/// Bound from configuration section ChannelIdentity:Admin. When +/// is empty the rebuild endpoint refuses to +/// run (503), so a misconfigured cluster is fail-secure rather than +/// fail-open. Production deploys set the token via env var +/// ChannelIdentity__Admin__RebuildToken; tests/dev clusters may +/// leave it unset and the endpoint stays disabled. +/// +public sealed class AevatarOAuthAdminOptions +{ + /// + /// Configuration section name under . + /// + public const string SectionName = "ChannelIdentity:Admin"; + + /// + /// Header callers send the rebuild token in. Constant-time compared to + /// ; mismatch returns 401. + /// + public const string RebuildTokenHeader = "X-Aevatar-Admin-Token"; + + /// + /// Shared secret required on the rebuild endpoint. Empty disables the + /// endpoint entirely (fail-secure default). 
+ /// + public string RebuildToken { get; set; } = string.Empty; +} diff --git a/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientBootstrapService.cs b/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientBootstrapService.cs index 6671ab8c4..37dc2ed44 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientBootstrapService.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientBootstrapService.cs @@ -48,6 +48,7 @@ public sealed class AevatarOAuthClientBootstrapService : IHostedService private readonly IAevatarOAuthClientProvider _clientProvider; private readonly AevatarOAuthClientProjectionPort _projectionPort; private readonly IActorRuntime _actorRuntime; + private readonly IActorDispatchPort _actorDispatchPort; private readonly ILogger _logger; private readonly CancellationTokenSource _stoppingCts = new(); private Task? _bootstrapTask; @@ -56,6 +57,7 @@ public AevatarOAuthClientBootstrapService( IAevatarOAuthClientProvider clientProvider, AevatarOAuthClientProjectionPort projectionPort, IActorRuntime actorRuntime, + IActorDispatchPort actorDispatchPort, ILogger logger) { // Provider is registered as a singleton (so are its transitive deps); @@ -67,6 +69,7 @@ public AevatarOAuthClientBootstrapService( _clientProvider = clientProvider ?? throw new ArgumentNullException(nameof(clientProvider)); _projectionPort = projectionPort ?? throw new ArgumentNullException(nameof(projectionPort)); _actorRuntime = actorRuntime ?? throw new ArgumentNullException(nameof(actorRuntime)); + _actorDispatchPort = actorDispatchPort ?? throw new ArgumentNullException(nameof(actorDispatchPort)); _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); } @@ -247,12 +250,11 @@ await _projectionPort RedirectUri = redirectUri, ClientName = ClientName, }), - Route = new EnvelopeRoute - { - Direct = new DirectRoute { TargetActorId = AevatarOAuthClientGAgent.WellKnownId }, - }, + Route = EnvelopeRouteSemantics.CreateDirect( + "channel-identity.oauth-bootstrap", + AevatarOAuthClientGAgent.WellKnownId), }; - await actor.HandleEventAsync(envelope, ct).ConfigureAwait(false); + await _actorDispatchPort.DispatchAsync(actor.Id, envelope, ct).ConfigureAwait(false); _logger.LogInformation( "Aevatar OAuth client EnsureProvisioned dispatched to {ActorId} (authority={Authority}). " + diff --git a/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientGAgent.cs b/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientGAgent.cs index a4c538a84..85309562b 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientGAgent.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Provisioning/AevatarOAuthClientGAgent.cs @@ -289,9 +289,19 @@ private Task AbsorbPeerHmacSeedAsync(EventStoreOptimisticConcurrencyExcept /// production bootstrap path uses /// instead so the actor (not the /// caller) mediates the DCR call. Idempotent: re-issuing the same - /// triple is a no-op. Always seeds a fresh HMAC key when the state has - /// none — bootstrap and provisioning are single-step. + /// snapshot (client_id + authority + redirect_uri + oauth_scope) is a + /// no-op. Always seeds a fresh HMAC key when the state has none — + /// bootstrap and provisioning are single-step. 
/// + /// + /// The same-snapshot check covers redirect_uri + oauth_scope on top of + /// client_id + authority because the operator-rebuild path + /// (POST /api/oauth/aevatar-client/rebuild, issue #549) must be + /// able to heal a wedged actor whose state has the right client_id but + /// stale or empty redirect_uri / oauth_scope — leaving those drifted + /// would let the next bootstrap re-DCR and replace the operator's + /// freshly-pinned client_id with a new (orphan-creating) one. + /// [EventHandler] public async Task HandleProvision(ProvisionAevatarOAuthClientCommand cmd) { @@ -307,9 +317,21 @@ public async Task HandleProvision(ProvisionAevatarOAuthClientCommand cmd) return; } - var sameClient = string.Equals(State.ClientId, cmd.ClientId, StringComparison.Ordinal) - && string.Equals(State.NyxidAuthority, cmd.NyxidAuthority, StringComparison.Ordinal); - if (!sameClient) + // Empty cmd field = "field not supplied by this caller", NOT "set + // to empty". Otherwise a legacy / pre-redirect_uri caller (e.g. + // ProvisionAevatarOAuthClientCommand v1 wire-compatibility, manual + // operator scripts that only know client_id + authority) would + // overwrite previously-persisted redirect_uri / oauth_scope with + // "" — and the next bootstrap pass would observe the cleared + // value, detect drift, re-DCR the freshly-pinned client, and + // rotate it away. Codex P1 on PR #570. + var redirectUri = string.IsNullOrEmpty(cmd.RedirectUri) ? State.RedirectUri : cmd.RedirectUri; + var oauthScope = string.IsNullOrEmpty(cmd.OauthScope) ? 
State.OauthScope : cmd.OauthScope; + var sameSnapshot = string.Equals(State.ClientId, cmd.ClientId, StringComparison.Ordinal) + && string.Equals(State.NyxidAuthority, cmd.NyxidAuthority, StringComparison.Ordinal) + && string.Equals(State.RedirectUri, redirectUri, StringComparison.Ordinal) + && string.Equals(State.OauthScope, oauthScope, StringComparison.Ordinal); + if (!sameSnapshot) { await PersistDomainEventAsync(new AevatarOAuthClientProvisionedEvent { @@ -317,12 +339,14 @@ await PersistDomainEventAsync(new AevatarOAuthClientProvisionedEvent ClientIdIssuedAtUnix = cmd.ClientIdIssuedAtUnix, NyxidAuthority = cmd.NyxidAuthority, PersistedAt = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), - OauthScope = cmd.OauthScope ?? string.Empty, + OauthScope = oauthScope, + RedirectUri = redirectUri, }); Logger.LogInformation( - "Provisioned aevatar OAuth client: client_id={ClientId}, authority={Authority}", + "Provisioned aevatar OAuth client: client_id={ClientId}, authority={Authority}, redirect_uri={RedirectUri}", cmd.ClientId, - cmd.NyxidAuthority); + cmd.NyxidAuthority, + string.IsNullOrEmpty(redirectUri) ? 
"" : redirectUri); } if (State.HmacKey.Length == 0) diff --git a/agents/Aevatar.GAgents.Channel.Identity/Slash/UnbindChannelSlashCommandHandler.cs b/agents/Aevatar.GAgents.Channel.Identity/Slash/UnbindChannelSlashCommandHandler.cs index bbbeac6e3..4a529aff0 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Slash/UnbindChannelSlashCommandHandler.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Slash/UnbindChannelSlashCommandHandler.cs @@ -16,16 +16,16 @@ namespace Aevatar.GAgents.Channel.Identity.Slash; public sealed class UnbindChannelSlashCommandHandler : IChannelSlashCommandHandler { private readonly INyxIdCapabilityBroker _broker; - private readonly IActorRuntime _actorRuntime; + private readonly IActorDispatchPort _actorDispatchPort; private readonly ILogger _logger; public UnbindChannelSlashCommandHandler( INyxIdCapabilityBroker broker, - IActorRuntime actorRuntime, + IActorDispatchPort actorDispatchPort, ILogger logger) { _broker = broker ?? throw new ArgumentNullException(nameof(broker)); - _actorRuntime = actorRuntime ?? throw new ArgumentNullException(nameof(actorRuntime)); + _actorDispatchPort = actorDispatchPort ?? throw new ArgumentNullException(nameof(actorDispatchPort)); _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); } @@ -70,9 +70,6 @@ public UnbindChannelSlashCommandHandler( { try { - var actor = await _actorRuntime - .CreateAsync(actorId, ct) - .ConfigureAwait(false); var envelope = new EventEnvelope { Id = Guid.NewGuid().ToString("N"), @@ -82,12 +79,9 @@ public UnbindChannelSlashCommandHandler( ExternalSubject = context.Subject.Clone(), Reason = "user_unbind", }), - Route = new EnvelopeRoute - { - Direct = new DirectRoute { TargetActorId = actorId }, - }, + Route = EnvelopeRouteSemantics.CreateDirect("channel.identity.unbind", actorId), }; - await actor.HandleEventAsync(envelope, ct).ConfigureAwait(false); + await _actorDispatchPort.DispatchAsync(actorId, envelope, ct).ConfigureAwait(false); localDispatchError = null; break; } diff --git a/agents/Aevatar.GAgents.Channel.Identity/Slash/WhoamiChannelSlashCommandHandler.cs b/agents/Aevatar.GAgents.Channel.Identity/Slash/WhoamiChannelSlashCommandHandler.cs index 6d22caa1a..1d8302fda 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/Slash/WhoamiChannelSlashCommandHandler.cs +++ b/agents/Aevatar.GAgents.Channel.Identity/Slash/WhoamiChannelSlashCommandHandler.cs @@ -4,15 +4,17 @@ namespace Aevatar.GAgents.Channel.Identity.Slash; /// -/// /whoami — show the inbound sender their current binding state. Always -/// requires a binding; the runner short-circuits unbound senders to the -/// /init prompt before invoking the handler. +/// /whoami — show the inbound sender their current binding state. Issue #513 +/// Phase 6 specifies /init, /unbind, and /whoami do NOT +/// require a binding so an unbound sender can introspect their own state +/// without being bounced through the binding gate. Bound senders see masked +/// binding info; unbound senders see "未绑定" with a /init hint. 
/// public sealed class WhoamiChannelSlashCommandHandler : IChannelSlashCommandHandler { public string Name => "whoami"; - public bool RequiresBinding => true; + public bool RequiresBinding => false; public ChannelSlashCommandUsage Usage => new( Name, @@ -28,13 +30,21 @@ public sealed class WhoamiChannelSlashCommandHandler : IChannelSlashCommandHandl ? context.SenderId : context.SenderName; - var lines = new[] - { - $"已绑定 NyxID 账号。", - $"- 平台账号:{senderName}", - $"- Binding ID:{Mask(bindingId)}", - $"- 平台:{context.Subject.Platform}", - }; + var lines = string.IsNullOrEmpty(bindingId) + ? new[] + { + "未绑定 NyxID 账号。", + $"- 平台账号:{senderName}", + $"- 平台:{context.Subject.Platform}", + "发送 /init 完成绑定。", + } + : new[] + { + "已绑定 NyxID 账号。", + $"- 平台账号:{senderName}", + $"- Binding ID:{Mask(bindingId)}", + $"- 平台:{context.Subject.Platform}", + }; var reply = new MessageContent { diff --git a/agents/Aevatar.GAgents.Channel.Identity/protos/aevatar_oauth_client.proto b/agents/Aevatar.GAgents.Channel.Identity/protos/aevatar_oauth_client.proto index ff6ae2d68..7eb286667 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/protos/aevatar_oauth_client.proto +++ b/agents/Aevatar.GAgents.Channel.Identity/protos/aevatar_oauth_client.proto @@ -74,16 +74,25 @@ message EnsureAevatarOAuthClientProvisionedCommand { } // Issued by tests / manual operator scripts that already hold a client_id -// (e.g. seeded fixture, post-rotation retag). Bootstrap NEVER uses this — -// it always sends EnsureAevatarOAuthClientProvisionedCommand instead so the -// actor mediates the DCR call. +// (e.g. seeded fixture, post-rotation retag, post-incident rebuild). Bootstrap +// NEVER uses this — it always sends EnsureAevatarOAuthClientProvisionedCommand +// instead so the actor mediates the DCR call. message ProvisionAevatarOAuthClientCommand { string client_id = 1; int64 client_id_issued_at_unix = 2; string nyxid_authority = 3; // Optional diagnostic scope for manually provisioned clients. 
Bootstrap - // never uses this command path; an empty value means unknown. + // never uses this command path; an empty value means unknown. The + // operator-rebuild path must set this to AevatarOAuthClientScopes + // .AuthorizationScope so the next bootstrap does not detect drift and + // re-DCR the freshly-pinned client. string oauth_scope = 4; + // Optional redirect URI. The operator-rebuild path (POST /api/oauth/ + // aevatar-client/rebuild) must set this to the resolver output so the next + // bootstrap does not detect redirect drift and re-DCR the freshly-pinned + // client. Tests / fixture seeds may leave it empty when they don't care + // about drift detection on a subsequent bootstrap pass. + string redirect_uri = 5; } // Issued by ops to force a fresh HMAC key rotation. Old tokens signed with diff --git a/agents/Aevatar.GAgents.Channel.Identity/protos/external_identity_binding.proto b/agents/Aevatar.GAgents.Channel.Identity/protos/external_identity_binding.proto index 0dcae6467..bc0586187 100644 --- a/agents/Aevatar.GAgents.Channel.Identity/protos/external_identity_binding.proto +++ b/agents/Aevatar.GAgents.Channel.Identity/protos/external_identity_binding.proto @@ -35,8 +35,9 @@ message CommitBindingCommand { } // Issued by the /unbind handler after a successful NyxID DELETE call, or by -// the turn path on `invalid_grant` from token-exchange. NO-OP at the actor -// when state has no active binding. +// the turn path on `invalid_grant` from token-exchange. When state has no +// active binding, the actor leaves binding facts unchanged but republishes +// its authoritative state root so stale readmodels can be overwritten. message RevokeBindingCommand { aevatar.gagents.channel.abstractions.ExternalSubjectRef external_subject = 1; // Free-form reason for audit (e.g. 
"user_unbind", "nyx_invalid_grant", @@ -59,13 +60,12 @@ message ExternalIdentityBindingRevokedEvent { } // Persisted when an inbound CommitBindingCommand is discarded because the -// actor already holds an active binding_id, OR when a deploy needs to re- -// publish the authoritative state root for a legacy binding actor whose -// projection scope was never activated. Apply is identity — the binding -// facts are not mutated. The projector still sees a state-root publication -// and materializes the existing binding into the readmodel, fixing the -// 2026-05-01 production regression where the binding scope was missing -// (issue #549 follow-up). +// actor already holds an active binding_id, when RevokeBindingCommand observes +// already-empty actor state, OR when a deploy needs to re-publish the +// authoritative state root for a legacy binding actor whose projection scope +// was never activated. Apply is identity — the binding facts are not mutated. +// The projector still sees a state-root publication and materializes the +// authoritative state into the readmodel. message ExternalIdentityBindingProjectionRebuildRequestedEvent { string reason = 1; google.protobuf.Timestamp requested_at = 2; diff --git a/agents/Aevatar.GAgents.Channel.Runtime/ChannelMetadataKeys.cs b/agents/Aevatar.GAgents.Channel.Runtime/ChannelMetadataKeys.cs index bb788f49a..168d25a47 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/ChannelMetadataKeys.cs +++ b/agents/Aevatar.GAgents.Channel.Runtime/ChannelMetadataKeys.cs @@ -39,8 +39,8 @@ public static class ChannelMetadataKeys /// /// Authoritative outbound Lark receive_id for the current workflow run, captured at /// agent-create time. Propagated via WorkflowChatRunRequest.Metadata so workflow - /// modules (e.g. TwitterPublishModule) can surface their result back into the same - /// chat without having to look up the catalog at execution time. 
+ /// modules can surface their result back into the same chat without having to look up the + /// catalog at execution time. /// public const string LarkReceiveId = "channel.lark.receive_id"; /// Companion to — its receive_id_type. diff --git a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.LarkCardStreaming.cs b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.LarkCardStreaming.cs new file mode 100644 index 000000000..5a7b8c0e1 --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.LarkCardStreaming.cs @@ -0,0 +1,535 @@ +using Aevatar.GAgents.Channel.Abstractions; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; + +namespace Aevatar.GAgents.Channel.Runtime; + +public sealed partial class ConversationGAgent +{ + private readonly Dictionary _larkCardStreamingStates = new(StringComparer.Ordinal); + + /// + /// Per-turn phase of the Lark CardKit streaming pipeline. Distinct from + /// NyxRelayStreamingPhase (which models channel-relay edit-message + /// streaming): card streaming has its own lifecycle (allocate card entity, bind to + /// chat, stream element content, close streaming mode) and goes through the API-key + /// proxy directly rather than channel-relay's /reply{,/update} surface. + /// + /// + /// Fallback semantics: when card creation fails (CreationFailed), the + /// dispatcher routes the turn to the legacy text-edit sink (NyxRelayStreamingPhase + /// machine). Once Streaming is reached, the card path owns the turn — + /// mid-stream table-limit / card-unavailable failures terminate the turn at Terminated + /// with the last flushed text persisted as partial; rate-limited frames are only dropped. + /// + private enum LarkCardStreamingPhase + { + Idle, + Creating, + Streaming, + Completed, + Aborted, + Terminated, + CreationFailed, + } + + private enum LarkCardStreamingGuardSource + { + AcceptInterimChunk, + Finalize, + } + + /// + /// Actor-scoped, in-memory streaming state for one CardKit-driven turn. 
Keyed by + /// correlation_id, same lifecycle as . + /// + /// Lifecycle phase; gates interim updates and finalization. + /// + /// CardKit card entity id returned by cardkit/v1/cards. Null until + /// ; required for every element-content + /// and settings update afterwards. + /// + /// + /// Lark IM message id returned by the im/v1/messages send that bound the card + /// to a chat. Used by the unavailable-guard to detect upstream message recall. + /// + /// + /// Preserved card id for terminal full-card update if mid-stream we fall back to text + /// patch (table-limit class errors). Currently always equal to ; + /// reserved for the mid-stream-fallback follow-up (#589 Scope D). + /// + /// + /// Last text successfully streamed into the card element. Persisted as the user-visible + /// terminal state when finalization fails after streaming started. + /// + /// + /// Monotonic counter passed to every CardKit write. Pre-incremented before each call; + /// Lark rejects stale writes deterministically. + /// + /// + /// Element id within the card to stream into. Defaults to streaming_main; + /// must match the card template's element naming. + /// + /// Diagnostic reason captured on entry to terminal phases. + private sealed record LarkCardStreamingState( + LarkCardStreamingPhase Phase, + string? CardId, + string? CardMessageId, + string? OriginalCardId, + string LastFlushedText, + long Sequence, + string StreamingElementId, + string? TerminalReason) + { + public const string DefaultStreamingElementId = "streaming_main"; + + public static LarkCardStreamingState Initial { get; } = new( + LarkCardStreamingPhase.Idle, + CardId: null, + CardMessageId: null, + OriginalCardId: null, + LastFlushedText: string.Empty, + Sequence: 0, + StreamingElementId: DefaultStreamingElementId, + TerminalReason: null); + + /// Phase permits accepting a new chunk (initial or interim). 
+ public bool AllowsInterimEdit => + Phase is LarkCardStreamingPhase.Idle + or LarkCardStreamingPhase.Streaming; + + /// + /// Phase permits the text-edit fallback: the card path has not started yet (Idle) or card + /// creation already failed (CreationFailed), so chunks may go to the text-edit sink. + /// + public bool AllowsTextEditFallback => + Phase is LarkCardStreamingPhase.Idle + or LarkCardStreamingPhase.CreationFailed; + + /// Phase permits attempting a finalize (close streaming + optional final update). + public bool AllowsFinalize => + Phase is LarkCardStreamingPhase.Streaming; + } + + private static bool IsTerminalLarkCardStreamingPhase(LarkCardStreamingPhase phase) => + phase is LarkCardStreamingPhase.Completed + or LarkCardStreamingPhase.Aborted + or LarkCardStreamingPhase.Terminated + or LarkCardStreamingPhase.CreationFailed; + + private static bool IsLegalLarkCardStreamingTransition(LarkCardStreamingPhase from, LarkCardStreamingPhase to) => + (from, to) switch + { + (LarkCardStreamingPhase.Idle, LarkCardStreamingPhase.Creating) => true, + + (LarkCardStreamingPhase.Creating, LarkCardStreamingPhase.Streaming) => true, + (LarkCardStreamingPhase.Creating, LarkCardStreamingPhase.CreationFailed) => true, + (LarkCardStreamingPhase.Creating, LarkCardStreamingPhase.Terminated) => true, + + (LarkCardStreamingPhase.Streaming, LarkCardStreamingPhase.Streaming) => true, + (LarkCardStreamingPhase.Streaming, LarkCardStreamingPhase.Completed) => true, + (LarkCardStreamingPhase.Streaming, LarkCardStreamingPhase.Aborted) => true, + (LarkCardStreamingPhase.Streaming, LarkCardStreamingPhase.Terminated) => true, + + _ => false, + }; + + private LarkCardStreamingState GetOrInitLarkCardStreamingState(string correlationId) => + _larkCardStreamingStates.GetValueOrDefault(correlationId) ?? 
LarkCardStreamingState.Initial; + + private static bool ShouldSkipLarkCardStreamingForUnavailable( + LarkCardStreamingState state, + LarkCardStreamingGuardSource source) => + source switch + { + LarkCardStreamingGuardSource.AcceptInterimChunk => !state.AllowsInterimEdit, + LarkCardStreamingGuardSource.Finalize => !state.AllowsFinalize, + _ => false, + }; + + private LarkCardStreamingState TransitionLarkCardStreamingPhase( + string correlationId, + LarkCardStreamingState current, + LarkCardStreamingPhase next, + string? terminalReason = null, + Func? fieldUpdate = null) + { + if (!IsLegalLarkCardStreamingTransition(current.Phase, next)) + { + Logger.LogWarning( + "Illegal Lark card streaming phase transition {From}->{To} for correlation={CorrelationId}; keeping current state", + current.Phase, next, correlationId); + return current; + } + + var carried = fieldUpdate?.Invoke(current) ?? current; + var updated = carried with + { + Phase = next, + TerminalReason = IsTerminalLarkCardStreamingPhase(next) + ? (terminalReason ?? carried.TerminalReason) + : carried.TerminalReason, + }; + _larkCardStreamingStates[correlationId] = updated; + return updated; + } + + private IConversationCardTurnRunner ResolveCardRunner() => + Services.GetService() ?? new NullConversationCardTurnRunner(); + + /// + /// Drives one CardKit-mode streaming chunk. Returns true when the card handler owns the + /// outcome (Idle->Creating[->Streaming], Streaming->Streaming, terminal-drop) and false + /// only when the caller should fall through to the legacy text-edit path — + /// CreationFailed phase signals "card path is dead for this turn, route the rest of the + /// chunks through edit-message streaming." + /// + private async Task HandleLarkCardStreamingChunkCoreAsync( + LlmReplyCardStreamChunkEvent evt, + string correlationId) + { + var state = GetOrInitLarkCardStreamingState(correlationId); + + // Already-decided text-edit fallback: let the caller continue down the text-edit path. 
+ if (state.Phase is LarkCardStreamingPhase.CreationFailed) + { + _larkCardStreamingStates.Remove(correlationId); + return false; + } + + if (ShouldSkipLarkCardStreamingForUnavailable(state, LarkCardStreamingGuardSource.AcceptInterimChunk)) + return true; + + var runtimeContext = BuildNyxRelayRuntimeContext(evt.CorrelationId, evt.Activity); + var runner = ResolveCardRunner(); + + if (state.Phase is LarkCardStreamingPhase.Idle) + { + TransitionLarkCardStreamingPhase(correlationId, state, LarkCardStreamingPhase.Creating); + var creating = GetOrInitLarkCardStreamingState(correlationId); + ConversationCardCreateResult createResult; + try + { + // Bound the CardKit create round-trip so a stuck NyxID/Lark upstream can't + // pin the actor turn forever. Mirrors the text-edit streaming path's + // per-call cap (StreamingFailureUpdateTimeout); on timeout, the catch + // below routes the turn to the text-edit fallback path. + using var createCts = new CancellationTokenSource(StreamingFailureUpdateTimeout); + createResult = await runner.RunCardCreateAsync( + evt, + creating.StreamingElementId, + runtimeContext, + createCts.Token); + } + catch (Exception ex) + { + Logger.LogWarning(ex, "Card create threw; falling back to text-edit. correlation={CorrelationId}", evt.CorrelationId); + TransitionLarkCardStreamingPhase( + correlationId, + creating, + LarkCardStreamingPhase.CreationFailed, + terminalReason: $"create_threw:{ex.GetType().Name}"); + return false; + } + + if (!createResult.Success) + { + if (createResult.IsPostSendFailure) + { + // Card was already sent to the chat — falling back to text-edit would + // produce a duplicate visible reply. Terminate the turn at Terminated and + // persist a partial-card record using the orphan card_message_id so the + // event store has a terminal entry. The runner has already attempted a + // best-effort streaming-mode close on the orphan card. 
+ Logger.LogWarning( + "Card post-send failure (create+send succeeded, first stream failed); terminating turn without text-edit fallback. correlation={CorrelationId}, code={ErrorCode}, cardId={CardId}", + evt.CorrelationId, + createResult.ErrorCode, + createResult.CardId); + var terminated = TransitionLarkCardStreamingPhase( + correlationId, + creating, + LarkCardStreamingPhase.Terminated, + terminalReason: $"create_post_send_failed:{createResult.ErrorCode}", + fieldUpdate: s => s with + { + CardId = createResult.CardId, + CardMessageId = createResult.CardMessageId, + OriginalCardId = createResult.CardId, + }); + await PersistCardStreamedCompletionAsync( + correlationId, + BuildLlmReplyCommandId(evt.CorrelationId), + evt.Activity, + evt.Activity, + terminated.CardMessageId ?? string.Empty, + terminated.LastFlushedText); + return true; + } + + Logger.LogInformation( + "Card create failed; falling back to text-edit for the rest of this turn. correlation={CorrelationId}, code={ErrorCode}, rateLimited={RateLimited}, tableLimit={TableLimit}, cardUnavailable={CardUnavailable}", + evt.CorrelationId, + createResult.ErrorCode, + createResult.IsRateLimited, + createResult.IsTableLimitExceeded, + createResult.IsCardUnavailable); + TransitionLarkCardStreamingPhase( + correlationId, + creating, + LarkCardStreamingPhase.CreationFailed, + terminalReason: $"create_failed:{createResult.ErrorCode}"); + return false; + } + + TransitionLarkCardStreamingPhase( + correlationId, + creating, + LarkCardStreamingPhase.Streaming, + fieldUpdate: s => s with + { + CardId = createResult.CardId, + CardMessageId = createResult.CardMessageId, + OriginalCardId = createResult.CardId, + LastFlushedText = evt.AccumulatedText, + Sequence = 1, + }); + return true; + } + + // Streaming: interim element-content update. Sequence pre-incremented; on success + // record the new sequence + last-flushed text so finalize knows whether to write. 
+ var nextSequence = state.Sequence + 1; + ConversationCardStreamResult streamResult; + try + { + // Per-frame cap so a hung CardKit update can't pin the actor turn forever. + // On timeout the frame is dropped and the next chunk will retry the slot. + using var streamCts = new CancellationTokenSource(StreamingFailureUpdateTimeout); + streamResult = await runner.RunCardStreamAsync( + evt, + state.CardId ?? string.Empty, + state.StreamingElementId, + nextSequence, + runtimeContext, + streamCts.Token); + } + catch (Exception ex) + { + Logger.LogWarning(ex, "Card stream threw; dropping frame. correlation={CorrelationId}, seq={Sequence}", evt.CorrelationId, nextSequence); + return true; + } + + if (!streamResult.Success) + { + if (streamResult.IsRateLimited) + { + // Recoverable: skip the frame, keep sequence unchanged so the next chunk + // re-uses this slot. + Logger.LogDebug( + "Card stream rate-limited; dropping frame. correlation={CorrelationId}, seq={Sequence}", + evt.CorrelationId, nextSequence); + return true; + } + if (streamResult.IsTableLimitExceeded || streamResult.IsCardUnavailable) + { + Logger.LogWarning( + "Card stream terminal failure; ending turn. correlation={CorrelationId}, code={ErrorCode}", + evt.CorrelationId, streamResult.ErrorCode); + var terminated = TransitionLarkCardStreamingPhase( + correlationId, + state, + LarkCardStreamingPhase.Terminated, + terminalReason: $"stream_failed:{streamResult.ErrorCode}"); + // Persist the partial-card terminal record so the event store records the + // turn even though LlmReplyReady has not arrived yet. Without this the + // ProcessedCommandIds guard in HandleLlmReplyReadyAsync would still see no + // matching entry, fall through to the legacy reply path, and post a + // duplicate text reply on top of the visible card. + await PersistCardStreamedCompletionAsync( + correlationId, + BuildLlmReplyCommandId(evt.CorrelationId), + evt.Activity, + evt.Activity, + terminated.CardMessageId ?? 
string.Empty, + terminated.LastFlushedText); + return true; + } + Logger.LogInformation( + "Card stream non-terminal failure; continuing. correlation={CorrelationId}, code={ErrorCode}", + evt.CorrelationId, streamResult.ErrorCode); + return true; + } + + TransitionLarkCardStreamingPhase( + correlationId, + state, + LarkCardStreamingPhase.Streaming, + fieldUpdate: s => s with + { + LastFlushedText = evt.AccumulatedText, + Sequence = nextSequence, + }); + return true; + } + + /// + /// Drives the card-mode finalize when HandleLlmReplyReadyAsync sees a + /// live Streaming phase. Persists a ConversationTurnCompletedEvent with + /// SentActivityId="lark-card-stream:{cardMessageId}" so observers can distinguish + /// the card path from the legacy nyx-relay-stream: path. + /// + private async Task TryCompleteCardStreamedReplyAsync( + LlmReplyReadyEvent evt, + string correlationId, + string commandId, + ChatActivity? referenceActivity) + { + var state = GetOrInitLarkCardStreamingState(correlationId); + // Idle: card path was never started for this turn (or already cleaned up); let the + // legacy edit-message finalize path handle it. CreationFailed: card create rejected + // pre-send, which already routed the chunks to the text-edit sink, so the text-edit + // finalize must run too. Both → return false to fall through. + if (state.Phase is LarkCardStreamingPhase.Idle) + return false; + if (state.Phase is LarkCardStreamingPhase.CreationFailed) + { + _larkCardStreamingStates.Remove(correlationId); + return false; + } + + // Already-terminal card phase (post-send-failure, mid-stream table-limit/unavailable, or + // a previous finalize): persistence already happened at the transition site, so + // simply consume the ready event without running text-edit finalize. The + // ProcessedCommandIds guard in HandleLlmReplyReadyAsync also short-circuits late + // ready events, but returning true here keeps the contract explicit. 
+ if (state.Phase is LarkCardStreamingPhase.Completed + or LarkCardStreamingPhase.Aborted + or LarkCardStreamingPhase.Terminated) + { + _larkCardStreamingStates.Remove(correlationId); + return true; + } + + // Phase is Streaming or Creating. Creating during finalize is unexpected (card.create + // is synchronous within a single chunk's handler); treat it as Streaming with no + // prior interim text. Anything else falls through to text-edit, but the explicit + // guards above mean we only reach this point with phase=Streaming/Creating. + var finalText = evt.Outbound?.Text ?? string.Empty; + var finalDiffers = !string.IsNullOrWhiteSpace(finalText) + && !string.Equals(finalText, state.LastFlushedText, StringComparison.Ordinal); + + var runtimeContext = BuildNyxRelayRuntimeContext(evt.CorrelationId, evt.Activity); + var runner = ResolveCardRunner(); + var nextSequence = state.Sequence + 1; + var activityForToken = referenceActivity ?? evt.Activity ?? new ChatActivity(); + + ConversationCardFinalizeResult finalizeResult; + try + { + // Per-call cap so a hung CardKit finalize can't pin the actor turn forever. + // On timeout the catch below persists the last-flushed partial and transitions + // to Terminated, matching the existing finalize-throw recovery. + using var finalizeCts = new CancellationTokenSource(StreamingFailureUpdateTimeout); + finalizeResult = await runner.RunCardFinalizeAsync( + activityForToken, + state.CardId ?? string.Empty, + state.StreamingElementId, + finalText, + finalDiffers, + nextSequence, + runtimeContext, + finalizeCts.Token); + } + catch (Exception ex) + { + Logger.LogWarning(ex, "Card finalize threw; persisting last flushed partial. 
correlation={CorrelationId}", evt.CorrelationId); + TransitionLarkCardStreamingPhase( + correlationId, + state, + LarkCardStreamingPhase.Terminated, + terminalReason: $"finalize_threw:{ex.GetType().Name}"); + await PersistCardStreamedCompletionAsync( + correlationId, + commandId, + evt.Activity, + referenceActivity, + state.CardMessageId ?? string.Empty, + state.LastFlushedText); + return true; + } + + // visibleText must match what the user actually sees on the card. Two failure modes: + // * Final stream write failed → card shows LastFlushedText + // * Final stream succeeded but close-streaming failed → card shows finalText, just + // with a still-blinking cursor. Persist finalText so the durable record agrees + // with the visible state. + var visibleText = finalizeResult.FinalTextWritten ? finalText : state.LastFlushedText; + if (finalizeResult.Success) + { + TransitionLarkCardStreamingPhase( + correlationId, + state, + LarkCardStreamingPhase.Completed, + terminalReason: "completed"); + } + else + { + Logger.LogWarning( + "Card finalize failed; persisting partial. correlation={CorrelationId}, code={ErrorCode}", + evt.CorrelationId, finalizeResult.ErrorCode); + TransitionLarkCardStreamingPhase( + correlationId, + state, + LarkCardStreamingPhase.Terminated, + terminalReason: $"finalize_failed:{finalizeResult.ErrorCode}"); + } + + await PersistCardStreamedCompletionAsync( + correlationId, + commandId, + evt.Activity, + referenceActivity, + state.CardMessageId ?? string.Empty, + visibleText); + return true; + } + + /// + /// Persists the terminal ConversationTurnCompletedEvent for a card-streamed turn. + /// Decoupled from the inbound event type so both the LlmReplyReady finalize path and the + /// mid-stream Terminated path (post-send-failure / table-limit / unavailable, observed + /// while still processing chunks) can share one writer. + /// + private async Task PersistCardStreamedCompletionAsync( + string correlationId, + string commandId, + ChatActivity? 
eventActivity, + ChatActivity? referenceActivity, + string cardMessageId, + string outboundText) + { + var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); + var completed = new ConversationTurnCompletedEvent + { + ProcessedActivityId = string.Empty, + CausationCommandId = commandId, + SentActivityId = $"lark-card-stream:{cardMessageId}", + AuthPrincipal = "bot", + Conversation = eventActivity?.Conversation?.Clone() + ?? State.Conversation?.Clone() + ?? new ConversationReference(), + Outbound = new MessageContent { Text = outboundText }, + CompletedAtUnixMs = nowMs, + OutboundDelivery = ToOutboundDeliveryReceipt(eventActivity?.OutboundDelivery), + }; + await PersistDomainEventAsync(completed); + RemoveNyxRelayReplyToken(correlationId, referenceActivity); + _larkCardStreamingStates.Remove(correlationId); + Logger.LogInformation( + "Completed card-streamed LLM reply: correlation={CorrelationId} cardMessageId={CardMessageId} conversation={Key}", + correlationId, + cardMessageId, + completed.Conversation?.CanonicalKey); + } +} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.NyxRelayStreaming.cs b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.NyxRelayStreaming.cs new file mode 100644 index 000000000..3ba1bf86b --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.NyxRelayStreaming.cs @@ -0,0 +1,153 @@ +using Microsoft.Extensions.Logging; + +namespace Aevatar.GAgents.Channel.Runtime; + +public sealed partial class ConversationGAgent +{ + /// + /// Per-turn phase of the NyxID-relay edit-message streaming pipeline. + /// + /// + /// The reply token consumes on the first successful send. After that, only + /// /reply/update is valid; falling back to /reply would reuse a dead JTI + /// and surface as 401. 
The two boolean flags this enum replaces (Disabled + + /// SuppressInterim) failed to express that asymmetry directly, so callers had + /// to derive it from PlatformMessageId emptiness. The phase enum makes the + /// asymmetry the primary state. + /// + private enum NyxRelayStreamingPhase + { + Idle, + PlaceholderSent, + Streaming, + SuppressingInterim, + DisabledPreSend, + TerminalSucceeded, + TerminalPartial, + } + + /// + /// Identifies which streaming entry point is asking the unavailable guard to decide + /// whether to short-circuit. Different sources have different "should I bail?" semantics. + /// + private enum NyxRelayStreamingGuardSource + { + AcceptInterimChunk, + Finalize, + } + + /// + /// Actor-scoped, in-memory streaming state for one conversation turn. Never persisted. + /// Keyed by correlation_id, same lifecycle as . + /// + private sealed record NyxRelayStreamingState( + NyxRelayStreamingPhase Phase, + string? PlatformMessageId, + string LastFlushedText, + int EditCount, + string? 
TerminalReason) + { + public static NyxRelayStreamingState Initial { get; } = + new(NyxRelayStreamingPhase.Idle, null, string.Empty, 0, null); + + public bool AllowsInterimEdit => + Phase is NyxRelayStreamingPhase.Idle + or NyxRelayStreamingPhase.PlaceholderSent + or NyxRelayStreamingPhase.Streaming; + + public bool AllowsFinalEdit => + Phase is NyxRelayStreamingPhase.PlaceholderSent + or NyxRelayStreamingPhase.Streaming + or NyxRelayStreamingPhase.SuppressingInterim; + + public bool AllowsReplyFallback => + Phase is NyxRelayStreamingPhase.Idle + or NyxRelayStreamingPhase.DisabledPreSend; + } + + private static bool IsTerminalNyxRelayStreamingPhase(NyxRelayStreamingPhase phase) => + phase is NyxRelayStreamingPhase.DisabledPreSend + or NyxRelayStreamingPhase.TerminalSucceeded + or NyxRelayStreamingPhase.TerminalPartial; + + private static bool IsLegalNyxRelayStreamingTransition(NyxRelayStreamingPhase from, NyxRelayStreamingPhase to) => + (from, to) switch + { + (NyxRelayStreamingPhase.Idle, NyxRelayStreamingPhase.PlaceholderSent) => true, + (NyxRelayStreamingPhase.Idle, NyxRelayStreamingPhase.DisabledPreSend) => true, + + (NyxRelayStreamingPhase.PlaceholderSent, NyxRelayStreamingPhase.Streaming) => true, + (NyxRelayStreamingPhase.PlaceholderSent, NyxRelayStreamingPhase.SuppressingInterim) => true, + (NyxRelayStreamingPhase.PlaceholderSent, NyxRelayStreamingPhase.TerminalSucceeded) => true, + (NyxRelayStreamingPhase.PlaceholderSent, NyxRelayStreamingPhase.TerminalPartial) => true, + + (NyxRelayStreamingPhase.Streaming, NyxRelayStreamingPhase.Streaming) => true, + (NyxRelayStreamingPhase.Streaming, NyxRelayStreamingPhase.SuppressingInterim) => true, + (NyxRelayStreamingPhase.Streaming, NyxRelayStreamingPhase.TerminalSucceeded) => true, + (NyxRelayStreamingPhase.Streaming, NyxRelayStreamingPhase.TerminalPartial) => true, + + (NyxRelayStreamingPhase.SuppressingInterim, NyxRelayStreamingPhase.TerminalSucceeded) => true, + (NyxRelayStreamingPhase.SuppressingInterim, 
NyxRelayStreamingPhase.TerminalPartial) => true, + + _ => false, + }; + + private NyxRelayStreamingState GetOrInitNyxRelayStreamingState(string correlationId) => + _nyxRelayStreamingStates.GetValueOrDefault(correlationId) ?? NyxRelayStreamingState.Initial; + + /// + /// Single guard that owns the "should this streaming callback short-circuit?" decision. + /// Every public handler that touches the streaming path defers to this helper at the + /// top instead of repeating ad-hoc checks. Returns true when the caller should bail. + /// + /// + /// The Finalize branch also short-circuits when + /// is empty: a turn whose first send did not surface a platform message id (Nyx returned + /// an empty PlatformMessageId on initial /reply) cannot be finalized via + /// /reply/update — we have no upstream message to address — so the legacy + /// RunLlmReplyAsync fallback owns the terminal user-visible state. This preserves + /// the explicit empty-PlatformMessageId check that lived in the pre-refactor path. + /// + private static bool ShouldSkipNyxRelayStreamingForUnavailable( + NyxRelayStreamingState state, + NyxRelayStreamingGuardSource source) => + source switch + { + NyxRelayStreamingGuardSource.AcceptInterimChunk => !state.AllowsInterimEdit, + NyxRelayStreamingGuardSource.Finalize => + state.AllowsReplyFallback || string.IsNullOrEmpty(state.PlatformMessageId), + _ => false, + }; + + /// + /// Validates the transition, applies if any, writes the + /// updated state, and returns it. Illegal transitions are logged at warn level and + /// return the unchanged current state — actor turns must keep making progress. + /// + private NyxRelayStreamingState TransitionNyxRelayStreamingPhase( + string correlationId, + NyxRelayStreamingState current, + NyxRelayStreamingPhase next, + string? terminalReason = null, + Func? 
fieldUpdate = null) + { + if (!IsLegalNyxRelayStreamingTransition(current.Phase, next)) + { + Logger.LogWarning( + "Illegal Nyx relay streaming phase transition {From}->{To} for correlation={CorrelationId}; keeping current state", + current.Phase, next, correlationId); + return current; + } + + var carried = fieldUpdate?.Invoke(current) ?? current; + var updated = carried with + { + Phase = next, + TerminalReason = IsTerminalNyxRelayStreamingPhase(next) + ? (terminalReason ?? carried.TerminalReason) + : carried.TerminalReason, + }; + _nyxRelayStreamingStates[correlationId] = updated; + return updated; + } +} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs index c1df93f33..f07706780 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs +++ b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/ConversationGAgent.cs @@ -30,14 +30,15 @@ public sealed partial class ConversationGAgent : GAgentBase _nyxRelayReplyTokens = new(StringComparer.Ordinal); private readonly Dictionary _nyxRelayStreamingStates = new(StringComparer.Ordinal); - /// - /// Actor-scoped, in-memory streaming state for one conversation turn. Never persisted: tracks - /// the upstream platform message id of the placeholder send and the two distinct failure - /// modes that can disable parts of the streaming path. Keyed by correlation_id, same - /// lifecycle as . - /// - /// - /// The two failure flags carry different semantics with respect to the NyxID reply token: - /// - /// Disabled means streaming was aborted before any successful send, so - /// the reply token is still available and the actor may safely fall back to a single-shot - /// /reply via . - /// SuppressInterim means the first chunk already consumed the reply token (the - /// placeholder or first delta landed) but a later interim edit failed. 
The final edit must - /// still be attempted via /reply/update; falling back to /reply would reuse a - /// dead token and turn the partial into the user-visible terminal state. - /// - /// - private sealed record NyxRelayStreamingState( - string? PlatformMessageId, - string LastFlushedText, - int EditCount, - bool Disabled, - bool SuppressInterim) - { - public static NyxRelayStreamingState Initial { get; } = new(null, string.Empty, 0, false, false); - - /// - /// True once the first successful send has landed: the NyxID reply token has been - /// consumed and any further outbound must go through /reply/update. Used as the - /// "token is dead, don't fall back to /reply" guard. - /// - public bool ReplyTokenConsumed => !string.IsNullOrEmpty(PlatformMessageId); - } - /// /// Sliding window cap on retained processed ids. Keeps state size bounded while still /// catching typical redelivery windows (seconds to minutes). @@ -107,6 +73,8 @@ protected override ConversationGAgentState TransitionState(ConversationGAgentSta .On(ApplyContinueRejected) .On(ApplyContinueFailed) .On(ApplyInboundTurnRetryScheduled) + .On(ApplyLastReplyDelivered) + .On(ApplyLastReplyDeliveryFailed) .OrCurrent(); /// @@ -156,16 +124,18 @@ private async Task HandleInboundActivityCoreAsync( var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); if (result.LlmReplyRequest is not null) { - // The transient inbox copy keeps reply_token + expiry so the LLM worker can - // echo them back inside LlmReplyReadyEvent; the persisted state copy must - // not carry the credential into the event store / projection / read model. 
- var inboxCopy = result.LlmReplyRequest.Clone(); - inboxCopy.TargetActorId = Id; - var persistedCopy = inboxCopy.Clone(); + // The transient run command copy keeps reply_token + expiry + per-call credentials + // in Metadata so the run actor can echo them back inside LlmReplyReadyEvent and + // forward them to the LLM call; the persisted state copy must not carry any of + // those credentials into the event store / projection / read model. + var runCopy = result.LlmReplyRequest.Clone(); + runCopy.TargetActorId = Id; + var persistedCopy = runCopy.Clone(); persistedCopy.ReplyToken = string.Empty; persistedCopy.ReplyTokenExpiresAtUnixMs = 0; + LlmReplyCredentialMetadataKeys.StripFrom(persistedCopy.Metadata); await PersistDomainEventAsync(persistedCopy); - await DispatchPendingLlmReplyAsync(inboxCopy, CancellationToken.None); + await DispatchPendingLlmReplyAsync(runCopy, CancellationToken.None); Logger.LogInformation( "Accepted inbound activity for deferred LLM reply: activity={ActivityId} conversation={Key}", activity.Id, @@ -314,6 +284,19 @@ public async Task HandleDeferredLlmReplyDroppedAsync(DeferredLlmReplyDroppedEven { ArgumentNullException.ThrowIfNull(evt); + // ADR-0021 §6 / canon §9 absorbing-finalized: a late drop notification for an + // already-finalized turn (e.g. the run actor's terminal-cleanup callback fires + // after a successful reply already landed) must no-op rather than overwrite the + // turn outcome with a synthetic ConversationContinueFailedEvent. 
+ if (IsLlmReplyTurnFinalized(evt.CorrelationId)) + { + Logger.LogDebug( + "Ignoring deferred LLM reply drop for already-finalized turn: correlation={CorrelationId} reason={Reason}", + evt.CorrelationId, + evt.Reason); + return; + } + var pending = FindPendingLlmReplyRequest(evt.CorrelationId); if (pending is null) { @@ -332,7 +315,7 @@ public async Task HandleDeferredLlmReplyDroppedAsync(DeferredLlmReplyDroppedEven CausationId = string.Empty, Kind = FailureKind.PermanentAdapterError, ErrorCode = reason, - ErrorSummary = "Deferred LLM reply request was dropped by the inbox pre-LLM gate.", + ErrorSummary = "Deferred LLM reply request was dropped by the run actor pre-LLM gate.", NotRetryable = new Google.Protobuf.WellKnownTypes.Empty(), FailedAtUnixMs = evt.DroppedAtUnixMs > 0 ? evt.DroppedAtUnixMs @@ -342,7 +325,7 @@ public async Task HandleDeferredLlmReplyDroppedAsync(DeferredLlmReplyDroppedEven RemoveNyxRelayReplyToken(evt.CorrelationId, pending.Activity); Logger.LogInformation( - "Retired pending LLM reply after inbox drop: correlation={CorrelationId} reason={Reason}", + "Retired pending LLM reply after run drop: correlation={CorrelationId} reason={Reason}", evt.CorrelationId, reason); } @@ -378,11 +361,11 @@ public async Task HandleDeferredInboundTurnRetryRequestedAsync(DeferredInboundTu private async Task DispatchPendingLlmReplyAsync(NeedsLlmReplyEvent request, CancellationToken ct) { - var inbox = Services.GetService(); - if (inbox is null) + var dispatcher = Services.GetService(); + if (dispatcher is null) { Logger.LogWarning( - "Channel LLM reply inbox not registered; scheduling durable retry: correlation={CorrelationId}", + "Channel LLM reply run dispatcher not registered; scheduling durable retry: correlation={CorrelationId}", request.CorrelationId); await ScheduleDeferredLlmReplyDispatchAsync(request, DeferredLlmDispatchRetryDelay, ct); return; @@ -391,24 +374,30 @@ private async Task DispatchPendingLlmReplyAsync(NeedsLlmReplyEvent request, Canc // Retry 
and rehydration paths read `request` from State.PendingLlmReplyRequests, // which always carries an empty ReplyToken (the inbound handler strips it before // persist). If the actor is still alive and the in-memory dict still has the - // token for this correlation, re-enrich the inbox copy so the subscriber's relay - // credential gate does not mistake a legitimate retry for a dead request. + // token for this correlation, re-enrich the run command copy so AgentRunGAgent's + // relay credential gate does not mistake a legitimate retry for a dead request. var enriched = EnrichWithRuntimeReplyTokenIfNeeded(request); try { - await inbox.EnqueueAsync(enriched.Clone(), ct); + var outcome = await dispatcher.DispatchAsync(enriched.Clone(), ct); Logger.LogInformation( - "Enqueued LLM reply request to inbox: correlation={CorrelationId} conversation={Key} replyTokenSource={Source}", + "Dispatched LLM reply run request: correlation={CorrelationId} conversation={Key} replyTokenSource={Source} phase={Phase} commandId={CommandId}", enriched.CorrelationId, enriched.Activity?.Conversation?.CanonicalKey, - DescribeEnqueuedReplyTokenSource(request, enriched)); + DescribeDispatchedReplyTokenSource(request, enriched), + outcome.Phase, + outcome.CommandId); + // C3 will branch on outcome.Phase to retire the pending entry on + // Rejected* outcomes. Today the run actor inbox handler drops + // stale requests and surfaces them through DeferredLlmReplyDroppedEvent, + // so behaviour is preserved either way. 
} catch (Exception ex) { Logger.LogError( ex, - "Failed to enqueue LLM reply request; scheduling durable retry: correlation={CorrelationId}", + "Failed to dispatch LLM reply run request; scheduling durable retry: correlation={CorrelationId}", request.CorrelationId); await ScheduleDeferredLlmReplyDispatchAsync(request, DeferredLlmDispatchRetryDelay, ct); } @@ -439,7 +428,7 @@ private NeedsLlmReplyEvent EnrichWithRuntimeReplyTokenIfNeeded(NeedsLlmReplyEven return enriched; } - private static string DescribeEnqueuedReplyTokenSource( + private static string DescribeDispatchedReplyTokenSource( NeedsLlmReplyEvent original, NeedsLlmReplyEvent enriched) { @@ -457,7 +446,7 @@ public async Task HandleLlmReplyReadyAsync(LlmReplyReadyEvent evt) var commandId = BuildLlmReplyCommandId(evt.CorrelationId); var pendingRequest = FindPendingLlmReplyRequest(evt.CorrelationId); - if (State.ProcessedCommandIds.Contains(commandId)) + if (IsLlmReplyTurnFinalized(evt.CorrelationId)) { Logger.LogInformation( "Duplicate LLM reply ready event {CorrelationId} (conversation={Key}); skipping outbound", @@ -506,6 +495,18 @@ public async Task HandleLlmReplyReadyAsync(LlmReplyReadyEvent evt) CompletedAtUnixMs = nowMs, OutboundDelivery = ToOutboundDeliveryReceipt(result.OutboundDelivery), }; + // ADR-0021 chain.delivered observable: persist the user-visible delivery ack + // before the turn-completed summary event so readers do not need to infer + // delivery status from the channel sink return code, and so existing + // "events.Last() is turn-completed" consumers stay correct. + var delivered = new LlmReplyDeliveredEvent + { + CorrelationId = evt.CorrelationId ?? string.Empty, + RunId = evt.CorrelationId ?? string.Empty, + AckedAtUnixMs = nowMs, + ChannelMessageId = result.OutboundDelivery?.ReplyMessageId ?? string.Empty, + }; + await PersistDomainEventAsync(delivered); await PersistDomainEventAsync(completed); RemoveNyxRelayReplyToken(evt.CorrelationId, pendingRequest?.Activity ?? 
evt.Activity); Logger.LogInformation( @@ -527,6 +528,18 @@ public async Task HandleLlmReplyReadyAsync(LlmReplyReadyEvent evt) FailedAtUnixMs = nowMs, }; AssignRetryPolicy(failed, result); + // ADR-0021 chain.delivered failure observable: structured delivery failure persists + // before the chain-finalizing failure event so existing "events.Last() is + // ConversationContinueFailedEvent" consumers stay correct. + var deliveryFailed = new LlmReplyDeliveryFailedEvent + { + CorrelationId = evt.CorrelationId ?? string.Empty, + RunId = evt.CorrelationId ?? string.Empty, + FailedAtUnixMs = nowMs, + ErrorCode = result.ErrorCode ?? string.Empty, + ErrorMessage = result.ErrorSummary ?? string.Empty, + }; + await PersistDomainEventAsync(deliveryFailed); await PersistDomainEventAsync(failed); SweepExpiredNyxRelayReplyTokens(); if (failed.RetryPolicyCase == ConversationContinueFailedEvent.RetryPolicyOneofCase.NotRetryable) @@ -561,7 +574,21 @@ await ScheduleDeferredLlmReplyDispatchAsync( /// boundary and the edit ordering is enforced by actor serialization. /// [EventHandler] - public async Task HandleLlmReplyStreamChunkAsync(LlmReplyStreamChunkEvent evt) + public Task HandleLlmReplyStreamChunkAsync(LlmReplyStreamChunkEvent evt) + { + ArgumentNullException.ThrowIfNull(evt); + return HandleNyxRelayStreamingChunkCoreAsync(evt); + } + + /// + /// CardKit-streaming chunks travel on a structurally distinct proto type so a misbehaving + /// persistence layer cannot silently re-route a replayed event back to the card sink. The + /// card handler owns Idle / Creating / Streaming / terminal transitions; on + /// CreationFailed it returns false and we drop into the legacy text-edit core + /// helper so the user still sees a reply for the rest of the turn. 
+ /// + [EventHandler] + public async Task HandleLlmReplyCardStreamChunkAsync(LlmReplyCardStreamChunkEvent evt) { ArgumentNullException.ThrowIfNull(evt); @@ -569,40 +596,85 @@ public async Task HandleLlmReplyStreamChunkAsync(LlmReplyStreamChunkEvent evt) if (correlationId is null || evt.Activity is null || string.IsNullOrWhiteSpace(evt.AccumulatedText)) { Logger.LogDebug( - "Dropping malformed streaming chunk: correlation={CorrelationId}", + "Dropping malformed card streaming chunk: correlation={CorrelationId}", evt.CorrelationId); return; } - var state = _nyxRelayStreamingStates.GetValueOrDefault(correlationId) ?? NyxRelayStreamingState.Initial; - if (state.Disabled || state.SuppressInterim) + if (IsLlmReplyTurnFinalized(evt.CorrelationId)) + { + // Turn already finalized; drop any late chunk that sneaks in via the actor inbox. return; + } - if (State.ProcessedCommandIds.Contains(BuildLlmReplyCommandId(evt.CorrelationId))) + // Plain `await`: actor turns run on a single-threaded scheduler and the continuation + // must observe that context for subsequent state mutations on + // `_larkCardStreamingStates` / `_nyxRelayStreamingStates`. + if (await HandleLarkCardStreamingChunkCoreAsync(evt, correlationId)) + return; + + // CardCreation failed (pre-flight or first chunk). Route the rest of the turn through + // the legacy text-edit core so the user still gets a reply. Synthesize the equivalent + // edit-message chunk from the card-event payload — both proto types carry the same + // fields so the projection is loss-less. 
+ await HandleNyxRelayStreamingChunkCoreAsync(new LlmReplyStreamChunkEvent + { + CorrelationId = evt.CorrelationId, + RegistrationId = evt.RegistrationId, + Activity = evt.Activity.Clone(), + AccumulatedText = evt.AccumulatedText, + ChunkAtUnixMs = evt.ChunkAtUnixMs, + }); + } + + private async Task HandleNyxRelayStreamingChunkCoreAsync(LlmReplyStreamChunkEvent evt) + { + var correlationId = NormalizeOptional(evt.CorrelationId); + if (correlationId is null || evt.Activity is null || string.IsNullOrWhiteSpace(evt.AccumulatedText)) + { + Logger.LogDebug( + "Dropping malformed streaming chunk: correlation={CorrelationId}", + evt.CorrelationId); + return; + } + + if (IsLlmReplyTurnFinalized(evt.CorrelationId)) { // Turn already finalized; drop any late chunk that sneaks in via the actor inbox. return; } + var state = GetOrInitNyxRelayStreamingState(correlationId); + if (ShouldSkipNyxRelayStreamingForUnavailable(state, NyxRelayStreamingGuardSource.AcceptInterimChunk)) + return; + var runtimeContext = BuildNyxRelayRuntimeContext(evt.CorrelationId, evt.Activity); if (runtimeContext.NyxRelayReplyToken is null) { Logger.LogInformation( "Streaming chunk received but relay reply token is unavailable; disabling streaming for turn. correlation={CorrelationId}", evt.CorrelationId); - _nyxRelayStreamingStates[correlationId] = state with { Disabled = true }; + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.DisabledPreSend, + terminalReason: "no_reply_token"); return; } var runner = ResolveRunner(); + // Bound the upstream edit so a stuck relay/network can't pin the actor turn forever + // (PR #562 review). 10s matches the failure-path timeout below; the edit is best-effort, + // so timing out cleanly into the !result.Success branch preserves correctness. 
+ using var streamChunkCts = new CancellationTokenSource(StreamingFailureUpdateTimeout); var result = await runner.RunStreamChunkAsync( evt, state.PlatformMessageId, runtimeContext, - CancellationToken.None); + streamChunkCts.Token); if (!result.Success) { - if (state.ReplyTokenConsumed) + if (state.AllowsFinalEdit) { // First chunk already consumed the reply token. Skip further interim edits but // preserve PlatformMessageId so the final edit on LlmReplyReady can still try @@ -613,7 +685,11 @@ public async Task HandleLlmReplyStreamChunkAsync(LlmReplyStreamChunkEvent evt) evt.CorrelationId, result.ErrorCode, result.EditUnsupported); - _nyxRelayStreamingStates[correlationId] = state with { SuppressInterim = true }; + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.SuppressingInterim, + terminalReason: $"interim_edit_failed:{result.ErrorCode}"); } else { @@ -624,21 +700,29 @@ public async Task HandleLlmReplyStreamChunkAsync(LlmReplyStreamChunkEvent evt) evt.CorrelationId, result.ErrorCode, result.EditUnsupported); - _nyxRelayStreamingStates[correlationId] = state with { Disabled = true }; + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.DisabledPreSend, + terminalReason: $"first_send_failed:{result.ErrorCode}"); } return; } - var isFirstChunk = string.IsNullOrEmpty(state.PlatformMessageId); + var isFirstChunk = state.Phase == NyxRelayStreamingPhase.Idle; var newPlatformMessageId = string.IsNullOrWhiteSpace(result.PlatformMessageId) ? state.PlatformMessageId : result.PlatformMessageId; - _nyxRelayStreamingStates[correlationId] = state with - { - PlatformMessageId = newPlatformMessageId, - LastFlushedText = evt.AccumulatedText, - EditCount = isFirstChunk ? 0 : state.EditCount + 1, - }; + TransitionNyxRelayStreamingPhase( + correlationId, + state, + isFirstChunk ? 
NyxRelayStreamingPhase.PlaceholderSent : NyxRelayStreamingPhase.Streaming, + fieldUpdate: s => s with + { + PlatformMessageId = newPlatformMessageId, + LastFlushedText = evt.AccumulatedText, + EditCount = isFirstChunk ? 0 : s.EditCount + 1, + }); } private async Task TryCompleteStreamedReplyAsync( @@ -647,22 +731,86 @@ private async Task TryCompleteStreamedReplyAsync( ChatActivity? referenceActivity, ConversationTurnRuntimeContext runtimeContext) { - if (evt.TerminalState != LlmReplyTerminalState.Completed) - return false; - var correlationId = NormalizeOptional(evt.CorrelationId); if (correlationId is null) return false; - if (!_nyxRelayStreamingStates.TryGetValue(correlationId, out var state)) - return false; - // Disabled means the initial send never landed, so the reply token is still usable - // and the caller may fall back to a single-shot /reply. A missing PlatformMessageId - // with SuppressInterim would be inconsistent, but treat it the same for safety. - if (state.Disabled || string.IsNullOrEmpty(state.PlatformMessageId)) + // Card path takes precedence when active; falls through to text-edit when card never + // started (Idle), card creation failed (CreationFailed → text-edit fallback), or card + // finished as a terminal phase. Plain `await` so the continuation stays on the + // actor's single-threaded scheduler (no ConfigureAwait(false) — it would let the + // post-await `_nyxRelayStreamingStates` reads run off the actor turn). + if (await TryCompleteCardStreamedReplyAsync(evt, correlationId, commandId, referenceActivity)) + return true; + + var state = GetOrInitNyxRelayStreamingState(correlationId); + if (ShouldSkipNyxRelayStreamingForUnavailable(state, NyxRelayStreamingGuardSource.Finalize)) return false; var platformMessageId = state.PlatformMessageId!; + + // Streaming-start already consumed the reply token. 
On Failed, falling through to + // RunLlmReplyAsync would issue a fresh /reply against the dead token and surface + // as `401 Reply token already used` to NyxID — leaving the user staring at the + // streaming partial (often just "...") forever with no error explanation. Self-heal + // by editing the existing placeholder in place with the classified failure text; + // turn is then terminal (no retry, no second /reply). + if (evt.TerminalState == LlmReplyTerminalState.Failed) + { + var failureText = NormalizeOptional(evt.Outbound?.Text) + ?? NormalizeOptional(evt.ErrorSummary) + ?? "Sorry, the reply failed. Please try again."; + var runner = ResolveRunner(); + var failureChunk = new LlmReplyStreamChunkEvent + { + CorrelationId = evt.CorrelationId, + RegistrationId = evt.RegistrationId, + Activity = referenceActivity?.Clone() ?? evt.Activity?.Clone() ?? new ChatActivity(), + AccumulatedText = failureText, + ChunkAtUnixMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), + }; + using var failureUpdateCts = new CancellationTokenSource(StreamingFailureUpdateTimeout); + var failureResult = await runner.RunStreamChunkAsync( + failureChunk, + platformMessageId, + runtimeContext, + failureUpdateCts.Token); + if (failureResult.Success) + { + Logger.LogWarning( + "LLM reply failed after streaming-start; updated placeholder with failure text. correlation={CorrelationId}, errorCode={ErrorCode}, platformMessageId={PlatformMessageId}", + evt.CorrelationId, + evt.ErrorCode, + platformMessageId); + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.TerminalSucceeded, + terminalReason: $"failed_self_heal:{evt.ErrorCode}"); + await PersistStreamedCompletionAsync(evt, commandId, referenceActivity, platformMessageId, failureText, state.EditCount + 1); + return true; + } + + // Edit failed too (rare — Lark may reject a message edit for unrelated reasons). 
+ // Falling back to /reply would still hit the dead token, so persist the last + // flushed partial as terminal. The user sees the partial (potentially empty) + // but we don't spin on a guaranteed 401. + Logger.LogWarning( + "Streaming LLM failure-update could not edit placeholder; persisting last flushed partial as terminal. correlation={CorrelationId}, code={Code}, platformMessageId={PlatformMessageId}", + evt.CorrelationId, + failureResult.ErrorCode, + platformMessageId); + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.TerminalPartial, + terminalReason: $"failed_self_heal_edit_failed:{failureResult.ErrorCode}"); + await PersistStreamedCompletionAsync(evt, commandId, referenceActivity, platformMessageId, state.LastFlushedText, state.EditCount); + return true; + } + + if (evt.TerminalState != LlmReplyTerminalState.Completed) + return false; var finalText = evt.Outbound?.Text ?? string.Empty; if (string.IsNullOrWhiteSpace(finalText)) { @@ -674,6 +822,11 @@ private async Task TryCompleteStreamedReplyAsync( "Streaming LLM reply final text was empty; persisting last flushed partial as terminal. 
correlation={CorrelationId} platformMessageId={PlatformMessageId}", evt.CorrelationId, platformMessageId); + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.TerminalPartial, + terminalReason: "empty_final_text"); await PersistStreamedCompletionAsync(evt, commandId, referenceActivity, platformMessageId, state.LastFlushedText, state.EditCount); return true; } @@ -690,11 +843,12 @@ private async Task TryCompleteStreamedReplyAsync( AccumulatedText = finalText, ChunkAtUnixMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), }; + using var finalChunkCts = new CancellationTokenSource(StreamingFailureUpdateTimeout); var finalResult = await runner.RunStreamChunkAsync( finalChunk, platformMessageId, runtimeContext, - CancellationToken.None); + finalChunkCts.Token); if (!finalResult.Success) { // The reply token was already consumed by the first chunk, so falling back to @@ -707,12 +861,22 @@ private async Task TryCompleteStreamedReplyAsync( evt.CorrelationId, finalResult.ErrorCode, platformMessageId); + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.TerminalPartial, + terminalReason: $"final_edit_failed:{finalResult.ErrorCode}"); await PersistStreamedCompletionAsync(evt, commandId, referenceActivity, platformMessageId, state.LastFlushedText, state.EditCount); return true; } edits += 1; } + TransitionNyxRelayStreamingPhase( + correlationId, + state, + NyxRelayStreamingPhase.TerminalSucceeded, + terminalReason: "completed"); await PersistStreamedCompletionAsync(evt, commandId, referenceActivity, platformMessageId, finalText, edits); return true; } @@ -739,6 +903,19 @@ private async Task PersistStreamedCompletionAsync( CompletedAtUnixMs = nowMs, OutboundDelivery = ToOutboundDeliveryReceipt(evt.Activity?.OutboundDelivery), }; + // ADR-0021 chain.delivered observable: the streaming path always reaches this + // function with a user-visible placeholder message id (any partial / full / + // failure-self-heal 
text the user actually saw). Persist a Delivered event + // BEFORE the turn-completed summary so "events.Last() is turn-completed" + // consumers keep working. + var delivered = new LlmReplyDeliveredEvent + { + CorrelationId = evt.CorrelationId ?? string.Empty, + RunId = evt.CorrelationId ?? string.Empty, + AckedAtUnixMs = nowMs, + ChannelMessageId = $"nyx-relay-stream:{platformMessageId}", + }; + await PersistDomainEventAsync(delivered); await PersistDomainEventAsync(completed); RemoveNyxRelayReplyToken(evt.CorrelationId, referenceActivity); Logger.LogInformation( @@ -857,6 +1034,14 @@ private static string AuthPrincipalForContinue(ConversationContinueRequestedEven private static string BuildLlmReplyCommandId(string? correlationId) => $"llm:{correlationId?.Trim() ?? string.Empty}"; + // ADR-0021 §6 / canon §9 — single source of truth for "this LLM reply turn is + // already finalized". Every reply-ready / dropped / streaming-chunk handler entry + // uses this so late or duplicate signals uniformly no-op. The dedup key is the + // `llm:` form appended to ProcessedCommandIds by + // ApplyTurnCompleted / ApplyContinueFailed when the turn reaches chain.finalized. + private bool IsLlmReplyTurnFinalized(string? correlationId) => + State.ProcessedCommandIds.Contains(BuildLlmReplyCommandId(correlationId)); + private static string BuildDeferredLlmReplyCallbackId(string? correlationId) => $"conversation-llm-dispatch:{correlationId?.Trim() ?? string.Empty}"; @@ -1053,9 +1238,9 @@ private ConversationTurnRuntimeContext BuildNyxRelayRuntimeContextForReply( { var activity = pendingActivity ?? evt.Activity; - // Inbox-echoed credential is the authoritative source — it survives actor + // Run-echoed credential is the authoritative source: it survives actor // deactivation between inbound capture and LLM reply ready, which the in-memory - // dict cannot. Fall back to the dict only when the inbox didn't carry a token + // dict cannot. 
Fall back to the dict only when the run event didn't carry a token // (legacy in-flight messages from before this change deployed). var inlineToken = NormalizeOptional(evt.ReplyToken); if (inlineToken is not null) @@ -1082,7 +1267,7 @@ private string DescribeReplyTokenSource(LlmReplyReadyEvent evt, ConversationTurn if (runtimeContext.NyxRelayReplyToken is null) return "none"; if (!string.IsNullOrWhiteSpace(evt.ReplyToken)) - return "inbox-echo"; + return "run-echo"; return "actor-runtime-dict"; } @@ -1107,6 +1292,7 @@ private void RemoveNyxRelayReplyToken(string? correlationId, ChatActivity? activ { _nyxRelayReplyTokens.Remove(normalizedCorrelationId); _nyxRelayStreamingStates.Remove(normalizedCorrelationId); + _larkCardStreamingStates.Remove(normalizedCorrelationId); } } @@ -1236,6 +1422,47 @@ private static ConversationGAgentState ApplyContinueFailed( return next; } + // ADR-0021 chain.delivered observable: user-visible delivery succeeded via the channel sink. + private static ConversationGAgentState ApplyLastReplyDelivered( + ConversationGAgentState current, + LlmReplyDeliveredEvent evt) + { + var next = current.Clone(); + next.LastReplyDelivery = new ReplyDeliveryStatus + { + RunId = evt.RunId ?? string.Empty, + Delivered = new ReplyDeliveryStatus.Types.Delivered + { + AckedAtUnixMs = evt.AckedAtUnixMs, + ChannelMessageId = evt.ChannelMessageId ?? string.Empty, + }, + }; + if (evt.AckedAtUnixMs > 0) + next.LastUpdatedUnixMs = evt.AckedAtUnixMs; + return next; + } + + // ADR-0021 chain.delivered failure observable: channel sink rejected the reply (4xx/5xx/timeout). + private static ConversationGAgentState ApplyLastReplyDeliveryFailed( + ConversationGAgentState current, + LlmReplyDeliveryFailedEvent evt) + { + var next = current.Clone(); + next.LastReplyDelivery = new ReplyDeliveryStatus + { + RunId = evt.RunId ?? string.Empty, + Failed = new ReplyDeliveryStatus.Types.DeliveryFailed + { + FailedAtUnixMs = evt.FailedAtUnixMs, + ErrorCode = evt.ErrorCode ?? 
string.Empty, + ErrorMessage = evt.ErrorMessage ?? string.Empty, + }, + }; + if (evt.FailedAtUnixMs > 0) + next.LastUpdatedUnixMs = evt.FailedAtUnixMs; + return next; + } + private NeedsLlmReplyEvent? FindPendingLlmReplyRequest(string? correlationId) { var normalizedCorrelationId = NormalizeOptional(correlationId); diff --git a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IChannelLlmReplyInbox.cs b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IChannelLlmReplyInbox.cs deleted file mode 100644 index f3d10ce82..000000000 --- a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IChannelLlmReplyInbox.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Aevatar.GAgents.Channel.Runtime; - -public interface IChannelLlmReplyInbox -{ - Task EnqueueAsync(NeedsLlmReplyEvent request, CancellationToken ct); -} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IChannelLlmReplyRunDispatcher.cs b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IChannelLlmReplyRunDispatcher.cs new file mode 100644 index 000000000..143b5f640 --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IChannelLlmReplyRunDispatcher.cs @@ -0,0 +1,68 @@ +namespace Aevatar.GAgents.Channel.Runtime; + +/// +/// Stateless port used by to hand one deferred +/// LLM reply run to its run-scoped continuation owner. +/// +/// +/// The synchronous return only promises accepted per ADR-0021: the run +/// request has been validated as fresh and enqueued onto the run actor's inbox. +/// It does NOT promise the LLM has started, that any reply has been produced, +/// or that any user-visible delivery has happened. Strong guarantees only +/// arrive via downstream events. +/// +public interface IChannelLlmReplyRunDispatcher +{ + Task DispatchAsync(NeedsLlmReplyEvent request, CancellationToken ct); +} + +/// +/// Synchronous outcome of . +/// +/// +/// The completion phase actually reached. 
By contract dispatcher implementations +/// MUST only return or one of the +/// Rejected* variants — never Committed or Delivered; those +/// strong phases are observed asynchronously per ADR-0021. +/// +/// +/// Stable id of the dispatched command (run actor envelope id). Empty when the +/// outcome is a rejection that occurred before envelope construction. +/// +/// +/// Id of the target AgentRunGAgent the request was routed to, when +/// available; null when no actor was created (e.g. stale-rejected). +/// +/// +/// Wall-clock at which the dispatcher accepted/rejected the request. Zero when +/// not applicable. +/// +public sealed record DispatchOutcome( + DispatchPhase Phase, + string CommandId, + string? RunActorId, + long AcceptedAtUnixMs); + +/// +/// Phase reached by . +/// +/// +/// Per ADR-0021 the dispatcher is only allowed to report Accepted or one +/// of the Rejected* variants. Stronger phases (committed, delivered, +/// finalized) are not observable at the synchronous dispatcher boundary. +/// +public enum DispatchPhase +{ + Accepted = 0, + /// + /// The request's requested_at_unix_ms exceeded the freshness window, + /// so the dispatcher refused to enqueue it (the run actor would have + /// dropped it anyway). + /// + RejectedStale = 1, + /// + /// The request's correlation_id matches an already-dispatched run + /// command and was suppressed to keep the run actor inbox idempotent. + /// + RejectedDuplicate = 2, +} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IConversationCardTurnRunner.cs b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IConversationCardTurnRunner.cs new file mode 100644 index 000000000..7459bd1a4 --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Runtime/Conversation/IConversationCardTurnRunner.cs @@ -0,0 +1,216 @@ +using Aevatar.GAgents.Channel.Abstractions; + +namespace Aevatar.GAgents.Channel.Runtime; + +/// +/// Runs the CardKit-streaming variant of a bot turn inside . 
+/// Parallel to but with three distinct operations +/// (create-and-send, interim element stream, finalize) to match Lark CardKit's lifecycle. +/// The grain owns the per-turn LarkCardStreamingState; this seam only does the +/// outbound call and translates the response into a runner-shaped result. +/// +/// +/// All three operations are invoked under the actor's turn-serial invariant, so the runner +/// implementation must be safe under that single-threaded contract. The +/// sequence parameter is owned by the grain (pre-incremented before each call) and +/// passed verbatim into the CardKit API. +/// +public interface IConversationCardTurnRunner +{ + /// + /// Allocates a new CardKit card entity (POST /open-apis/cardkit/v1/cards), binds it + /// to the chat via an interactive im/v1/messages send referencing the new + /// card_id, and writes the initial accumulated text into + /// . Implicit sequence = 1. + /// + Task RunCardCreateAsync( + LlmReplyCardStreamChunkEvent chunk, + string streamingElementId, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct); + + /// + /// Streams the latest accumulated text into the existing card element. Sequence is + /// pre-incremented by the grain. Lark rejects stale sequences deterministically. + /// + Task RunCardStreamAsync( + LlmReplyCardStreamChunkEvent chunk, + string cardId, + string elementId, + long sequence, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct); + + /// + /// Closes the card's streaming mode (cursor disappears) and, if the final text differs + /// from the last interim flush, writes one more element-content update so the persisted + /// card matches the LLM's final output. + /// + /// + /// Carries TransportExtras.NyxUserAccessToken for the proxy call. Stream chunk + /// methods read it from the chunk's own activity; finalize is invoked from the + /// LlmReplyReadyEvent path so the actor passes the event's reference activity + /// here instead of a chunk. 
+ /// + Task RunCardFinalizeAsync( + ChatActivity referenceActivity, + string cardId, + string elementId, + string finalText, + bool finalTextDiffersFromLastFlushed, + long sequence, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct); +} + +/// +/// Outcome of . The classification +/// flags drive the grain's fallback decision: +/// +/// Pre-send failures (create call rejected before any chat-visible side effect): the +/// actor transitions to CreationFailed and falls back to the legacy text-edit sink +/// so the user still sees a reply. / +/// imply this path. +/// Post-send failures (create + send succeeded but the first stream-content write +/// failed — see ): an empty card is already visible in the +/// chat. Falling back to text-edit would produce a duplicate reply. The actor terminates +/// the turn at Terminated using the surfaced / +/// and persists the partial-card terminal record. The runner +/// makes a best-effort settings patch to close streaming mode on the orphan card before +/// returning so the cursor does not blink forever. +/// on its own terminates the turn (no fallback). +/// +/// +public sealed record ConversationCardCreateResult( + bool Success, + string? CardId, + string? 
CardMessageId, + bool IsRateLimited, + bool IsTableLimitExceeded, + bool IsCardUnavailable, + bool IsPostSendFailure, + string ErrorCode, + string ErrorSummary) +{ + public static ConversationCardCreateResult Succeeded(string cardId, string cardMessageId) => + new(true, cardId, cardMessageId, false, false, false, false, string.Empty, string.Empty); + + public static ConversationCardCreateResult Failed( + string errorCode, + string errorSummary, + bool isRateLimited = false, + bool isTableLimitExceeded = false, + bool isCardUnavailable = false) => + new(false, null, null, isRateLimited, isTableLimitExceeded, isCardUnavailable, false, errorCode, errorSummary); + + /// + /// Failure factory for the "card was already sent to the chat but the first + /// element-content write failed" case. The actor must NOT fall back to text-edit + /// (the orphan card is already visible) — it transitions the turn to Terminated + /// and uses / for the + /// persisted partial-card record. + /// + public static ConversationCardCreateResult PostSendFailed( + string cardId, + string cardMessageId, + string errorCode, + string errorSummary, + bool isRateLimited = false, + bool isTableLimitExceeded = false, + bool isCardUnavailable = false) => + new(false, cardId, cardMessageId, isRateLimited, isTableLimitExceeded, isCardUnavailable, true, errorCode, errorSummary); +} + +/// +/// Outcome of . Mid-stream +/// rate-limit (Lark 230020) is recoverable — the grain skips the frame and continues. +/// Table-limit (230099/11310) and unavailability terminate the turn. 
+/// +public sealed record ConversationCardStreamResult( + bool Success, + bool IsRateLimited, + bool IsTableLimitExceeded, + bool IsCardUnavailable, + string ErrorCode, + string ErrorSummary) +{ + public static ConversationCardStreamResult Succeeded() => + new(true, false, false, false, string.Empty, string.Empty); + + public static ConversationCardStreamResult Failed( + string errorCode, + string errorSummary, + bool isRateLimited = false, + bool isTableLimitExceeded = false, + bool isCardUnavailable = false) => + new(false, isRateLimited, isTableLimitExceeded, isCardUnavailable, errorCode, errorSummary); +} + +/// True only when both the optional final stream write AND the +/// streaming-mode close succeeded. +/// +/// True when the trailing element-content write either succeeded OR was skipped +/// (final text equals last flushed). False only when the runner attempted the trailing +/// write and it failed; lets the actor persist the visible-state text correctly when +/// success is false but the final text actually did land before the close-streaming-mode +/// failure. +/// +public sealed record ConversationCardFinalizeResult( + bool Success, + bool FinalTextWritten, + string ErrorCode, + string ErrorSummary) +{ + public static ConversationCardFinalizeResult Succeeded() => + new(true, true, string.Empty, string.Empty); + + /// + /// Failure factory. distinguishes between "trailing + /// write failed; user sees stale interim" (false) and "trailing write succeeded but + /// streaming-mode close failed; user sees the final text with a still-blinking cursor" + /// (true). + /// + public static ConversationCardFinalizeResult Failed(string errorCode, string errorSummary, bool finalTextWritten = false) => + new(false, finalTextWritten, errorCode, errorSummary); +} + +/// +/// No-op default. Every CardKit operation reports a transient failure that disables the +/// card path so the grain can fall back to the legacy text-edit sink. 
Production DI registers +/// a real implementation when CardKit is enabled. +/// +public sealed class NullConversationCardTurnRunner : IConversationCardTurnRunner +{ + public Task RunCardCreateAsync( + LlmReplyCardStreamChunkEvent chunk, + string streamingElementId, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct) => + Task.FromResult(ConversationCardCreateResult.Failed( + "no_card_runner", + "no IConversationCardTurnRunner registered")); + + public Task RunCardStreamAsync( + LlmReplyCardStreamChunkEvent chunk, + string cardId, + string elementId, + long sequence, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct) => + Task.FromResult(ConversationCardStreamResult.Failed( + "no_card_runner", + "no IConversationCardTurnRunner registered")); + + public Task RunCardFinalizeAsync( + ChatActivity referenceActivity, + string cardId, + string elementId, + string finalText, + bool finalTextDiffersFromLastFlushed, + long sequence, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct) => + Task.FromResult(ConversationCardFinalizeResult.Failed( + "no_card_runner", + "no IConversationCardTurnRunner registered")); +} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/DependencyInjection/ChannelRuntimeServiceCollectionExtensions.cs b/agents/Aevatar.GAgents.Channel.Runtime/DependencyInjection/ChannelRuntimeServiceCollectionExtensions.cs index aa2c48d55..5de88c691 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/DependencyInjection/ChannelRuntimeServiceCollectionExtensions.cs +++ b/agents/Aevatar.GAgents.Channel.Runtime/DependencyInjection/ChannelRuntimeServiceCollectionExtensions.cs @@ -48,6 +48,7 @@ public static IServiceCollection AddChannelRuntime( services.TryAddSingleton(); services.TryAddSingleton(); services.TryAddSingleton(); + services.TryAddSingleton(); // ─── Tombstone compaction options + diagnostics + ES watermark ─── services.AddOptions(); diff --git 
a/agents/Aevatar.GAgents.Channel.Runtime/IConversationReplyGenerator.cs b/agents/Aevatar.GAgents.Channel.Runtime/IConversationReplyGenerator.cs index bc7c04912..176cb9b69 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/IConversationReplyGenerator.cs +++ b/agents/Aevatar.GAgents.Channel.Runtime/IConversationReplyGenerator.cs @@ -5,13 +5,47 @@ namespace Aevatar.GAgents.Channel.Runtime; public interface IConversationReplyGenerator { /// - /// Generates the full LLM reply text. If is supplied, the - /// generator forwards progressive deltas as the stream advances; implementations must tolerate - /// a null sink by simply accumulating the final text. + /// Generates the full LLM reply for one Lark / channel turn and returns the actor-edge + /// closeout for the streaming run. /// - Task GenerateReplyAsync( + /// + /// Per ADR-0021 §6 / canon §8 the run-level streaming closeout lives at the actor edge, + /// not inside ChatRuntime. Implementations MUST: + /// + /// Aggregate Usage across all internal LLM rounds (tool-call loop), returning + /// the sum on . + /// Surface the last non-empty FinishReason observed across rounds on + /// . + /// Forward progressive deltas to when supplied; tolerate + /// a null sink by accumulating into the final text. + /// + /// Round-internal terminal markers (per-round IsLast / per-round Usage) MUST NOT + /// escape this boundary — callers consume a single closeout via the returned record. + /// + Task GenerateReplyAsync( ChatActivity activity, IReadOnlyDictionary metadata, IStreamingReplySink? streamingSink, CancellationToken ct); } + +/// +/// Single actor-edge closeout for a streaming reply run. ADR-0021 §6 / canon §8. +/// +/// The final reply text accumulated across all internal LLM rounds. +/// Cross-round token usage sum, or null when no provider reported it. +/// The last non-empty finish reason observed across rounds, or null. +public sealed record ConversationReplyResult( + string? Text, + ReplyTokenUsage? Usage, + string? 
FinishReason); + +/// +/// Channel-runtime-side token usage projection. Mirrors Aevatar.AI.Abstractions.LLMProviders.TokenUsage +/// to avoid a layer-violating dependency from Channel.Runtime onto AI.Abstractions +/// (see CLAUDE.md "依赖反转"). +/// +public sealed record ReplyTokenUsage( + int PromptTokens, + int CompletionTokens, + int TotalTokens); diff --git a/agents/Aevatar.GAgents.Channel.Runtime/IStreamingReplySink.cs b/agents/Aevatar.GAgents.Channel.Runtime/IStreamingReplySink.cs index 1769c0a4a..64b09f271 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/IStreamingReplySink.cs +++ b/agents/Aevatar.GAgents.Channel.Runtime/IStreamingReplySink.cs @@ -2,13 +2,13 @@ namespace Aevatar.GAgents.Channel.Runtime; /// /// Receives per-delta streaming updates from so the reply -/// inbox can fan the accumulated text to the conversation actor as it is being generated. The +/// run actor can fan the accumulated text to the conversation actor as it is being generated. The /// actor is the sole holder of the relay reply token, so only it is allowed to drive the relay /// placeholder send and subsequent edit calls; this sink therefore fans out signals (chunk events) /// and never touches the outbound port directly. /// /// -/// Implementations are per-turn and owned by the inbox runtime. A null sink signals that streaming +/// Implementations are per-turn and owned by the run actor. A null sink signals that streaming /// is disabled for the turn (for example, the feature flag is off, the activity is not a relay /// turn, or an earlier failure invalidated the turn); generators must tolerate a null sink by /// simply accumulating the final text without calling any sink method. 
diff --git a/agents/Aevatar.GAgents.Channel.Runtime/LlmReplyCredentialMetadataKeys.cs b/agents/Aevatar.GAgents.Channel.Runtime/LlmReplyCredentialMetadataKeys.cs new file mode 100644 index 000000000..8857c0151 --- /dev/null +++ b/agents/Aevatar.GAgents.Channel.Runtime/LlmReplyCredentialMetadataKeys.cs @@ -0,0 +1,36 @@ +using Google.Protobuf.Collections; + +namespace Aevatar.GAgents.Channel.Runtime; + +/// +/// Per-call credentials that flow through from the +/// inbound channel turn runner into and onward to +/// AgentRunGAgent for the actual LLM call. These keys must never reach the persisted +/// state, event store, projection, or read model. +/// +/// +/// Mirrors the string constants defined by +/// Aevatar.AI.Abstractions.LLMProviders.LLMRequestMetadataKeys. The constants are +/// duplicated here because Aevatar.GAgents.Channel.Runtime intentionally does not depend +/// on the AI abstractions package — these are wire-stable identifiers, so duplication is +/// preferable to introducing a downstream-to-upstream reference. 
+/// +internal static class LlmReplyCredentialMetadataKeys +{ + public const string NyxIdAccessToken = "nyxid.access_token"; + public const string NyxIdOrgToken = "nyxid.org_token"; + public const string SenderNyxIdAccessToken = "nyxid.sender_access_token"; + + public static readonly IReadOnlyList All = new[] + { + NyxIdAccessToken, + NyxIdOrgToken, + SenderNyxIdAccessToken, + }; + + public static void StripFrom(MapField metadata) + { + foreach (var key in All) + metadata.Remove(key); + } +} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/TurnStreamingReplySink.cs b/agents/Aevatar.GAgents.Channel.Runtime/TurnStreamingReplySink.cs index 8d846797b..8932588a7 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/TurnStreamingReplySink.cs +++ b/agents/Aevatar.GAgents.Channel.Runtime/TurnStreamingReplySink.cs @@ -1,6 +1,7 @@ using Aevatar.Foundation.Abstractions; using Aevatar.GAgents.Channel.Abstractions; using Aevatar.GAgents.Channel.Runtime; +using Google.Protobuf; using Google.Protobuf.WellKnownTypes; using Microsoft.Extensions.Logging; @@ -31,7 +32,7 @@ namespace Aevatar.GAgents.Channel.Runtime; /// bypasses the throttle so the actor sees the complete text /// once the stream ends; if a dispatch is in flight, the final text reflushes after it and /// awaits the dispatch loop's drain signal before returning so the -/// caller (the inbox runtime) does not race the ready event past the final chunk. +/// caller (the run actor) does not race the ready event past the final chunk. /// /// /// @@ -52,6 +53,8 @@ public sealed class TurnStreamingReplySink : IStreamingReplySink, IDisposable private readonly string _registrationId; private readonly ChatActivity _activityTemplate; private readonly TimeSpan _throttle; + private readonly int _maxInterimChunks; + private readonly bool _cardMode; private readonly TimeProvider _timeProvider; private readonly ILogger? 
_logger; @@ -65,7 +68,7 @@ public sealed class TurnStreamingReplySink : IStreamingReplySink, IDisposable private bool _dispatchInProgress; private bool _disposed; // Signaled by the dispatch loop when it fully drains. FinalizeAsync awaits this when a - // dispatch is already in flight so the caller does not race the inbox runtime's + // dispatch is already in flight so the caller does not race AgentRunGAgent's // LlmReplyReadyEvent past the final chunk dispatch (the ConversationGAgent // processed-command guard would otherwise drop the late chunk). private TaskCompletionSource? _drainTcs; @@ -78,7 +81,9 @@ public TurnStreamingReplySink( ChatActivity activityTemplate, TimeSpan throttle, TimeProvider timeProvider, - ILogger? logger = null) + ILogger? logger = null, + int maxInterimChunks = int.MaxValue, + bool cardMode = false) { _actorDispatchPort = actorDispatchPort ?? throw new ArgumentNullException(nameof(actorDispatchPort)); if (string.IsNullOrWhiteSpace(targetActorId)) @@ -90,6 +95,8 @@ public TurnStreamingReplySink( _registrationId = registrationId ?? string.Empty; _activityTemplate = activityTemplate ?? throw new ArgumentNullException(nameof(activityTemplate)); _throttle = throttle < TimeSpan.Zero ? TimeSpan.Zero : throttle; + _maxInterimChunks = maxInterimChunks < 0 ? 0 : maxInterimChunks; + _cardMode = cardMode; _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider)); _logger = logger; } @@ -109,7 +116,7 @@ public Task OnDeltaAsync(string accumulatedText, CancellationToken ct) => /// Applies the final accumulated text, bypassing the throttle so the actor can drive the final /// edit once the stream ends. If a dispatch is already in flight, the final text is stashed and /// this call awaits the dispatch loop's drain signal so the final chunk is on the wire before - /// the caller proceeds (the inbox runtime sends LlmReplyReadyEvent immediately after). 
+ /// the caller proceeds (AgentRunGAgent sends LlmReplyReadyEvent immediately after). /// public Task FinalizeAsync(string finalText, CancellationToken ct) => FlushAsync(finalText, isFinal: true, ct); @@ -158,6 +165,19 @@ private async Task FlushAsync(string text, bool isFinal, CancellationToken ct) return; } + // Lark/Feishu refuses message edits past a per-message cap (~20 in mainnet, code + // 230072). Once that cap is reached the platform rejects every subsequent edit + // including the final flush, leaving the user with a truncated reply. Cap interim + // dispatches here so the final always has headroom; we still stash the latest text + // so FinalizeAsync can dispatch the complete content when the stream ends. + if (!isFinal && _chunksEmitted >= _maxInterimChunks) + { + _pendingText = text; + _hasPending = true; + CancelTimerLocked(); + return; + } + if (_dispatchInProgress) { // A dispatch is in flight. Stash the latest text; the dispatch loop's reflush @@ -168,7 +188,7 @@ private async Task FlushAsync(string text, bool isFinal, CancellationToken ct) if (isFinal) { // Block FinalizeAsync until the dispatch loop drains the stashed final text. - // Without this wait, ChannelLlmReplyInboxRuntime sends LlmReplyReadyEvent + // Without this wait, AgentRunGAgent sends LlmReplyReadyEvent // first and ConversationGAgent's processed-command guard drops the late // final chunk. _drainTcs ??= new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); @@ -265,7 +285,7 @@ private async Task DispatchLoopAsync(string firstText, CancellationToken ct) { await DispatchOneAsync(current, ct).ConfigureAwait(false); - string? next; + string? next = null; lock (_lock) { if (_disposed || !_hasPending) @@ -286,6 +306,49 @@ private async Task DispatchLoopAsync(string firstText, CancellationToken ct) break; } + var nextIsFinal = _drainTcs is not null; + + // Stop dispatching interim chunks once the cap is reached. 
Leave the + // latest text pending so FinalizeAsync can still observe that an interim + // update is deferred, but do not signal a drain for non-final text. + if (!nextIsFinal && _chunksEmitted >= _maxInterimChunks) + { + _dispatchInProgress = false; + break; + } + + // Throttle gate between dispatches. Without this, the loop drains stashed + // text at network round-trip pace (~50ms) and exhausts the platform-side + // per-message edit cap (Lark code 230072). When the throttle window has + // not elapsed, arm the deferred timer atomically with releasing + // _dispatchInProgress so a concurrent OnDeltaAsync (PR #562 review #17) + // cannot squeeze in between the release and the arm and observe a stale + // (no-timer + not-dispatching) state. Final dispatches bypass the + // throttle so the user sees the complete text immediately when the + // stream ends. + // + // Invariant: if we reach this branch, nextIsFinal == false, so _drainTcs + // must be null. The timer is armed before _dispatchInProgress is released, + // so a concurrent delta cannot observe a no-timer + not-dispatching gap. 
+ if (!nextIsFinal && _throttle > TimeSpan.Zero) + { + var elapsed = _timeProvider.GetUtcNow() - _lastEmitAt; + if (elapsed < _throttle) + { + var delay = _throttle - elapsed; + if (!_disposed && _hasPending && _flushTimer is null) + { + _flushTimer = _timeProvider.CreateTimer( + OnFlushTimerFired, + state: null, + dueTime: delay, + period: Timeout.InfiniteTimeSpan); + } + _dispatchInProgress = false; + break; + } + } + next = _pendingText; _pendingText = string.Empty; _hasPending = false; @@ -312,14 +375,26 @@ private async Task DispatchLoopAsync(string firstText, CancellationToken ct) private async Task DispatchOneAsync(string text, CancellationToken ct) { - var chunk = new LlmReplyStreamChunkEvent - { - CorrelationId = _correlationId, - RegistrationId = _registrationId, - Activity = _activityTemplate.Clone(), - AccumulatedText = text, - ChunkAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), - }; + // Card mode dispatches a structurally distinct message type so persistence layers + // cannot silently re-route a replayed event back to the card sink. The two proto + // types carry identical payloads; the type identity itself signals routing. + IMessage chunk = _cardMode + ? 
new LlmReplyCardStreamChunkEvent + { + CorrelationId = _correlationId, + RegistrationId = _registrationId, + Activity = _activityTemplate.Clone(), + AccumulatedText = text, + ChunkAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + } + : new LlmReplyStreamChunkEvent + { + CorrelationId = _correlationId, + RegistrationId = _registrationId, + Activity = _activityTemplate.Clone(), + AccumulatedText = text, + ChunkAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + }; var envelope = new EventEnvelope { Id = Guid.NewGuid().ToString("N"), diff --git a/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_events.proto b/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_events.proto index ea0cc98e6..3c335b834 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_events.proto +++ b/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_events.proto @@ -30,10 +30,10 @@ message NeedsLlmReplyEvent { aevatar.gagents.channel.abstractions.ChatActivity activity = 4; map metadata = 5; int64 requested_at_unix_ms = 6; - // Transient inbox-only credential. The actor MUST clear `reply_token` and + // Transient run-command-only credential. The actor MUST clear `reply_token` and // `reply_token_expires_at_unix_ms` (set them to the empty default) on the - // copy passed to PersistDomainEventAsync; only the inbox-bound copy may - // carry them so the LLM worker can echo the credential back without the + // copy passed to PersistDomainEventAsync; only the run-bound copy may + // carry them so AgentRunGAgent can echo the credential back without the // actor's in-memory dict surviving deactivation. Never persist to event // store, projection, or read model. 
string reply_token = 7; @@ -70,16 +70,16 @@ message LlmReplyReadyEvent { string error_code = 7; string error_summary = 8; int64 ready_at_unix_ms = 9; - // Transient inbox-echoed credential carried back from the LLM worker so the + // Transient run-echoed credential carried back from AgentRunGAgent so the // actor's outbound relay reply does not depend on its in-memory token dict // surviving deactivation. The actor consumes these fields directly and never - // persists them. The inbox subscriber copies the values from the inbound + // persists them. AgentRunGAgent copies the values from the inbound // NeedsLlmReplyEvent verbatim. string reply_token = 10; int64 reply_token_expires_at_unix_ms = 11; } -// Per-delta streaming signal dispatched from the LLM inbox runtime to the conversation actor while +// Per-delta streaming signal dispatched from AgentRunGAgent to the conversation actor while // the reply is still being generated. The actor owns the outbound reply credential and the // placeholder message identifier for the turn, so it must be the one issuing the relay placeholder // send and subsequent edit calls. This message carries only the cumulative accumulated text for @@ -87,6 +87,14 @@ message LlmReplyReadyEvent { // in-memory keyed by `correlation_id`. This event must never be persisted — it is a runtime-only // signal. message LlmReplyStreamChunkEvent { + // Field 6 (`card_mode`) was a runtime-only routing flag that has been promoted to its own + // message type (`LlmReplyCardStreamChunkEvent`) so the structural contract of this domain- + // event-shaped envelope no longer carries any "should I re-route to a different sink?" + // signal. Reserved here so accidental reuse of the field number, or a stale serializer + // built before the split, fails loudly instead of silently flipping back to card mode. 
+ reserved 6; + reserved "card_mode"; + string correlation_id = 1; string registration_id = 2; // Clone of the inbound activity so the actor/turn runner can resolve the platform, conversation, @@ -97,6 +105,24 @@ message LlmReplyStreamChunkEvent { int64 chunk_at_unix_ms = 5; } +// Per-delta streaming signal for the Lark CardKit (card-mode) outbound path. Identical +// payload to LlmReplyStreamChunkEvent, but a separate proto type so the routing decision is +// structural: there is no boolean a misbehaving persistence layer can flip — the actor's +// HandleLlmReplyCardStreamChunkAsync handler is reachable only via this type. Like its +// edit-message sibling, this event is a runtime-only signal and must never be persisted to +// the event store, projection, or any durable state. +message LlmReplyCardStreamChunkEvent { + string correlation_id = 1; + string registration_id = 2; + // Clone of the inbound activity so the actor/runner can resolve the platform, conversation, + // delivery context, and TransportExtras (NyxUserAccessToken, NyxLarkChatId, NyxLarkUnionId) + // without re-reading from durable state. + aevatar.gagents.channel.abstractions.ChatActivity activity = 3; + // Current accumulated reply text (not a delta slice). Each chunk supersedes the previous one. + string accumulated_text = 4; + int64 chunk_at_unix_ms = 5; +} + message DeferredLlmReplyDispatchRequestedEvent { string correlation_id = 1; int64 requested_at_unix_ms = 2; @@ -128,7 +154,7 @@ message NyxRelayReplyTokenCleanupRequestedEvent { int64 requested_at_unix_ms = 2; } -// Sent by ChannelLlmReplyInboxRuntime when its pre-LLM gates (stale age, +// Sent by AgentRunGAgent when its pre-LLM gates (stale age, // missing relay credential, malformed payload) refuse to process a deferred // LLM reply. 
The actor consumes this to retire the matching pending entry // from State.PendingLlmReplyRequests via a NotRetryable @@ -191,3 +217,32 @@ enum FailureKind { FAILURE_KIND_CREDENTIAL_RESOLUTION_FAILED = 3; FAILURE_KIND_PLATFORM_UNAVAILABLE = 4; } + +// Persisted by ConversationGAgent after the channel sink (e.g. Lark +// edit_message) has ack'd the final reply chunk. Drives +// ConversationGAgentState.last_reply_delivery into the Delivered outcome, +// satisfying chain.delivered per ADR-0021. Single source of user-visible +// delivery truth — downstream readers MUST NOT rely on lark API log lines +// or other side-channel signals. +message LlmReplyDeliveredEvent { + string correlation_id = 1; + // AgentRunGAgent run_id producing this reply (used to anchor delivery + // against a specific committed reply). + string run_id = 2; + int64 acked_at_unix_ms = 3; + // Channel-side message id from the sink ack (e.g. Lark message_id). + string channel_message_id = 4; +} + +// Persisted by ConversationGAgent when the channel sink rejects or times +// out on the reply send/edit. Drives +// ConversationGAgentState.last_reply_delivery into the DeliveryFailed +// outcome. The downstream ConversationContinueFailedEvent is the +// chain-finalizing signal — this event captures the structured cause. 
+message LlmReplyDeliveryFailedEvent { + string correlation_id = 1; + string run_id = 2; + int64 failed_at_unix_ms = 3; + string error_code = 4; + string error_message = 5; +} diff --git a/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_state.proto b/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_state.proto index ad1d028aa..5afb98e2c 100644 --- a/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_state.proto +++ b/agents/Aevatar.GAgents.Channel.Runtime/protos/conversation_state.proto @@ -15,6 +15,36 @@ message ConversationGAgentState { int64 last_updated_unix_ms = 5; repeated NeedsLlmReplyEvent pending_llm_reply_requests = 6; repeated PendingInboundTurn pending_inbound_turns = 7; + // User-visible delivery outcome of the most recent LLM reply turn — + // single field by design (multi-turn history reconstructable from event + // log). See ADR-0021 chain.delivered phase. + ReplyDeliveryStatus last_reply_delivery = 8; +} + +// Channel-sink ack tracking for the most recent reply turn. Carries either +// an in-flight pending marker, a successful delivery (with channel-side +// message id) or a structured failure. Used by ConversationGAgent to make +// chain.delivered observable per ADR-0021; downstream readers MUST NOT +// infer delivery status from any other state field. 
+message ReplyDeliveryStatus { + string run_id = 1; + oneof outcome { + Pending pending = 2; + Delivered delivered = 3; + DeliveryFailed failed = 4; + } + message Pending { + int64 started_at_unix_ms = 1; + } + message Delivered { + int64 acked_at_unix_ms = 1; + string channel_message_id = 2; + } + message DeliveryFailed { + int64 failed_at_unix_ms = 1; + string error_code = 2; + string error_message = 3; + } } message PendingSession { diff --git a/agents/Aevatar.GAgents.NyxidChat/Aevatar.GAgents.NyxidChat.csproj b/agents/Aevatar.GAgents.NyxidChat/Aevatar.GAgents.NyxidChat.csproj index 85b7ac4fb..4d1f9ab4d 100644 --- a/agents/Aevatar.GAgents.NyxidChat/Aevatar.GAgents.NyxidChat.csproj +++ b/agents/Aevatar.GAgents.NyxidChat/Aevatar.GAgents.NyxidChat.csproj @@ -24,6 +24,7 @@ + @@ -35,10 +36,21 @@ + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + diff --git a/agents/Aevatar.GAgents.NyxidChat/AgentRunDispatcher.cs b/agents/Aevatar.GAgents.NyxidChat/AgentRunDispatcher.cs new file mode 100644 index 000000000..4085bdde1 --- /dev/null +++ b/agents/Aevatar.GAgents.NyxidChat/AgentRunDispatcher.cs @@ -0,0 +1,149 @@ +using Aevatar.Foundation.Abstractions; +using Aevatar.GAgents.Channel.Runtime; +using Google.Protobuf.WellKnownTypes; +using Microsoft.Extensions.Logging; + +namespace Aevatar.GAgents.NyxidChat; + +/// +/// Thin Channel.Runtime port implementation that creates the run actor and +/// dispatches the start command. It holds no run state. +/// +public sealed class AgentRunDispatcher : IChannelLlmReplyRunDispatcher +{ + // Must match AgentRunGAgent.MaxRunRequestAgeMs so the dispatcher rejects + // freshness violations at the boundary rather than letting them propagate + // to the run actor inbox (where they would just be dropped). See ADR-0021. 
+    private const long MaxRequestAgeMs = AgentRunGAgent.MaxRunRequestAgeMs; // shared constant: cannot drift from the run actor's own stale gate
+
+    private readonly IActorRuntime _actorRuntime;
+    private readonly IStreamProvider _streamProvider;
+    private readonly TimeProvider _timeProvider;
+    private readonly ILogger _logger;
+    private readonly SemaphoreSlim _dispatchGate = new(1, 1); // held for the whole dispatch body, so dispatches through this instance run one at a time
+
+    public AgentRunDispatcher(
+        IActorRuntime actorRuntime,
+        IStreamProvider streamProvider,
+        ILogger logger,
+        TimeProvider? timeProvider = null)
+    {
+        _actorRuntime = actorRuntime ?? throw new ArgumentNullException(nameof(actorRuntime));
+        _streamProvider = streamProvider ?? throw new ArgumentNullException(nameof(streamProvider));
+        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
+        _timeProvider = timeProvider ?? TimeProvider.System; // injectable clock; falls back to the system clock
+    }
+
+    public async Task DispatchAsync(NeedsLlmReplyEvent request, CancellationToken ct)
+    {
+        ArgumentNullException.ThrowIfNull(request);
+        if (string.IsNullOrWhiteSpace(request.CorrelationId))
+            throw new InvalidOperationException("Deferred LLM reply request requires correlation_id for AgentRunGAgent dispatch.");
+
+        var runId = request.CorrelationId.Trim(); // the trimmed correlation id doubles as the run id
+        var actorId = AgentRunGAgent.BuildActorId(runId);
+        await _dispatchGate.WaitAsync(ct);
+        try
+        {
+            var nowMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds();
+
+            if (request.RequestedAtUnixMs > 0 && nowMs - request.RequestedAtUnixMs > MaxRequestAgeMs) // RequestedAtUnixMs <= 0 skips the age check
+            {
+                _logger.LogWarning(
+                    "Rejected stale deferred LLM reply run at dispatcher: runId={RunId} ageMs={AgeMs} thresholdMs={Threshold} target={TargetActorId}",
+                    runId,
+                    nowMs - request.RequestedAtUnixMs,
+                    MaxRequestAgeMs,
+                    request.TargetActorId);
+                return new DispatchOutcome(
+                    Phase: DispatchPhase.RejectedStale,
+                    CommandId: string.Empty,
+                    RunActorId: null,
+                    AcceptedAtUnixMs: 0);
+            }
+
+            if (await _actorRuntime.ExistsAsync(actorId)) // an actor already exists for this run id: report a duplicate instead of starting again
+            {
+                _logger.LogInformation(
+                    "Rejected duplicate deferred LLM reply run at dispatcher: runId={RunId} actorId={ActorId} target={TargetActorId}",
+                    runId,
+                    actorId,
+                    request.TargetActorId);
+                return new DispatchOutcome(
+                    Phase: DispatchPhase.RejectedDuplicate,
+                    CommandId: string.Empty,
+                    RunActorId: actorId,
+                    AcceptedAtUnixMs: 0);
+            }
+
+            var actor = await _actorRuntime.CreateAsync(actorId, ct);
+
+            var commandId = BuildStartCommandId(runId);
+            var command = new AgentRunStartRequested
+            {
+                Request = request.Clone(), // the run actor gets its own copy of the request
+            };
+            var envelope = new EventEnvelope
+            {
+                Id = commandId,
+                Timestamp = Timestamp.FromDateTimeOffset(_timeProvider.GetUtcNow()),
+                Payload = Any.Pack(command),
+                Route = EnvelopeRouteSemantics.CreateDirect("channel-llm-reply-run-dispatcher", actor.Id),
+                Propagation = new EnvelopePropagation
+                {
+                    CorrelationId = runId,
+                },
+                Runtime = new EnvelopeRuntime
+                {
+                    Deduplication = new DeliveryDeduplication
+                    {
+                        OperationId = commandId, // same value as the envelope id, derived only from the run id
+                    },
+                },
+            };
+
+            try
+            {
+                await _streamProvider.GetStream(actor.Id).ProduceAsync(envelope, ct);
+            }
+            catch // enqueue failed: tear down the actor created above, then rethrow the original error
+            {
+                await DestroyCreatedActorAfterDispatchFailureAsync(actor.Id);
+                throw;
+            }
+
+            _logger.LogInformation(
+                "Accepted deferred LLM reply run for actor inbox: runId={RunId} actorId={ActorId} commandId={CommandId} target={TargetActorId}",
+                runId,
+                actor.Id,
+                commandId,
+                request.TargetActorId);
+            return new DispatchOutcome(
+                Phase: DispatchPhase.Accepted,
+                CommandId: commandId,
+                RunActorId: actor.Id,
+                AcceptedAtUnixMs: nowMs); // the time sampled at the top of the gate, not the enqueue time
+        }
+        finally
+        {
+            _dispatchGate.Release();
+        }
+    }
+
+    private static string BuildStartCommandId(string runId) => $"agent-run-start:{runId}";
+
+    private async Task DestroyCreatedActorAfterDispatchFailureAsync(string actorId)
+    {
+        try
+        {
+            await _actorRuntime.DestroyAsync(actorId, CancellationToken.None); // not tied to the caller's token, so a cancelled dispatch still attempts the destroy
+        }
+        catch (Exception ex) // best effort: a failed destroy is logged and never replaces the dispatch error
+        {
+            _logger.LogWarning(
+                ex,
+                "Failed to destroy agent run actor after dispatch enqueue failed: actorId={ActorId}",
+                actorId);
+        }
+    }
+}
diff --git a/agents/Aevatar.GAgents.NyxidChat/AgentRunGAgent.cs b/agents/Aevatar.GAgents.NyxidChat/AgentRunGAgent.cs
new file mode 100644
index 000000000..ac64e8e86
--- /dev/null +++ b/agents/Aevatar.GAgents.NyxidChat/AgentRunGAgent.cs @@ -0,0 +1,1117 @@ +using Aevatar.AI.Abstractions.LLMProviders; +using Aevatar.Foundation.Abstractions; +using Aevatar.Foundation.Abstractions.Attributes; +using Aevatar.Foundation.Abstractions.Runtime.Callbacks; +using Aevatar.Foundation.Core; +using Aevatar.Foundation.Core.EventSourcing; +using Aevatar.GAgents.Channel.Abstractions; +using Aevatar.GAgents.Channel.NyxIdRelay; +using Aevatar.GAgents.Channel.Runtime; +using Aevatar.Studio.Application.Studio.Abstractions; +using Google.Protobuf; +using Google.Protobuf.WellKnownTypes; +using Microsoft.Extensions.Logging; + +namespace Aevatar.GAgents.NyxidChat; + +/// +/// Run-scoped continuation owner for one deferred channel LLM reply. +/// +public sealed class AgentRunGAgent : GAgentBase +{ + public const string ActorIdPrefix = "channel-agent-run:"; + + internal const long MaxRunRequestAgeMs = 5 * 60 * 1000; + + /// + /// Hard upper bound on a single LLM reply turn. Mirrors + /// NyxIdRelayOptions.ResponseTimeoutSeconds (default 300s). + /// A configured value of 0 or negative is treated as "disable the cap". + /// + internal const int FallbackTimeoutSecondsDefault = 300; + + /// + /// Standalone budget for metadata enrichment (scope resolve + UserConfig lookup). 
+ /// + internal static readonly TimeSpan MetadataBuildBudget = TimeSpan.FromSeconds(15); + + internal static readonly TimeSpan TerminalCleanupDelay = TimeSpan.FromMinutes(5); + private const string TerminalCleanupCallbackPrefix = "agent-run-terminal-cleanup"; + internal static readonly TimeSpan OutputDispatchTimeout = TimeSpan.FromSeconds(10); + internal static readonly TimeSpan OutputDispatchRetryDelay = TimeSpan.FromSeconds(5); + private const string OutputDispatchRetryCallbackPrefix = "agent-run-output-dispatch-retry"; + + private readonly IActorRuntime _actorRuntime; + private readonly IActorDispatchPort _actorDispatchPort; + private readonly IConversationReplyGenerator _replyGenerator; + private readonly IInteractiveReplyCollector? _interactiveReplyCollector; + private readonly Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions? _relayOptions; + private readonly INyxIdRelayScopeResolver? _scopeResolver; + private readonly IUserConfigQueryPort? _userConfigQueryPort; + private readonly IActorRuntimeCallbackScheduler? _callbackScheduler; + private readonly TimeProvider _timeProvider; + private readonly ILogger _logger; + + public AgentRunGAgent( + IActorRuntime actorRuntime, + IActorDispatchPort actorDispatchPort, + IConversationReplyGenerator replyGenerator, + IInteractiveReplyCollector? interactiveReplyCollector, + Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions? relayOptions, + ILogger logger, + INyxIdRelayScopeResolver? scopeResolver = null, + IUserConfigQueryPort? userConfigQueryPort = null, + IActorRuntimeCallbackScheduler? callbackScheduler = null, + TimeProvider? timeProvider = null) + { + _actorRuntime = actorRuntime ?? throw new ArgumentNullException(nameof(actorRuntime)); + _actorDispatchPort = actorDispatchPort ?? throw new ArgumentNullException(nameof(actorDispatchPort)); + _replyGenerator = replyGenerator ?? 
throw new ArgumentNullException(nameof(replyGenerator)); + _interactiveReplyCollector = interactiveReplyCollector; + _relayOptions = relayOptions; + _scopeResolver = scopeResolver; + _userConfigQueryPort = userConfigQueryPort; + _callbackScheduler = callbackScheduler; + _timeProvider = timeProvider ?? TimeProvider.System; + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public static string BuildActorId(string correlationId) + { + ArgumentException.ThrowIfNullOrWhiteSpace(correlationId); + return ActorIdPrefix + correlationId.Trim(); + } + + protected override AgentRunGAgentState TransitionState(AgentRunGAgentState current, IMessage evt) => + StateTransitionMatcher + .Match(current, evt) + .On(ApplyStarted) + .On(ApplyReplyProduced) + .On(ApplyReplyDispatched) + .On(ApplyDropped) + .On(ApplyFailed) + .On(ApplyCleanupCompleted) + .OrCurrent(); + + // ADR-0021 §6 / canon §9 absorbing-terminal check. Combined with + // `cleanup_completed_at_unix_ms != 0` this defines chain.finalized. + // Every reply-ready / dropped / failed / cleanup handler MUST short-circuit + // on a terminal status; late / stale signals must no-op. + internal static bool IsTerminal(AgentRunStatus status) => + status is AgentRunStatus.Dropped + or AgentRunStatus.Failed + or AgentRunStatus.ReplyHandedOff; + + private bool IsTerminal() => IsTerminal(State.Status); + + private bool IsCleanupAlreadyCompleted() => State.CleanupCompletedAtUnixMs != 0; + + [EventHandler] + public async Task HandleStartAsync(AgentRunStartRequested command) + { + ArgumentNullException.ThrowIfNull(command); + if (command.Request is null) + { + _logger.LogWarning("Dropping malformed agent run start command without request: runActor={RunActorId}", Id); + return; + } + + var request = command.Request.Clone(); + var runId = NormalizeOptional(request.CorrelationId) ?? 
Id; + var startedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(); + + // ADR-0021 chain.finalized precondition: terminal status means the run has + // already dropped, failed, or handed the reply off. Late starts must no-op + // beyond (re-)scheduling cleanup — never re-run the LLM / tool chain. + // Cleanup is itself idempotent on `cleanup_completed_at != 0`. + if (IsTerminal()) + { + _logger.LogInformation( + "Ignoring duplicate terminal agent run start: runId={RunId} status={Status} cleanupCompleted={CleanupCompleted}", + runId, + State.Status, + IsCleanupAlreadyCompleted()); + if (!IsCleanupAlreadyCompleted()) + await ScheduleTerminalCleanupAsync(NormalizeOptional(State.RunId) ?? runId); + return; + } + + // ReplyProduced but not yet handed off: this is the output-dispatch retry path — + // re-deliver the persisted payload without re-running the LLM / tool chain so + // we don't repeat tool side effects (SSH exec, external API calls, billing) + // or produce a different reply. 
+ if (State.Status is AgentRunStatus.ReplyProduced) + { + _logger.LogInformation( + "Re-dispatching previously produced reply (output-dispatch retry): runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + try + { + await ReDispatchProducedReplyAsync(request, runId); + } + catch (AgentRunOutputDispatchException ex) + { + if (!await TryHandleOutputDispatchFailureAsync(request, runId, ex)) + throw; + } + return; + } + + if (string.IsNullOrWhiteSpace(State.RunId)) + { + await PersistDomainEventAsync(new AgentRunStartedEvent + { + RunId = runId, + CorrelationId = request.CorrelationId, + TargetActorId = request.TargetActorId, + StartedAtUnixMs = startedAtUnixMs, + }); + } + + try + { + await ProcessAsync(request, runId); + } + catch (AgentRunOutputDispatchException ex) + { + if (await TryHandleOutputDispatchFailureAsync(request, runId, ex)) + return; + + await PersistFailedAsync( + request, + runId, + "agent_run_output_dispatch_failed", + ex.Message); + } + catch (Exception ex) + { + await FailAfterUnexpectedExceptionAsync(request, runId, ex); + } + } + + [EventHandler] + public async Task HandleCleanupAsync(AgentRunCleanupRequested command) + { + ArgumentNullException.ThrowIfNull(command); + + // ADR-0021 §6 / canon §9 — cleanup is an absorbing operation. It is only + // valid for runs that have reached terminal status; stale runId references + // (the actor identity changed under us) and late callbacks (cleanup already + // completed) must both no-op so duplicates do not destroy a fresh run. + if (!IsTerminal()) + return; + + if (!string.IsNullOrWhiteSpace(command.RunId) && + !string.IsNullOrWhiteSpace(State.RunId) && + !string.Equals(command.RunId, State.RunId, StringComparison.Ordinal)) + { + return; + } + + if (IsCleanupAlreadyCompleted()) + { + _logger.LogDebug( + "Ignoring duplicate terminal cleanup: runId={RunId} cleanupCompletedAtUnixMs={CleanupAt}", + NormalizeOptional(State.RunId) ?? 
command.RunId, + State.CleanupCompletedAtUnixMs); + return; + } + + await PersistDomainEventAsync(new AgentRunCleanupCompletedEvent + { + RunId = NormalizeOptional(State.RunId) ?? command.RunId ?? string.Empty, + CorrelationId = State.CorrelationId ?? string.Empty, + CompletedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + }); + + await _actorRuntime.DestroyAsync(Id, CancellationToken.None); + } + + private async Task ProcessAsync(NeedsLlmReplyEvent request, string runId) + { + _logger.LogInformation( + "Processing agent run LLM reply request: runId={RunId} correlation={CorrelationId} target={TargetActorId}", + runId, + request.CorrelationId, + request.TargetActorId); + + if (request.Activity is null || string.IsNullOrWhiteSpace(request.TargetActorId)) + { + _logger.LogWarning( + "Dropping malformed deferred LLM reply request: runId={RunId}, correlation={CorrelationId}, target={TargetActorId}", + runId, + request.CorrelationId, + request.TargetActorId); + await DropAsync(request, runId, "malformed_deferred_llm_reply_request"); + return; + } + + // Stale gate: NyxID relay reply tokens have a ~30 min TTL and the user access + // token used for the LLM call expires inside ~15 min. A request that has been + // delayed past the run window cannot lead to a successful reply. + var nowMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(); + if (request.RequestedAtUnixMs > 0 && nowMs - request.RequestedAtUnixMs > MaxRunRequestAgeMs) + { + _logger.LogInformation( + "Dropping stale LLM reply request: runId={RunId} correlation={CorrelationId} ageMs={AgeMs}", + runId, + request.CorrelationId, + nowMs - request.RequestedAtUnixMs); + await DropAsync(request, runId, "stale_agent_run_request_dropped"); + return; + } + + // Relay credential gate: relay turns require a fresh reply_token to send the + // outbound. A relay request with no command-carried token cannot be delivered, + // so skip the LLM call entirely. 
+ if (IsRelayRequest(request) && string.IsNullOrWhiteSpace(request.ReplyToken)) + { + _logger.LogWarning( + "Dropping relay LLM reply request without command-carried reply_token: runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + await DropAsync(request, runId, "missing_relay_reply_token"); + return; + } + + await EnsureTargetActorAsync(request.TargetActorId); + + string replyText; + MessageContent? outboundIntent = null; + var terminalState = LlmReplyTerminalState.Completed; + var errorCode = string.Empty; + var errorSummary = string.Empty; + using TurnStreamingReplySink? streamingSink = TryBuildStreamingSink(request, request.TargetActorId); + + IReadOnlyDictionary effectiveMetadata; + using (var metadataCts = new CancellationTokenSource(MetadataBuildBudget)) + { + try + { + effectiveMetadata = await BuildEffectiveMetadataAsync(request, metadataCts.Token); + } + catch (OperationCanceledException ex) when (metadataCts.IsCancellationRequested) + { + _logger.LogWarning( + ex, + "Deferred LLM reply metadata build timed out after {TimeoutSeconds}s: runId={RunId} correlation={CorrelationId}", + (int)MetadataBuildBudget.TotalSeconds, + runId, + request.CorrelationId); + replyText = "Sorry, I couldn't load your model preferences in time. Please try again."; + terminalState = LlmReplyTerminalState.Failed; + errorCode = "llm_reply_metadata_timeout"; + errorSummary = $"Metadata enrichment exceeded {(int)MetadataBuildBudget.TotalSeconds}s budget."; + await FinalizeFailureStreamingSinkAsync(streamingSink, replyText, outboundIntent); + await ProduceAndDispatchAsync(request, runId, replyText, outboundIntent, terminalState, errorCode, errorSummary); + return; + } + } + + var fallbackTimeout = ResolveFallbackTimeout(); + using var timeoutCts = fallbackTimeout > TimeSpan.Zero + ? new CancellationTokenSource(fallbackTimeout) + : new CancellationTokenSource(); + + try + { + IDisposable? 
interactiveReplyScope = null; + try + { + if (ShouldCaptureInteractiveReply(request.Activity)) + interactiveReplyScope = _interactiveReplyCollector?.BeginScope(); + + // ADR-0021 §6 / canon §8 actor-edge closeout: the generator returns a + // single ConversationReplyResult per run carrying aggregated Usage and the + // last FinishReason. Round-internal terminal markers no longer leak past + // ChatRuntime, so this is the lone closeout observation point. + var replyResult = await _replyGenerator.GenerateReplyAsync( + request.Activity, + effectiveMetadata, + streamingSink, + timeoutCts.Token); + replyText = replyResult.Text ?? string.Empty; + if (replyResult.Usage is not null || !string.IsNullOrEmpty(replyResult.FinishReason)) + { + _logger.LogInformation( + "LLM reply closeout: runId={RunId} correlation={CorrelationId} promptTokens={Prompt} completionTokens={Completion} totalTokens={Total} finishReason={FinishReason}", + runId, + request.CorrelationId, + replyResult.Usage?.PromptTokens, + replyResult.Usage?.CompletionTokens, + replyResult.Usage?.TotalTokens, + replyResult.FinishReason ?? "(none)"); + } + outboundIntent = _interactiveReplyCollector?.TryTake(); + } + finally + { + interactiveReplyScope?.Dispose(); + } + + if (streamingSink is not null && + outboundIntent is null && + !string.IsNullOrWhiteSpace(replyText)) + { + await streamingSink.FinalizeAsync(replyText, CancellationToken.None); + } + + if (outboundIntent is null && string.IsNullOrWhiteSpace(replyText)) + { + terminalState = LlmReplyTerminalState.Failed; + errorCode = "empty_reply"; + errorSummary = "Reply generator returned an empty response."; + replyText = "Sorry, I wasn't able to generate a response. 
Please try again."; + } + } + catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested) + { + terminalState = LlmReplyTerminalState.Failed; + errorCode = "llm_reply_timeout"; + errorSummary = $"LLM reply generation exceeded {(int)fallbackTimeout.TotalSeconds}s budget."; + replyText = "Sorry, this took too long to process - the model or one of its tools didn't " + + "respond in time. Please try again, or rephrase the request."; + _logger.LogWarning( + ex, + "Deferred LLM reply timed out after {TimeoutSeconds}s: runId={RunId} correlation={CorrelationId}", + (int)fallbackTimeout.TotalSeconds, + runId, + request.CorrelationId); + } + catch (Exception ex) + { + terminalState = LlmReplyTerminalState.Failed; + errorCode = "llm_reply_failed"; + errorSummary = ex.Message; + replyText = NyxIdRelayErrorClassifier.Classify(ex.Message); + _logger.LogWarning( + ex, + "Deferred LLM reply generation failed: runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + } + + if (terminalState == LlmReplyTerminalState.Failed) + { + // Streaming-sink failure finalize: when the LLM run terminates with a fallback + // text (timeout / classifier / empty reply), surface that text on the live + // streaming card/edit message before the LlmReplyReadyEvent lands. Carried over + // from feature/lark-bot's dispatch hardening. + await FinalizeFailureStreamingSinkAsync(streamingSink, replyText, outboundIntent); + } + + await ProduceAndDispatchAsync( + request, + runId, + replyText, + outboundIntent, + terminalState, + errorCode, + errorSummary); + } + + private async Task FinalizeFailureStreamingSinkAsync( + TurnStreamingReplySink? streamingSink, + string replyText, + MessageContent? 
outboundIntent) + { + if (streamingSink is not null && + outboundIntent is null && + !string.IsNullOrWhiteSpace(replyText)) + { + try + { + await streamingSink.FinalizeAsync(replyText, CancellationToken.None); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to finalize streaming failure text for agent run {ActorId}", Id); + } + } + } + + /// + /// Persists the immutable produced reply payload BEFORE attempting to dispatch the + /// LlmReplyReadyEvent to the conversation actor. If dispatch then fails, the + /// output-dispatch retry path replays from state via + /// instead of re-running the LLM / + /// tool chain — which would otherwise repeat side effects (SSH exec, external API + /// calls, billing) and could surface a different reply than the persisted one. + /// + private async Task ProduceAndDispatchAsync( + NeedsLlmReplyEvent request, + string runId, + string replyText, + MessageContent? outboundIntent, + LlmReplyTerminalState terminalState, + string errorCode, + string errorSummary) + { + await PersistReplyProducedAsync( + request, + runId, + replyText, + outboundIntent, + terminalState, + errorCode, + errorSummary); + + await DispatchReadyEventAsync(request, replyText, outboundIntent, terminalState, errorCode, errorSummary); + + // Past the point of user-visible delivery. State persistence failures and cleanup + // scheduling failures MUST NOT propagate out — otherwise HandleStartAsync's outer + // `catch (Exception)` would call FailAfterUnexpectedExceptionAsync, which would + // re-enter ProduceAndDispatchAsync with a fallback reply and deliver a SECOND + // user-visible message ("Sorry, I couldn't complete this reply..."). Log and + // continue; the actor stays at Status=ReplyProduced && !ReplyDispatched, and the + // terminal cleanup callback simply doesn't fire (actor lingers until normal + // grain idle eviction). The conversation actor has already accepted the reply. 
+ await TryFinalizeAfterDispatchAsync(request, runId); + } + + /// + /// Output-dispatch retry path: re-deliver the produced payload from state without + /// re-running the LLM. Triggered when sees + /// State.Status == ReplyProduced (committed but not yet handed off). + /// + private async Task ReDispatchProducedReplyAsync(NeedsLlmReplyEvent request, string runId) + { + var outbound = State.ProducedOutbound; + await DispatchReadyEventAsync( + request, + State.ProducedReplyText ?? string.Empty, + outbound, + State.ProducedTerminalState, + State.ErrorCode ?? string.Empty, + State.ErrorSummary ?? string.Empty); + + // Past the point of user-visible delivery — swallow persistence/cleanup errors so + // they don't escalate to a duplicate fallback dispatch. See ProduceAndDispatchAsync + // for the full rationale. + await TryFinalizeAfterDispatchAsync(request, runId); + } + + /// + /// Post-dispatch state finalization. Once has + /// succeeded the user has the reply, so any state-persistence or cleanup-scheduling + /// failure from here on must NOT bubble up — otherwise the outer exception path + /// would treat this as an unhandled failure and re-dispatch a fallback reply, + /// surfacing a duplicate message to the user. + /// + private async Task TryFinalizeAfterDispatchAsync(NeedsLlmReplyEvent request, string runId) + { + try + { + await PersistReplyDispatchedAsync(request, runId); + } + catch (Exception ex) + { + _logger.LogError( + ex, + "Failed to persist AgentRunReplyDispatchedEvent after successful dispatch; " + + "state will replay as ReplyProduced+!ReplyDispatched until next reconciliation. " + + "runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + } + + try + { + await ScheduleTerminalCleanupAsync(runId); + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Failed to schedule terminal cleanup after successful dispatch; actor may " + + "linger until normal grain idle eviction. 
runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + } + } + + private async Task DropAsync(NeedsLlmReplyEvent request, string runId, string reason) + { + if (CanNotifyDrop(request)) + await DispatchDropNotificationAsync(request, reason); + + await PersistDomainEventAsync(new AgentRunDroppedEvent + { + RunId = runId, + CorrelationId = request.CorrelationId, + TargetActorId = request.TargetActorId, + Reason = reason, + DroppedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + }); + + await ScheduleTerminalCleanupAsync(runId); + } + + private async Task PersistReplyProducedAsync( + NeedsLlmReplyEvent request, + string runId, + string replyText, + MessageContent? outbound, + LlmReplyTerminalState terminalState, + string errorCode, + string errorSummary) + { + var evt = new AgentRunReplyProducedEvent + { + RunId = runId, + CorrelationId = request.CorrelationId, + TargetActorId = request.TargetActorId, + TerminalState = terminalState, + ErrorCode = errorCode, + ErrorSummary = errorSummary, + ProducedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + ReplyText = replyText ?? 
string.Empty, + }; + if (outbound is not null) + evt.Outbound = outbound.Clone(); + await PersistDomainEventAsync(evt); + } + + private async Task PersistReplyDispatchedAsync(NeedsLlmReplyEvent request, string runId) + { + await PersistDomainEventAsync(new AgentRunReplyDispatchedEvent + { + RunId = runId, + CorrelationId = request.CorrelationId, + TargetActorId = request.TargetActorId, + DispatchedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + }); + } + + private async Task PersistFailedAsync( + NeedsLlmReplyEvent request, + string runId, + string errorCode, + string errorSummary) + { + await PersistDomainEventAsync(new AgentRunFailedEvent + { + RunId = runId, + CorrelationId = request.CorrelationId, + TargetActorId = request.TargetActorId, + ErrorCode = errorCode, + ErrorSummary = errorSummary, + FailedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + }); + + await ScheduleTerminalCleanupAsync(runId); + } + + private async Task FailAfterUnexpectedExceptionAsync(NeedsLlmReplyEvent request, string runId, Exception ex) + { + const string errorCode = "agent_run_unhandled_exception"; + var errorSummary = ex.Message; + _logger.LogError( + ex, + "Agent run failed with unhandled exception: runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + + if (request.Activity is null || string.IsNullOrWhiteSpace(request.TargetActorId)) + { + // Cannot dispatch a fallback reply at all; terminate the run as Failed so the + // state is not left stuck in Started. + await PersistFailedAsync(request, runId, errorCode, errorSummary); + return; + } + + // Persist the fallback reply BEFORE dispatching so a dispatch retry replays from + // state rather than re-entering ProcessAsync (which would just throw again). If + // dispatch itself fails and we cannot schedule a retry, fall through to a Failed + // terminal marker with the dispatch error appended to errorSummary (carried over + // from feature/lark-bot's dispatch hardening). 
+ try + { + await ProduceAndDispatchAsync( + request, + runId, + "Sorry, I couldn't complete this reply. Please try again.", + null, + LlmReplyTerminalState.Failed, + errorCode, + errorSummary); + } + catch (AgentRunOutputDispatchException dispatchEx) + { + if (await TryHandleOutputDispatchFailureAsync(request, runId, dispatchEx)) + return; + + errorSummary = $"{errorSummary}; failed to dispatch failure notification: {dispatchEx.Message}"; + await PersistFailedAsync(request, runId, errorCode, errorSummary); + } + } + + private async Task DispatchReadyEventAsync( + NeedsLlmReplyEvent request, + string replyText, + MessageContent? outboundIntent, + LlmReplyTerminalState terminalState, + string errorCode, + string errorSummary) + { + if (string.IsNullOrWhiteSpace(request.TargetActorId)) + return; + + var ready = new LlmReplyReadyEvent + { + CorrelationId = request.CorrelationId, + RegistrationId = request.RegistrationId, + SourceActorId = Id, + Activity = request.Activity!.Clone(), + Outbound = outboundIntent?.Clone() ?? new MessageContent { Text = replyText }, + TerminalState = terminalState, + ErrorCode = errorCode, + ErrorSummary = errorSummary, + ReadyAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + // Echo the command-only relay credential straight back so ConversationGAgent's + // outbound reply does not depend on its in-memory token dict still having the + // entry. The actor consumes these fields and never persists them. + ReplyToken = request.ReplyToken ?? string.Empty, + ReplyTokenExpiresAtUnixMs = request.ReplyTokenExpiresAtUnixMs, + }; + try + { + using var outputCts = new CancellationTokenSource(OutputDispatchTimeout); + await SendToAsync(request.TargetActorId, ready, outputCts.Token); + } + catch (Exception ex) + { + throw new AgentRunOutputDispatchException( + $"Failed to send LLM reply ready event to conversation actor '{request.TargetActorId}'.", + ex); + } + } + + private TurnStreamingReplySink? 
TryBuildStreamingSink(NeedsLlmReplyEvent request, string targetActorId) + { + if (_relayOptions is not { StreamingRepliesEnabled: true }) + return null; + if (request.Activity?.OutboundDelivery is not + { + ReplyMessageId.Length: > 0, + CorrelationId.Length: > 0, + }) + { + return null; + } + if (string.IsNullOrWhiteSpace(request.CorrelationId)) + return null; + + var cardMode = _relayOptions.StreamingCardKitEnabled; + var throttle = TimeSpan.FromMilliseconds(Math.Max(0, cardMode + ? _relayOptions.StreamingCardKitFlushIntervalMs + : _relayOptions.StreamingFlushIntervalMs)); + var maxInterimChunks = cardMode + ? int.MaxValue + : Math.Max(0, _relayOptions.StreamingMaxInterimChunks); + return new TurnStreamingReplySink( + _actorDispatchPort, + targetActorId, + request.CorrelationId, + request.RegistrationId, + request.Activity.Clone(), + throttle, + _timeProvider, + _logger, + maxInterimChunks, + cardMode); + } + + private async Task> BuildEffectiveMetadataAsync( + NeedsLlmReplyEvent request, + CancellationToken ct) + { + var metadata = new Dictionary(request.Metadata, StringComparer.Ordinal); + + await ApplyBotOwnerLlmConfigAsync(request, metadata, ct); + + var userAccessToken = request.Activity?.TransportExtras?.NyxUserAccessToken?.Trim(); + if (!string.IsNullOrWhiteSpace(userAccessToken)) + { + metadata[LLMRequestMetadataKeys.NyxIdAccessToken] = userAccessToken; + metadata[LLMRequestMetadataKeys.NyxIdOrgToken] = userAccessToken; + } + + return metadata; + } + + private async Task ApplyBotOwnerLlmConfigAsync( + NeedsLlmReplyEvent request, + IDictionary metadata, + CancellationToken ct) + { + if (_scopeResolver is null || _userConfigQueryPort is null) + return; + + var apiKeyId = request.Activity?.Bot?.Value?.Trim(); + if (string.IsNullOrWhiteSpace(apiKeyId)) + return; + + string? 
scopeId; + try + { + scopeId = await _scopeResolver.ResolveScopeIdByApiKeyAsync(apiKeyId, ct); + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Failed to resolve bot owner scope id for LLM config: runId={RunId} correlation={CorrelationId} apiKeyId={ApiKeyId}", + Id, + request.CorrelationId, + apiKeyId); + return; + } + + if (string.IsNullOrWhiteSpace(scopeId)) + { + _logger.LogDebug( + "No bot owner scope id resolved for LLM config: runId={RunId} correlation={CorrelationId} apiKeyId={ApiKeyId}", + Id, + request.CorrelationId, + apiKeyId); + return; + } + + try + { + var config = await _userConfigQueryPort.GetAsync(scopeId, ct); + if (!string.IsNullOrWhiteSpace(config.DefaultModel)) + metadata[LLMRequestMetadataKeys.ModelOverride] = config.DefaultModel.Trim(); + if (!string.IsNullOrWhiteSpace(config.PreferredLlmRoute)) + metadata[LLMRequestMetadataKeys.NyxIdRoutePreference] = config.PreferredLlmRoute.Trim(); + if (config.MaxToolRounds > 0) + metadata[LLMRequestMetadataKeys.MaxToolRoundsOverride] = + config.MaxToolRounds.ToString(System.Globalization.CultureInfo.InvariantCulture); + + _logger.LogInformation( + "Applied bot owner LLM config: runId={RunId} correlation={CorrelationId} scopeId={ScopeId} model={Model} route={Route}", + Id, + request.CorrelationId, + scopeId, + string.IsNullOrWhiteSpace(config.DefaultModel) ? "" : config.DefaultModel, + string.IsNullOrWhiteSpace(config.PreferredLlmRoute) ? 
"" : config.PreferredLlmRoute); + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Failed to load bot owner LLM config: runId={RunId} correlation={CorrelationId} scopeId={ScopeId}", + Id, + request.CorrelationId, + scopeId); + } + } + + private TimeSpan ResolveFallbackTimeout() + { + if (_relayOptions is null) + return TimeSpan.FromSeconds(FallbackTimeoutSecondsDefault); + var configured = _relayOptions.ResponseTimeoutSeconds; + if (configured <= 0) + return TimeSpan.Zero; + return TimeSpan.FromSeconds(configured); + } + + private static bool IsRelayRequest(NeedsLlmReplyEvent request) => + request.Activity?.OutboundDelivery is + { + ReplyMessageId.Length: > 0, + CorrelationId.Length: > 0, + }; + + private static bool CanNotifyDrop(NeedsLlmReplyEvent request) => + !string.IsNullOrWhiteSpace(request.TargetActorId) && + !string.IsNullOrWhiteSpace(request.CorrelationId); + + private async Task DispatchDropNotificationAsync(NeedsLlmReplyEvent request, string reason) + { + var dropped = new DeferredLlmReplyDroppedEvent + { + CorrelationId = request.CorrelationId, + Reason = reason, + DroppedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), + }; + + try + { + using var outputCts = new CancellationTokenSource(OutputDispatchTimeout); + await SendToAsync(request.TargetActorId, dropped, outputCts.Token); + } + catch (Exception ex) + { + throw new AgentRunOutputDispatchException( + $"Failed to send deferred LLM reply drop event to conversation actor '{request.TargetActorId}' (reason '{reason}').", + ex); + } + } + + private async Task TryHandleOutputDispatchFailureAsync( + NeedsLlmReplyEvent request, + string runId, + AgentRunOutputDispatchException ex) + { + _logger.LogWarning( + ex, + "Agent run output notification was not accepted; run remains retryable: runId={RunId} correlation={CorrelationId}", + runId, + request.CorrelationId); + + if (await 
/// <summary>
/// Schedules a self-addressed <c>AgentRunCleanupRequested</c> callback that fires after
/// <c>TerminalCleanupDelay</c>. Does nothing when no callback scheduler is available;
/// scheduling failures are logged as warnings and not rethrown.
/// </summary>
/// <param name="runId">Run identifier placed on the cleanup event and used to derive the callback id.</param>
private async Task ScheduleTerminalCleanupAsync(string runId)
{
    var scheduler = _callbackScheduler;
    if (scheduler is null)
    {
        return;
    }

    try
    {
        // Everything that can throw stays inside the try so the failure is only logged.
        var callbackId = BuildCleanupCallbackId(runId);
        var cleanupRequested = new AgentRunCleanupRequested
        {
            RunId = runId,
            RequestedAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(),
        };
        var timeoutRequest = BuildTimeoutRequest(callbackId, TerminalCleanupDelay, cleanupRequested);

        await scheduler.ScheduleTimeoutAsync(timeoutRequest, ct: CancellationToken.None);
    }
    catch (Exception scheduleFailure)
    {
        _logger.LogWarning(
            scheduleFailure,
            "Failed to schedule terminal agent run cleanup: runId={RunId} actorId={ActorId}",
            runId,
            Id);
    }
}
/// <summary>
/// Derives the callback id used for an output-dispatch retry from a run id.
/// </summary>
/// <param name="runId">Run identifier; <c>"unknown"</c> is substituted when it normalizes to null.</param>
/// <returns>
/// <c>OutputDispatchRetryCallbackPrefix</c>, a colon, and at most 96 characters of the
/// normalized run id in which every character other than a letter, digit, '-' or '_' has
/// been replaced by '_'.
/// </returns>
private static string BuildOutputDispatchRetryCallbackId(string runId)
{
    const int MaxSuffixLength = 96;

    var source = NormalizeOptional(runId) ?? "unknown";
    var length = Math.Min(source.Length, MaxSuffixLength);
    var sanitized = new char[length];

    for (var i = 0; i < length; i++)
    {
        var ch = source[i];
        var allowed = char.IsLetterOrDigit(ch) || ch == '-' || ch == '_';
        sanitized[i] = allowed ? ch : '_';
    }

    return $"{OutputDispatchRetryCallbackPrefix}:{new string(sanitized)}";
}
evt.CorrelationId : next.CorrelationId; + next.TargetActorId = string.IsNullOrWhiteSpace(next.TargetActorId) ? evt.TargetActorId : next.TargetActorId; + next.Status = AgentRunStatus.ReplyProduced; + next.CompletedAtUnixMs = evt.ProducedAtUnixMs; + next.ErrorCode = evt.ErrorCode; + next.ErrorSummary = evt.ErrorSummary; + next.ProducedReplyText = evt.ReplyText ?? string.Empty; + next.ProducedOutbound = evt.Outbound?.Clone(); + next.ProducedTerminalState = evt.TerminalState; + // Backward-compat: AgentRunReplyProducedEvents persisted by the pre-refactor + // codepath have no reply_text / outbound / terminal_state fields (proto3 defaults + // on deserialize). Historically, Status=ReplyProduced was only written *after* the + // LlmReplyReadyEvent was successfully dispatched (old code's `await Dispatch...; + // await PersistReplyProduced...;` order), so those events semantically mean + // "handed off". Promote them straight to REPLY_HANDED_OFF on replay so: + // 1. ReDispatchProducedReplyAsync doesn't fire with an empty payload + // (would surface as a blank reply / structural error to the user). + // 2. HandleCleanupAsync recognizes them as terminal so the actor can be destroyed. + // + // Discriminator: legacy events have BOTH an empty reply_text AND a null outbound. + // The empty-text-alone check is not enough — interactive-only turns + // (reply_with_interaction etc.) legitimately produce empty reply_text + non-null + // outbound (card / button intent). Misclassifying those as "historical" would skip + // the dispatch retry on failure and silently drop the user's interactive reply. + if (string.IsNullOrEmpty(evt.ReplyText) && evt.Outbound is null) + next.Status = AgentRunStatus.ReplyHandedOff; + // For new events, Status stays at REPLY_PRODUCED here; promoted to REPLY_HANDED_OFF + // by ApplyReplyDispatched once the LlmReplyReadyEvent is accepted by the + // conversation actor (see ADR-0021). 
/// <summary>
/// State transition for <c>AgentRunReplyDispatchedEvent</c>: returns a clone of
/// <paramref name="current"/> with its status set to <c>ReplyHandedOff</c>.
/// Run id, correlation id and target actor id are taken from the event only when the
/// state does not already hold a non-blank value.
/// </summary>
/// <param name="current">State before the event; not mutated.</param>
/// <param name="evt">The dispatched event being applied.</param>
/// <returns>The updated copy of the state.</returns>
private static AgentRunGAgentState ApplyReplyDispatched(
    AgentRunGAgentState current,
    AgentRunReplyDispatchedEvent evt)
{
    var updated = current.Clone();

    // Backfill identifiers without overwriting values already recorded on the state.
    if (string.IsNullOrWhiteSpace(updated.RunId))
    {
        updated.RunId = evt.RunId;
    }

    if (string.IsNullOrWhiteSpace(updated.CorrelationId))
    {
        updated.CorrelationId = evt.CorrelationId;
    }

    if (string.IsNullOrWhiteSpace(updated.TargetActorId))
    {
        updated.TargetActorId = evt.TargetActorId;
    }

    // Committed -> handed-off (ADR-0021 AgentRunGAgent-side terminal).
    updated.Status = AgentRunStatus.ReplyHandedOff;
    return updated;
}
/// <summary>
/// Trims an optional string and collapses "nothing useful" to null.
/// </summary>
/// <param name="value">The raw value; may be null.</param>
/// <returns>
/// The trimmed value, or null when <paramref name="value"/> is null, empty, or
/// consists only of white space.
/// </returns>
private static string? NormalizeOptional(string? value)
{
    if (value is null)
    {
        return null;
    }

    // After Trim() a white-space-only input is the empty string.
    var candidate = value.Trim();
    return candidate.Length == 0 ? null : candidate;
}
Auth: bot owner's NyxID +/// access token from activity.TransportExtras.NyxUserAccessToken; receive target: +/// nyx_lark_chat_id for groups, falling back to nyx_lark_union_id for p2p +/// DMs (cross-app safe per the proto's documented invariants). +/// +public sealed class ChannelCardConversationTurnRunner : IConversationCardTurnRunner +{ + private static readonly JsonSerializerOptions JsonOptions = new(); + + private readonly ILarkCardKitClient _cardKit; + private readonly ILarkNyxClient _larkClient; + private readonly ILogger _logger; + + public ChannelCardConversationTurnRunner( + ILarkCardKitClient cardKit, + ILarkNyxClient larkClient, + ILogger logger) + { + _cardKit = cardKit ?? throw new ArgumentNullException(nameof(cardKit)); + _larkClient = larkClient ?? throw new ArgumentNullException(nameof(larkClient)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public async Task RunCardCreateAsync( + LlmReplyCardStreamChunkEvent chunk, + string streamingElementId, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct) + { + ArgumentNullException.ThrowIfNull(chunk); + if (chunk.Activity is null) + return ConversationCardCreateResult.Failed("activity_required", "Stream chunk event is missing the source activity."); + + var token = ResolveToken(chunk.Activity); + if (token is null) + return ConversationCardCreateResult.Failed("token_missing", "NyxID user access token is missing on the activity's TransportExtras."); + + var receiveTarget = ResolveReceiveTarget(chunk.Activity); + if (receiveTarget is null) + return ConversationCardCreateResult.Failed("receive_target_missing", "Lark chat_id and union_id are both missing on TransportExtras."); + + // 1. Allocate a CardKit entity holding an empty streaming element. The first chunk's + // text lands via StreamElementContentAsync (step 3) so the card_json schema and + // the streaming wire format stay decoupled. 
+ var initialCardJson = LarkStreamingCardShell.BuildInitialCardJson(streamingElementId); + string createResponse; + try + { + createResponse = await _cardKit.CreateCardAsync( + token, + new LarkCardKitCreateRequest("card_json", initialCardJson), + ct); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "CardKit card.create threw for correlation={CorrelationId}", chunk.CorrelationId); + return ConversationCardCreateResult.Failed("card_create_threw", ex.Message); + } + + if (LarkProxyResponseParser.TryParseError(createResponse, out var createError)) + return ClassifyCreateFailure("card_create_failed", createError); + + var cardId = ExtractCardId(createResponse); + if (string.IsNullOrWhiteSpace(cardId)) + return ConversationCardCreateResult.Failed("card_id_missing", "card.create response did not include data.card_id."); + + // 2. Bind the card to the chat by sending an interactive message that references it. + var contentJson = JsonSerializer.Serialize( + new { type = "card", data = new { card_id = cardId } }, + JsonOptions); + string sendResponse; + try + { + sendResponse = await _larkClient.SendMessageAsync( + token, + new LarkSendMessageRequest( + TargetType: receiveTarget.Value.ReceiveIdType, + TargetId: receiveTarget.Value.ReceiveId, + MessageType: "interactive", + ContentJson: contentJson, + IdempotencyKey: chunk.CorrelationId), + ct); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Card send-to-chat threw for correlation={CorrelationId}, card_id={CardId}", chunk.CorrelationId, cardId); + return ConversationCardCreateResult.Failed("card_send_threw", ex.Message); + } + + if (LarkProxyResponseParser.TryParseError(sendResponse, out var sendError)) + return ClassifyCreateFailure("card_send_failed", sendError); + + var cardMessageId = LarkProxyResponseParser.ParseSendSuccess(sendResponse).MessageId + ?? string.Empty; + + // 3. Write the first chunk's text into the streaming element. 
Sequence = 1 (the + // grain pre-allocates this value; subsequent chunks pass sequence+1 each call). + // The card has already been bound to the chat (step 2), so any failure from here + // on is a *post-send* failure: an empty card is visible in the chat. We must + // return PostSendFailed (not Failed) so the actor terminates the turn instead + // of falling back to text-edit and producing a duplicate reply. + string firstStreamResponse; + try + { + firstStreamResponse = await _cardKit.StreamElementContentAsync( + token, + new LarkCardKitStreamElementContentRequest( + CardId: cardId, + ElementId: streamingElementId, + Content: chunk.AccumulatedText, + Sequence: 1, + IdempotencyKey: $"{chunk.CorrelationId}-1"), + ct); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "CardKit first stream threw for correlation={CorrelationId}, card_id={CardId}", chunk.CorrelationId, cardId); + await TryBestEffortCloseStreamingAsync(token, cardId, sequence: 2, ct).ConfigureAwait(false); + return ConversationCardCreateResult.PostSendFailed( + cardId, + cardMessageId, + "card_first_stream_threw", + ex.Message); + } + + if (LarkProxyResponseParser.TryParseError(firstStreamResponse, out var firstStreamError)) + { + await TryBestEffortCloseStreamingAsync(token, cardId, sequence: 2, ct).ConfigureAwait(false); + return ClassifyPostSendFailure(cardId, cardMessageId, "card_first_stream_failed", firstStreamError); + } + + return ConversationCardCreateResult.Succeeded(cardId, cardMessageId); + } + + /// + /// Best-effort settings patch to close streaming_mode on a card whose first + /// content write failed. Stops the typewriter cursor on the orphan empty card so the + /// chat does not show a perpetually-loading bubble. Failures are logged and swallowed — + /// the parent operation has already failed; this is a UX cleanup, not a correctness gate. 
private async Task TryBestEffortCloseStreamingAsync(string token, string cardId, long sequence, CancellationToken ct)
{
    try
    {
        // Settings patch that switches streaming_mode off for the given card.
        var closeRequest = new LarkCardKitSettingsRequest(
            CardId: cardId,
            SettingsJson: """{"streaming_mode": false}""",
            Sequence: sequence,
            IdempotencyKey: $"orphan-close-{cardId}");

        await _cardKit.SetCardSettingsAsync(token, closeRequest, ct);
    }
    catch (Exception closeFailure)
    {
        // Swallowed on purpose: the caller is already on its failure path.
        _logger.LogWarning(closeFailure, "Best-effort close of orphan streaming card failed; cursor may remain visible. card_id={CardId}", cardId);
    }
}
elementId, + string finalText, + bool finalTextDiffersFromLastFlushed, + long sequence, + ConversationTurnRuntimeContext runtimeContext, + CancellationToken ct) + { + ArgumentNullException.ThrowIfNull(referenceActivity); + + var token = ResolveToken(referenceActivity); + if (token is null) + return ConversationCardFinalizeResult.Failed("token_missing", "NyxID user access token is missing on the reference activity's TransportExtras."); + + // 1. If final text drifted from the last flushed interim, write it before closing + // streaming mode. Order matters: closing streaming first would freeze the cursor + // on the stale text. Track whether the trailing write actually landed so the + // actor can pick the right user-visible text on a partial-failure terminal. + long workingSequence = sequence; + var finalTextWritten = !finalTextDiffersFromLastFlushed || string.IsNullOrWhiteSpace(finalText); + if (finalTextDiffersFromLastFlushed && !string.IsNullOrWhiteSpace(finalText)) + { + try + { + var streamFinalResponse = await _cardKit.StreamElementContentAsync( + token, + new LarkCardKitStreamElementContentRequest( + CardId: cardId, + ElementId: elementId, + Content: finalText, + Sequence: workingSequence, + IdempotencyKey: $"final-{cardId}-{workingSequence}"), + ct); + if (LarkProxyResponseParser.TryParseError(streamFinalResponse, out var streamFinalError)) + return ConversationCardFinalizeResult.Failed("card_final_stream_failed", streamFinalError, finalTextWritten: false); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "CardKit final stream threw for card_id={CardId}, seq={Sequence}", cardId, workingSequence); + return ConversationCardFinalizeResult.Failed("card_final_stream_threw", ex.Message, finalTextWritten: false); + } + finalTextWritten = true; + workingSequence++; + } + + // 2. Close the card's streaming mode so the typewriter cursor disappears. 
/// <summary>
/// Reads the NyxID user access token from the activity's transport extras.
/// </summary>
/// <param name="activity">The activity whose <c>TransportExtras.NyxUserAccessToken</c> is read.</param>
/// <returns>The trimmed token, or null when it is absent, empty, or white space only.</returns>
private static string? ResolveToken(ChatActivity activity)
{
    var raw = activity.TransportExtras?.NyxUserAccessToken;
    if (raw is null)
    {
        return null;
    }

    var trimmed = raw.Trim();
    return trimmed.Length == 0 ? null : trimmed;
}
/// <summary>
/// Best-effort extract of <c>data.card_id</c> from the <c>cardkit/v1/cards</c> response.
/// </summary>
/// <param name="response">Raw response text, expected to be a JSON object.</param>
/// <returns>
/// The card id string, or null when the response is null/blank, is not valid JSON, is not
/// a JSON object, has no object-valued <c>data</c> member, or <c>data.card_id</c> is
/// missing or not a string. The caller treats null as a terminal create failure.
/// </returns>
private static string? ExtractCardId(string response)
{
    // JsonDocument.Parse rejects a null string with ArgumentNullException, which the
    // JsonException handler below would not catch.
    if (string.IsNullOrWhiteSpace(response))
        return null;

    try
    {
        using var document = JsonDocument.Parse(response);
        var root = document.RootElement;

        // JsonElement.TryGetProperty throws InvalidOperationException when the element is
        // not a JSON object (e.g. a root array, or "data": null), so check ValueKind
        // before each lookup to keep the "malformed => null" contract.
        if (root.ValueKind == JsonValueKind.Object &&
            root.TryGetProperty("data", out var data) &&
            data.ValueKind == JsonValueKind.Object &&
            data.TryGetProperty("card_id", out var cardIdProp) &&
            cardIdProp.ValueKind == JsonValueKind.String)
        {
            return cardIdProp.GetString();
        }
    }
    catch (JsonException)
    {
        return null;
    }

    return null;
}
/// <summary>
/// Wraps a Lark error string in a failed <c>ConversationCardStreamResult</c> with error
/// code <c>card_stream_failed</c>, setting the classification flags from the Lark codes
/// found in the string.
/// </summary>
/// <param name="larkError">Error text; scanned for <c>lark_code=N</c> markers and passed through as the summary.</param>
/// <returns>The failed result with its rate-limit / table-limit / card-unavailable flags populated.</returns>
private static ConversationCardStreamResult ClassifyStreamFailure(string larkError)
{
    // 230020 -> rate limited; 11310 -> table limit exceeded; 230099 or 230100 -> card unavailable.
    var rateLimited = ContainsLarkCode(larkError, 230020);
    var tableLimitExceeded = ContainsLarkCode(larkError, 11310);
    var cardUnavailable = ContainsLarkCode(larkError, 230099) || ContainsLarkCode(larkError, 230100);

    return ConversationCardStreamResult.Failed(
        errorCode: "card_stream_failed",
        errorSummary: larkError,
        isRateLimited: rateLimited,
        isTableLimitExceeded: tableLimitExceeded,
        isCardUnavailable: cardUnavailable);
}
private static bool ContainsLarkCode(string error, int code)
{
    if (string.IsNullOrEmpty(error))
    {
        return false;
    }

    var marker = $"lark_code={code}";
    var lastPossibleStart = error.Length - marker.Length;
    var cursor = 0;

    while (cursor <= lastPossibleStart)
    {
        var hit = error.IndexOf(marker, cursor, StringComparison.Ordinal);
        if (hit < 0)
        {
            break;
        }

        // Accept the hit only if the code is not immediately followed by another digit,
        // i.e. the marker is not merely a prefix of a longer code.
        var after = hit + marker.Length;
        var atBoundary = after == error.Length || !char.IsDigit(error[after]);
        if (atBoundary)
        {
            return true;
        }

        // Prefix-only hit: resume scanning just past it.
        cursor = after;
    }

    return false;
}
_registrationQueryByNyxIdentityPort; @@ -95,10 +101,10 @@ public async Task RunInboundAsync( return ConversationTurnResult.PermanentFailure("registration_not_found", "Channel registration not found."); // Capture the typing-reaction Task instead of `_ =`-discarding it. The direct-reply - // AgentBuilder path can complete fast enough that the swap fires before Lark has - // persisted the typing reaction; the swap GET would then find nothing to delete and - // leave both Typing + DONE on the message. Threading the task to the swap site lets - // the swap await-with-timeout the typing POST first. The deferred-LLM and streaming + // AgentBuilder path can complete fast enough that the clear fires before Lark has + // persisted the typing reaction; the clear GET would then find nothing to delete and + // leave Typing on the message. Threading the task to the clear site lets the clear + // await-with-timeout the typing POST first. The deferred-LLM and streaming // paths don't get this task (different invocation), but their natural latency is // orders of magnitude greater than the typing POST so the race cannot fire. var typingReactionTask = TrySendImmediateLarkReactionAsync(activity, registration, ct); @@ -113,19 +119,13 @@ public async Task RunInboundAsync( if (await TryHandleSlashCommandAsync(activity, inbound, registration, runtimeContext, ct) is { } slashResult) return slashResult; - // Pre-LLM binding gate: when broker mode is wired, an unbound sender - // MUST be prompted to bind NyxID rather than served by the bot owner's - // credentials (codex L65 security: ADR-0018 §Decision "未绑定 sender - // 一律强制绑定,不回落到 bot owner"). Falls through transparently - // when identity ports are not registered (legacy bot-owner-shared - // deployments). The gate also returns the resolved binding-id so the - // LLM dispatch can apply the sender prefs override chain (issue #513 - // phase 3) without paying for a second projection lookup. 
- var (bindingGateResult, senderBindingId) = await TryEnforceBindingGateAsync(activity, inbound, registration, runtimeContext, ct).ConfigureAwait(false); - if (bindingGateResult is not null) - return bindingGateResult; - - if (await TryHandleLlmSelectionCardActionAsync(activity, inbound, registration, runtimeContext, senderBindingId, ct).ConfigureAwait(false) is { } llmSelectionResult) + // Normal LLM messages do not force /init. If the sender is bound we + // carry that binding forward so the reply generator can try the + // sender's own NyxID LLM prefs first; otherwise the run actor/generator + // will use the bot owner's ambient LLM config. + var senderBinding = await TryResolveSenderBindingAsync(inbound, registration, ct).ConfigureAwait(false); + + if (await TryHandleLlmSelectionCardActionAsync(activity, inbound, registration, runtimeContext, senderBinding?.BindingId, ct).ConfigureAwait(false) is { } llmSelectionResult) return llmSelectionResult; var inboundEvent = ToInboundEvent(activity, registration, inbound, ResolveUserAccessToken(activity)); @@ -157,7 +157,7 @@ public async Task RunInboundAsync( } return ConversationTurnResult.LlmReplyRequested( - await BuildLlmReplyRequestAsync(activity, registration, inboundEvent, runtimeContext, senderBindingId, ct).ConfigureAwait(false)); + await BuildLlmReplyRequestAsync(activity, registration, inboundEvent, runtimeContext, senderBinding, ct).ConfigureAwait(false)); } public Task RunInboundAsync(ChatActivity activity, CancellationToken ct) => @@ -165,16 +165,16 @@ public Task RunInboundAsync(ChatActivity activity, Cance // ─── Slash command dispatch ─── // - // ADR-0018 §Decision: when per-user binding is enabled, slash commands - // (/init, /unbind, /whoami, /model, ...) are routed before the LLM so the - // bot owner's bot-shared mode is bypassed for unbound senders. Handlers + // Slash commands (/init, /unbind, /whoami, /model, ...) 
are routed before + // the LLM so binding/configuration commands can own their per-user + // semantics without being swallowed by the chat model. Handlers // are discovered as IEnumerable from DI; // identity ports are constructor-injected as optional capabilities so // deployments that have not enabled binding fall through to the legacy // flow. Phase 6 (issue #513): // each handler declares RequiresBinding so unbound senders trying to use - // a binding-only command (e.g. /model use) get the same hint as the LLM- - // turn binding gate instead of a stack trace. + // a binding-only command (e.g. /model use) get a binding hint instead of + // a stack trace; normal LLM turns still have owner fallback. private async Task TryHandleSlashCommandAsync( ChatActivity activity, InboundMessage inbound, @@ -435,59 +435,73 @@ private static bool TryResolveExternalSubject( return true; } - // Pre-LLM binding gate: when identity is wired, refuse to serve unbound - // senders with the bot owner's credentials (ADR-0018 §Decision). Returns - // (null, null) when binding is not enabled (legacy mode); returns - // (prompt, null) for unbound senders so the caller short-circuits with - // a binding prompt/card; returns (null, bindingId) for bound senders so the LLM - // dispatch can carry the binding-id forward into metadata for the issue - // #513 phase 3 prefs override chain. - private async Task<(ConversationTurnResult? Blocking, string? SenderBindingId)> TryEnforceBindingGateAsync( - ChatActivity activity, + // Normal LLM messages are allowed to use the bot owner's LLM config when + // the sender has no NyxID binding. Binding is only required by commands + // that configure or inspect per-user state (/models, /model use, ...). 
+ private async Task TryResolveSenderBindingAsync( InboundMessage inbound, ChannelBotRegistrationEntry registration, - ConversationTurnRuntimeContext runtimeContext, CancellationToken ct) { var queryPort = _identityBindingQueryPort; if (queryPort is null) - return (null, null); - - if (string.IsNullOrWhiteSpace(inbound.SenderId) || string.IsNullOrWhiteSpace(inbound.Platform)) - return (null, null); - - var tenant = ResolveTenant(inbound, registration); - if (tenant is null) - return (null, null); + return null; - var subject = new ExternalSubjectRef - { - Platform = inbound.Platform.Trim().ToLowerInvariant(), - Tenant = tenant, - ExternalUserId = inbound.SenderId.Trim(), - }; + if (!TryResolveExternalSubject(inbound, registration, out var subject)) + return null; BindingId? existing; try { existing = await queryPort.ResolveAsync(subject, ct); } + catch (OperationCanceledException) + { + throw; + } + catch (Exception ex) when (IsTransientBindingLookupFailure(ex)) + { + // Transient infra failures (DB blip, transient HTTP, JSON shape mismatch from + // upstream): degrade to owner credentials and keep the conversation alive. + _logger.LogWarning( + ex, + "Transient sender NyxID binding lookup failure; falling back to bot owner LLM config. subject={Platform}:{Tenant}:{User}", + subject.Platform, + subject.Tenant, + subject.ExternalUserId); + return null; + } catch (Exception ex) { - // Resolve failure should fail closed (refuse to serve with - // bot-owner credentials) rather than fail open. Log and treat as - // unbound. - _logger.LogError(ex, "Binding gate resolve failed for sender {Sender}; treating as unbound", inbound.SenderId); - existing = null; + // Non-transient (programmer error, unexpected NRE, serialization break): surface + // at Error level so ops can distinguish from "sender just isn't bound" — but still + // fall through to owner credentials so the user gets a reply rather than nothing. 
+ _logger.LogError( + ex, + "Sender NyxID binding lookup raised non-transient exception; falling back to bot owner LLM config. subject={Platform}:{Tenant}:{User}", + subject.Platform, + subject.Tenant, + subject.ExternalUserId); + return null; } if (existing is not null) - return (null, existing.Value); // bound — continue with sender binding-id + return new ResolvedSenderBinding(existing.Value, subject.Clone()); - var prompt = await SendBindingPromptAsync(activity, inbound, registration, runtimeContext, ct).ConfigureAwait(false); - return (prompt, null); + return null; } + /// + /// Distinguish infra-shaped binding lookup failures (worth a Warning + owner fallback) + /// from logic/programmer errors (worth an Error log so ops sees them). + /// + private static bool IsTransientBindingLookupFailure(Exception ex) => + ex is HttpRequestException + or TimeoutException + or TaskCanceledException + or System.Text.Json.JsonException + or System.IO.IOException; + // Lark-aware private-chat detection. Other platforms map their direct- // message chat-type strings here as the runner gains support for them. private static bool IsPrivateChat(InboundMessage inbound) @@ -610,8 +624,8 @@ private async Task ExecuteLlmSelectionCardActionAsync( await selectionService.SetByServiceAsync(selectionContext, value.Trim(), modelOverride: null, ct) .ConfigureAwait(false); var updated = await optionsService.GetOptionsAsync(query, ct).ConfigureAwait(false); - var picked = updated.Available.FirstOrDefault(option => - string.Equals(option.ServiceId, value.Trim(), StringComparison.OrdinalIgnoreCase)) ?? updated.Current; + var picked = updated.Current ?? updated.Available.FirstOrDefault(option => + string.Equals(option.ServiceId, value.Trim(), StringComparison.OrdinalIgnoreCase)); return picked is null ? 
new MessageContent { Text = "已切换 LLM service。下一条消息会用新的设置回复。" } : renderer.RenderSelectionConfirm(picked, picked.DefaultModel); @@ -730,10 +744,10 @@ public async Task RunLlmReplyAsync( var inbound = ToInboundMessage(reply.Activity); // Direct path requires registration to actually send the reply; relay path only wants it - // for the post-reply reaction swap (relay sends use the reply token, not registration). + // for the post-reply reaction clear (relay sends use the reply token, not registration). // So lookup is mandatory on the direct path and best-effort on the relay path — a // transient registration-store error on the relay path must not drop an otherwise valid - // reply, only degrade the swap to a no-op for that turn. + // reply, only degrade the clear to a no-op for that turn. ChannelBotRegistrationEntry? registration; if (HasRelayDelivery(inbound)) { @@ -749,7 +763,7 @@ public async Task RunLlmReplyAsync( { _logger.LogWarning( ex, - "Registration lookup failed on relay reply path; reply will proceed but post-reply reaction swap will be skipped. correlation={CorrelationId}", + "Registration lookup failed on relay reply path; reply will proceed but post-reply reaction clear will be skipped. correlation={CorrelationId}", reply.CorrelationId); registration = null; } @@ -777,7 +791,7 @@ public async Task RunLlmReplyAsync( runtimeContext, ct); if (result.Success) - _ = TrySwapTypingReactionToDoneAsync(inbound, registration, ct); + _ = TryClearTypingReactionAsync(inbound, registration, ct); return result; } @@ -829,9 +843,9 @@ public async Task RunContinueAsync( public async Task OnReplyDeliveredAsync(ChatActivity activity, CancellationToken ct) { // Streaming-completion path in ConversationGAgent calls this hook because it finalizes - // the reply without going through RunLlmReplyAsync (which is where the non-streaming swap - // lives). 
For non-Lark platforms or activities missing the platform message id, the swap - // helper short-circuits in ShouldSwapTypingReaction. + // the reply without going through RunLlmReplyAsync (which is where the non-streaming clear + // lives). For non-Lark platforms or activities missing the platform message id, the clear + // helper short-circuits in ShouldClearTypingReaction. if (activity is null) return; @@ -840,7 +854,7 @@ public async Task OnReplyDeliveredAsync(ChatActivity activity, CancellationToken return; var inbound = ToInboundMessage(activity); - await TrySwapTypingReactionToDoneAsync(inbound, registration, ct); + await TryClearTypingReactionAsync(inbound, registration, ct); } public async Task RunStreamChunkAsync( @@ -978,7 +992,7 @@ public async Task RunStreamChunkAsync( runtimeContext, ct); if (result.Success) - _ = AwaitTypingReactionThenSwapAsync(typingReactionTask, inbound, registration, ct); + _ = AwaitTypingReactionThenClearAsync(typingReactionTask, inbound, registration, ct); return result.Success ? ConversationTurnResult.Sent( sentActivityId: $"direct-reply:{activity.Id}", @@ -1485,21 +1499,22 @@ private async Task BuildLlmReplyRequestAsync( ChannelBotRegistrationEntry registration, ChannelInboundEvent inboundEvent, ConversationTurnRuntimeContext runtimeContext, - string? senderBindingId, + ResolvedSenderBinding? senderBinding, CancellationToken ct) { + var requestActivity = BuildLlmRequestActivity(activity, inboundEvent.Text); var request = new NeedsLlmReplyEvent { CorrelationId = activity.Id, TargetActorId = ConversationGAgent.BuildActorId(activity.Conversation!.CanonicalKey), RegistrationId = registration.Id, - Activity = activity.Clone(), + Activity = requestActivity, RequestedAtUnixMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), }; - // Carry the relay reply credential through the inbox as transient inbox-only + // Carry the relay reply credential through the run command as transient command-only // fields. 
ConversationGAgent strips these before persisting NeedsLlmReplyEvent; - // ChannelLlmReplyInboxRuntime echoes them into the LlmReplyReadyEvent so the + // AgentRunGAgent echoes them into the LlmReplyReadyEvent so the // outbound reply does not depend on the actor's in-memory token dict surviving // deactivation. if (runtimeContext.NyxRelayReplyToken is { } token && @@ -1512,15 +1527,89 @@ private async Task BuildLlmReplyRequestAsync( foreach (var pair in await BuildReplyMetadataAsync(inboundEvent, activity, ct)) request.Metadata[pair.Key] = pair.Value; - // Issue #513 phase 3: tag the request with the sender's binding-id so - // the downstream reply generator can apply the prefs override chain - // (sender → bot owner → provider default). - if (!string.IsNullOrWhiteSpace(senderBindingId)) - request.Metadata[LLMRequestMetadataKeys.SenderBindingId] = senderBindingId; + // Tag the request with the sender's binding-id and a short-lived token + // so the downstream reply generator can try the sender's own LLM + // route first. Missing token/binding is not an error: the generator + // falls back to the bot owner's upstream-pinned LLM config. + if (senderBinding is not null) + { + request.Metadata[LLMRequestMetadataKeys.SenderBindingId] = senderBinding.BindingId; + var senderAccessToken = await TryIssueSenderLlmAccessTokenAsync(senderBinding.Subject, ct).ConfigureAwait(false); + if (!string.IsNullOrWhiteSpace(senderAccessToken)) + request.Metadata[LLMRequestMetadataKeys.SenderNyxIdAccessToken] = senderAccessToken; + } return request; } + private static ChatActivity BuildLlmRequestActivity(ChatActivity activity, string? inboundText) + { + var requestActivity = activity.Clone(); + if (requestActivity.Content is null) + return requestActivity; + + if (TryBuildDailySkillInvocationPrompt(inboundText, out var prompt)) + requestActivity.Content.Text = prompt; + + return requestActivity; + } + + private static bool TryBuildDailySkillInvocationPrompt(string? 
text, out string prompt) + { + prompt = string.Empty; + if (!TryParseSlashCommand(text, out var commandName, out var argumentText) || + !string.Equals(commandName, "daily", StringComparison.OrdinalIgnoreCase)) + { + return false; + } + + var argsJson = JsonSerializer.Serialize(argumentText); + var originalJson = JsonSerializer.Serialize((text ?? string.Empty).Trim()); + prompt = + "The user invoked the Lark `/daily` shortcut.\n" + + $"Route this turn through the Ornn skill `{DailySkillName}`.\n" + + $"First call `use_skill` with `skill` = `{DailySkillName}` and `args` = {argsJson}, " + + "then follow the loaded skill instructions to complete the request.\n" + + $"Original command: {originalJson}"; + return true; + } + + private async Task TryIssueSenderLlmAccessTokenAsync( + ExternalSubjectRef subject, + CancellationToken ct) + { + var broker = _capabilityBroker; + if (broker is null) + return null; + + try + { + var handle = await broker + .IssueShortLivedAsync( + subject, + new CapabilityScope { Value = AevatarOAuthClientScopes.Proxy }, + ct) + .ConfigureAwait(false); + return string.IsNullOrWhiteSpace(handle.AccessToken) + ? null + : handle.AccessToken.Trim(); + } + catch (OperationCanceledException) + { + throw; + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Failed to issue sender NyxID LLM token; falling back to bot owner LLM config. subject={Platform}:{Tenant}:{User}", + subject.Platform, + subject.Tenant, + subject.ExternalUserId); + return null; + } + } + private static string ResolveRoutingConversationId(ConversationReference? conversation) { if (conversation is null) @@ -1629,10 +1718,10 @@ activity.OutboundDelivery is string.Equals(NormalizeOptional(activity.Bot?.Value), nyxAgentApiKeyId, StringComparison.Ordinal); // Lark reaction emoji_type for "hands typing on keyboard" — added immediately on inbound - // so the user sees the bot is working before the LLM reply lands. 
Swapped to DoneReactionEmojiType - // after the reply succeeds so the same message ends up with a single completion reaction. + // so the user sees the bot is working before the LLM reply lands. After a reply succeeds, + // the reaction is cleared instead of replaced with DONE because DONE reads as task completion, + // while a chat reply can be an intermediate progress update. private const string TypingReactionEmojiType = "Typing"; - private const string DoneReactionEmojiType = "DONE"; private async Task TrySendImmediateLarkReactionAsync( ChatActivity activity, @@ -1698,14 +1787,12 @@ private async Task TrySendImmediateLarkReactionAsync( } // Direct-reply paths (TryHandleAgentBuilderAsync) can complete a slash-command reply faster - // than the typing POST takes to land in Lark, leaving the swap GET to find no Typing reaction - // to delete and the orphaned typing reaction to materialize after DONE was already added — - // both reactions on the same message. Awaiting (with a short cap) the typing task before the - // GET closes that race. The cap protects against a hung POST stalling the swap forever; if it - // expires the swap still proceeds — Lark will at worst end up with both reactions, same as - // before this guard. The deferred-LLM and streaming paths skip this guard because their reply - // latency dwarfs the typing POST and so cannot race. - private async Task AwaitTypingReactionThenSwapAsync( + // than the typing POST takes to land in Lark, leaving the clear GET to find no Typing reaction + // to delete and the orphaned typing reaction to materialize after the clear already ran. + // Awaiting (with a short cap) the typing task before the GET closes that race. The cap protects + // against a hung POST stalling the clear forever. The deferred-LLM and streaming paths skip this + // guard because their reply latency dwarfs the typing POST and so cannot race. 
+ private async Task AwaitTypingReactionThenClearAsync( Task typingReactionTask, InboundMessage inbound, ChannelBotRegistrationEntry registration, @@ -1722,24 +1809,23 @@ private async Task AwaitTypingReactionThenSwapAsync( catch (TimeoutException) { _logger.LogDebug( - "Lark typing reaction task did not complete within timeout before swap; proceeding anyway"); + "Lark typing reaction task did not complete within timeout before clear; proceeding anyway"); } catch (Exception) { - // The typing task already logged its own exception — proceed with the swap so the - // user-visible message still ends up with a DONE reaction whenever possible. + // The typing task already logged its own exception — proceed with the clear so any + // already-visible Typing reaction is still removed whenever possible. } - await TrySwapTypingReactionToDoneAsync(inbound, registration, ct); + await TryClearTypingReactionAsync(inbound, registration, ct); } - // After a successful reply, replace the bot's "Typing" reaction with a "DONE" reaction so the - // same message ends with a single completion marker. Uses list-based discovery (filter by + // After a successful reply, remove the bot's "Typing" reaction. Uses list-based discovery (filter by // emoji_type=Typing AND operator_type=app) instead of caching the immediate reaction's // reaction_id locally — the runner is a singleton and cross-turn state on it would violate the // "中间层进程内缓存作为事实源" rule. Filtering on operator_type=app avoids deleting any user // who happened to add the same Typing reaction. - private async Task TrySwapTypingReactionToDoneAsync( + private async Task TryClearTypingReactionAsync( InboundMessage inbound, ChannelBotRegistrationEntry? 
registration, CancellationToken ct) @@ -1747,7 +1833,7 @@ private async Task TrySwapTypingReactionToDoneAsync( if (registration is null) return; - if (!ShouldSwapTypingReaction(inbound, registration, out var accessToken, out var providerSlug, out var platformMessageId)) + if (!ShouldClearTypingReaction(inbound, registration, out var accessToken, out var providerSlug, out var platformMessageId)) return; try @@ -1755,7 +1841,7 @@ private async Task TrySwapTypingReactionToDoneAsync( var reactionIds = new List(); string? pageToken = null; // Bound the iteration so a misbehaving Lark response (e.g. always-true `has_more`) - // can't loop the swap forever. 10 pages × 50 per page = 500 Typing reactions on a + // can't loop the clear forever. 10 pages × 50 per page = 500 Typing reactions on a // single message — orders of magnitude more than realistic, since this list is // already scoped to one emoji_type and the bot only adds Typing once per inbound. const int MaxListPages = 10; @@ -1777,7 +1863,7 @@ private async Task TrySwapTypingReactionToDoneAsync( if (LarkProxyResponse.TryGetError(listResponse, out var listCode, out var listDetail)) { _logger.LogDebug( - "Lark typing reaction list failed; skipping swap: provider={ProviderSlug}, message={MessageId}, page={Page}, larkCode={LarkCode}, detail={Detail}", + "Lark typing reaction list failed; skipping clear: provider={ProviderSlug}, message={MessageId}, page={Page}, larkCode={LarkCode}, detail={Detail}", providerSlug, platformMessageId, page, @@ -1835,35 +1921,6 @@ private async Task TrySwapTypingReactionToDoneAsync( } } - var addResponse = await _nyxClient.ProxyRequestAsync( - accessToken!, - providerSlug!, - $"/open-apis/im/v1/messages/{Uri.EscapeDataString(platformMessageId!)}/reactions", - "POST", - $$$"""{"reaction_type":{"emoji_type":"{{{DoneReactionEmojiType}}}"}}""", - null, - ct); - - if (LarkProxyResponse.TryGetError(addResponse, out var addCode, out var addDetail)) - { - if (addCode == 
LarkBotErrorCodes.NoPermissionToReact) - { - _logger.LogDebug( - "Lark done reaction skipped (missing reaction scope): provider={ProviderSlug}, message={MessageId}, detail={Detail}", - providerSlug, - platformMessageId, - addDetail); - } - else - { - _logger.LogWarning( - "Lark done reaction failed: provider={ProviderSlug}, message={MessageId}, larkCode={LarkCode}, detail={Detail}", - providerSlug, - platformMessageId, - addCode, - addDetail); - } - } } catch (OperationCanceledException) when (ct.IsCancellationRequested) { @@ -1873,7 +1930,7 @@ private async Task TrySwapTypingReactionToDoneAsync( { _logger.LogWarning( ex, - "Lark typing→done reaction swap threw: provider={ProviderSlug}, message={MessageId}", + "Lark typing reaction clear threw: provider={ProviderSlug}, message={MessageId}", providerSlug, platformMessageId); } @@ -1930,7 +1987,7 @@ private static (List AppReactionIds, string? NextPageToken) ExtractAppRe continue; // Only delete reactions added by the bot itself (operator_type=app); leave any - // user-added Typing reactions alone so the swap doesn't accidentally erase them. + // user-added Typing reactions alone so the clear doesn't accidentally erase them. if (!item.TryGetProperty("operator", out var operatorProp) || operatorProp.ValueKind != JsonValueKind.Object) { @@ -1958,7 +2015,7 @@ private static (List AppReactionIds, string? NextPageToken) ExtractAppRe return (ids, nextPageToken); } - private static bool ShouldSwapTypingReaction( + private static bool ShouldClearTypingReaction( InboundMessage inbound, ChannelBotRegistrationEntry registration, out string? 
accessToken, diff --git a/agents/Aevatar.GAgents.NyxidChat/ChannelLlmReplyInboxRuntime.cs b/agents/Aevatar.GAgents.NyxidChat/ChannelLlmReplyInboxRuntime.cs deleted file mode 100644 index 493161e01..000000000 --- a/agents/Aevatar.GAgents.NyxidChat/ChannelLlmReplyInboxRuntime.cs +++ /dev/null @@ -1,443 +0,0 @@ -using Aevatar.Foundation.Abstractions; -using Aevatar.Foundation.Abstractions.Streaming; -using Aevatar.AI.Abstractions.LLMProviders; -using Aevatar.GAgents.Channel.Abstractions; -using Aevatar.GAgents.Channel.Runtime; -using Aevatar.GAgents.Channel.NyxIdRelay; -using Aevatar.GAgents.NyxidChat; -using Aevatar.Studio.Application.Studio.Abstractions; -using Google.Protobuf.WellKnownTypes; -using Microsoft.Extensions.Hosting; -using Microsoft.Extensions.Logging; - -namespace Aevatar.GAgents.NyxidChat; - -public sealed class ChannelLlmReplyInboxRuntime : - IHostedService, - IAsyncDisposable, - IChannelLlmReplyInbox -{ - internal const string InboxStreamId = "channel-runtime:llm-reply:inbox"; - - private readonly IStreamProvider _streamProvider; - private readonly IActorRuntime _actorRuntime; - private readonly IActorDispatchPort _actorDispatchPort; - private readonly IConversationReplyGenerator _replyGenerator; - private readonly IInteractiveReplyCollector? _interactiveReplyCollector; - private readonly Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions? _relayOptions; - private readonly INyxIdRelayScopeResolver? _scopeResolver; - private readonly IUserConfigQueryPort? _userConfigQueryPort; - private readonly TimeProvider _timeProvider; - private readonly ILogger _logger; - private IAsyncDisposable? _subscription; - - public ChannelLlmReplyInboxRuntime( - IStreamProvider streamProvider, - IActorRuntime actorRuntime, - IConversationReplyGenerator replyGenerator, - IInteractiveReplyCollector? interactiveReplyCollector, - Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions? relayOptions, - ILogger logger, - INyxIdRelayScopeResolver? 
scopeResolver = null, - IUserConfigQueryPort? userConfigQueryPort = null, - TimeProvider? timeProvider = null, - IActorDispatchPort? actorDispatchPort = null) - { - _streamProvider = streamProvider ?? throw new ArgumentNullException(nameof(streamProvider)); - _actorRuntime = actorRuntime ?? throw new ArgumentNullException(nameof(actorRuntime)); - _actorDispatchPort = actorDispatchPort - ?? actorRuntime as IActorDispatchPort - ?? throw new ArgumentNullException(nameof(actorDispatchPort)); - _replyGenerator = replyGenerator ?? throw new ArgumentNullException(nameof(replyGenerator)); - _interactiveReplyCollector = interactiveReplyCollector; - _relayOptions = relayOptions; - _scopeResolver = scopeResolver; - _userConfigQueryPort = userConfigQueryPort; - _timeProvider = timeProvider ?? TimeProvider.System; - _logger = logger ?? throw new ArgumentNullException(nameof(logger)); - } - - public async Task StartAsync(CancellationToken ct) - { - if (_subscription is not null) - return; - - _subscription = await _streamProvider - .GetStream(InboxStreamId) - .SubscribeAsync(ProcessAsync, ct); - - _logger.LogInformation("Started channel LLM reply inbox on {StreamId}", InboxStreamId); - } - - public async Task StopAsync(CancellationToken ct) - { - if (_subscription is null) - return; - - await _subscription.DisposeAsync(); - _subscription = null; - _logger.LogInformation("Stopped channel LLM reply inbox on {StreamId}", InboxStreamId); - } - - public Task EnqueueAsync(NeedsLlmReplyEvent request, CancellationToken ct) - { - ArgumentNullException.ThrowIfNull(request); - return _streamProvider.GetStream(InboxStreamId).ProduceAsync(request, ct); - } - - public async ValueTask DisposeAsync() - { - await StopAsync(CancellationToken.None); - } - - internal const long MaxInboxRequestAgeMs = 5 * 60 * 1000; - - internal async Task ProcessAsync(NeedsLlmReplyEvent request) - { - ArgumentNullException.ThrowIfNull(request); - - _logger.LogInformation( - "Processing LLM reply request: 
correlation={CorrelationId} target={TargetActorId}", - request.CorrelationId, - request.TargetActorId); - - if (request.Activity is null || string.IsNullOrWhiteSpace(request.TargetActorId)) - { - _logger.LogWarning( - "Dropping malformed deferred LLM reply request: correlation={CorrelationId}, target={TargetActorId}", - request.CorrelationId, - request.TargetActorId); - await NotifyActorOfDropAsync(request, "malformed_deferred_llm_reply_request"); - return; - } - - // Stale gate: NyxID relay reply tokens have a ~30 min TTL and the user access - // token used for the LLM call expires inside ~15 min. A request that has been - // sitting in the stream for hours can't lead to a successful reply, so drop it - // here instead of spending an LLM round just to fail at the outbound stage. - var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); - if (request.RequestedAtUnixMs > 0 && nowMs - request.RequestedAtUnixMs > MaxInboxRequestAgeMs) - { - _logger.LogInformation( - "Dropping stale LLM reply request: correlation={CorrelationId} ageMs={AgeMs}", - request.CorrelationId, - nowMs - request.RequestedAtUnixMs); - await NotifyActorOfDropAsync(request, "stale_inbox_request_dropped"); - return; - } - - // Relay credential gate: relay turns require a fresh reply_token to send the - // outbound. A relay request with no inbox-carried token (e.g., rehydrated from - // persisted state after a pod restart that lost the original capture) cannot - // be delivered, so skip the LLM call entirely. - if (IsRelayRequest(request) && string.IsNullOrWhiteSpace(request.ReplyToken)) - { - _logger.LogWarning( - "Dropping relay LLM reply request without inbox-carried reply_token: correlation={CorrelationId}", - request.CorrelationId); - await NotifyActorOfDropAsync(request, "missing_relay_reply_token"); - return; - } - - var actor = await _actorRuntime.GetAsync(request.TargetActorId) - ?? 
await _actorRuntime.CreateAsync(request.TargetActorId, CancellationToken.None); - - string replyText; - MessageContent? outboundIntent = null; - var terminalState = LlmReplyTerminalState.Completed; - var errorCode = string.Empty; - var errorSummary = string.Empty; - using TurnStreamingReplySink? streamingSink = TryBuildStreamingSink(request, request.TargetActorId); - - try - { - var effectiveMetadata = await BuildEffectiveMetadataAsync(request, CancellationToken.None); - IDisposable? interactiveReplyScope = null; - try - { - if (ShouldCaptureInteractiveReply(request.Activity)) - interactiveReplyScope = _interactiveReplyCollector?.BeginScope(); - - replyText = await _replyGenerator.GenerateReplyAsync( - request.Activity, - effectiveMetadata, - streamingSink, - CancellationToken.None) ?? string.Empty; - outboundIntent = _interactiveReplyCollector?.TryTake(); - } - finally - { - interactiveReplyScope?.Dispose(); - } - - if (streamingSink is not null && - outboundIntent is null && - !string.IsNullOrWhiteSpace(replyText)) - { - await streamingSink.FinalizeAsync(replyText, CancellationToken.None); - } - - if (outboundIntent is null && string.IsNullOrWhiteSpace(replyText)) - { - terminalState = LlmReplyTerminalState.Failed; - errorCode = "empty_reply"; - errorSummary = "Reply generator returned an empty response."; - replyText = "Sorry, I wasn't able to generate a response. Please try again."; - } - } - catch (Exception ex) - { - terminalState = LlmReplyTerminalState.Failed; - errorCode = "llm_reply_failed"; - errorSummary = ex.Message; - replyText = NyxIdRelayErrorClassifier.Classify(ex.Message); - _logger.LogWarning( - ex, - "Deferred LLM reply generation failed: correlation={CorrelationId}", - request.CorrelationId); - } - - var ready = new LlmReplyReadyEvent - { - CorrelationId = request.CorrelationId, - RegistrationId = request.RegistrationId, - SourceActorId = InboxStreamId, - Activity = request.Activity.Clone(), - Outbound = outboundIntent?.Clone() ?? 
new MessageContent { Text = replyText }, - TerminalState = terminalState, - ErrorCode = errorCode, - ErrorSummary = errorSummary, - ReadyAtUnixMs = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds(), - // Echo the inbox-only relay credential straight back so ConversationGAgent's - // outbound reply does not depend on its in-memory token dict still having the - // entry. The actor consumes these fields and never persists them. - ReplyToken = request.ReplyToken ?? string.Empty, - ReplyTokenExpiresAtUnixMs = request.ReplyTokenExpiresAtUnixMs, - }; - var envelope = new EventEnvelope - { - Id = Guid.NewGuid().ToString("N"), - Timestamp = Timestamp.FromDateTimeOffset(_timeProvider.GetUtcNow()), - Payload = Any.Pack(ready), - Route = EnvelopeRouteSemantics.CreateDirect(InboxStreamId, request.TargetActorId), - }; - - await _actorDispatchPort.DispatchAsync(request.TargetActorId, envelope, CancellationToken.None); - } - - private TurnStreamingReplySink? TryBuildStreamingSink(NeedsLlmReplyEvent request, string targetActorId) - { - if (_relayOptions is not { StreamingRepliesEnabled: true }) - return null; - if (request.Activity?.OutboundDelivery is not - { - ReplyMessageId.Length: > 0, - CorrelationId.Length: > 0, - }) - { - return null; - } - if (string.IsNullOrWhiteSpace(request.CorrelationId)) - return null; - - var throttle = TimeSpan.FromMilliseconds(Math.Max(0, _relayOptions.StreamingFlushIntervalMs)); - return new TurnStreamingReplySink( - _actorDispatchPort, - targetActorId, - request.CorrelationId, - request.RegistrationId, - request.Activity.Clone(), - throttle, - _timeProvider, - _logger); - } - - private async Task> BuildEffectiveMetadataAsync( - NeedsLlmReplyEvent request, - CancellationToken ct) - { - var metadata = new Dictionary(request.Metadata, StringComparer.Ordinal); - - // Apply the bot owner's pre-configured LLM route + model. 
The relay callback - // identifies the bot by api_key_id (in activity.Bot.Value); we resolve that to - // the owner's Aevatar scope id and load the same UserConfig the owner uses - // when chatting through nyxid-chat themselves, then pin ModelOverride / - // NyxIdRoutePreference / MaxToolRoundsOverride from that configuration. - await ApplyBotOwnerLlmConfigAsync(request, metadata, ct); - - // The inbound callback's X-NyxID-User-Token is the bot owner's NyxID session - // JWT (freshly issued by NyxID for each callback). It is the bot owner's own - // credential for LLM calls — the same thing that would authorize them in - // nyxid-chat. The short TTL (~15 min) is mitigated by the direct-enqueue - // dispatch (#380), the inbox-echoed token flow (#383), and the stale pending - // request GC, so the token is still valid when the LLM call actually fires - // for any non-stale request. If the downstream provider rejects it, the - // classifier surfaces a real user-facing error via NyxIdRelayErrorClassifier. - var userAccessToken = request.Activity?.TransportExtras?.NyxUserAccessToken?.Trim(); - if (!string.IsNullOrWhiteSpace(userAccessToken)) - { - metadata[LLMRequestMetadataKeys.NyxIdAccessToken] = userAccessToken; - metadata[LLMRequestMetadataKeys.NyxIdOrgToken] = userAccessToken; - } - - return metadata; - } - - private async Task ApplyBotOwnerLlmConfigAsync( - NeedsLlmReplyEvent request, - IDictionary metadata, - CancellationToken ct) - { - if (_scopeResolver is null || _userConfigQueryPort is null) - return; - - var apiKeyId = request.Activity?.Bot?.Value?.Trim(); - if (string.IsNullOrWhiteSpace(apiKeyId)) - return; - - string? 
scopeId; - try - { - scopeId = await _scopeResolver.ResolveScopeIdByApiKeyAsync(apiKeyId, ct); - } - catch (Exception ex) - { - _logger.LogWarning( - ex, - "Failed to resolve bot owner scope id for LLM config: correlation={CorrelationId} apiKeyId={ApiKeyId}", - request.CorrelationId, - apiKeyId); - return; - } - - if (string.IsNullOrWhiteSpace(scopeId)) - { - _logger.LogDebug( - "No bot owner scope id resolved for LLM config: correlation={CorrelationId} apiKeyId={ApiKeyId}", - request.CorrelationId, - apiKeyId); - return; - } - - try - { - var config = await _userConfigQueryPort.GetAsync(scopeId, ct); - if (!string.IsNullOrWhiteSpace(config.DefaultModel)) - metadata[LLMRequestMetadataKeys.ModelOverride] = config.DefaultModel.Trim(); - if (!string.IsNullOrWhiteSpace(config.PreferredLlmRoute)) - metadata[LLMRequestMetadataKeys.NyxIdRoutePreference] = config.PreferredLlmRoute.Trim(); - if (config.MaxToolRounds > 0) - metadata[LLMRequestMetadataKeys.MaxToolRoundsOverride] = - config.MaxToolRounds.ToString(System.Globalization.CultureInfo.InvariantCulture); - - _logger.LogInformation( - "Applied bot owner LLM config: correlation={CorrelationId} scopeId={ScopeId} model={Model} route={Route}", - request.CorrelationId, - scopeId, - string.IsNullOrWhiteSpace(config.DefaultModel) ? "" : config.DefaultModel, - string.IsNullOrWhiteSpace(config.PreferredLlmRoute) ? 
"" : config.PreferredLlmRoute); - } - catch (Exception ex) - { - _logger.LogWarning( - ex, - "Failed to load bot owner LLM config: correlation={CorrelationId} scopeId={ScopeId}", - request.CorrelationId, - scopeId); - } - } - - private static bool IsRelayRequest(NeedsLlmReplyEvent request) => - request.Activity?.OutboundDelivery is - { - ReplyMessageId.Length: > 0, - CorrelationId.Length: > 0, - }; - - private async Task NotifyActorOfDropAsync(NeedsLlmReplyEvent request, string reason) - { - if (string.IsNullOrWhiteSpace(request.TargetActorId) || - string.IsNullOrWhiteSpace(request.CorrelationId)) - { - return; - } - - IActor? actor; - try - { - actor = await _actorRuntime.GetAsync(request.TargetActorId); - } - catch (Exception ex) - { - _logger.LogWarning( - ex, - "Failed to resolve actor for inbox drop notification: correlation={CorrelationId} target={TargetActorId}", - request.CorrelationId, - request.TargetActorId); - return; - } - - if (actor is null) - { - // No active actor means there is nothing pending to clean up; the request - // either was never persisted or the actor's state was already retired. - return; - } - - var dropped = new DeferredLlmReplyDroppedEvent - { - CorrelationId = request.CorrelationId, - Reason = reason, - DroppedAtUnixMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), - }; - var envelope = new EventEnvelope - { - Id = Guid.NewGuid().ToString("N"), - Timestamp = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), - Payload = Any.Pack(dropped), - Route = EnvelopeRouteSemantics.CreateDirect(InboxStreamId, request.TargetActorId), - }; - - try - { - await _actorDispatchPort.DispatchAsync(request.TargetActorId, envelope, CancellationToken.None); - } - catch (Exception ex) - { - _logger.LogWarning( - ex, - "Failed to deliver inbox drop notification: correlation={CorrelationId} reason={Reason}", - request.CorrelationId, - reason); - } - } - - private bool ShouldCaptureInteractiveReply(ChatActivity? 
activity) - { - if (_interactiveReplyCollector is null) - return false; - - if (_relayOptions is { InteractiveRepliesEnabled: false }) - return false; - - return activity?.OutboundDelivery is - { - ReplyMessageId.Length: > 0, - CorrelationId.Length: > 0, - }; - } -} - -public sealed class ChannelLlmReplyInboxHostedService : IHostedService -{ - private readonly ChannelLlmReplyInboxRuntime _runtime; - - public ChannelLlmReplyInboxHostedService(ChannelLlmReplyInboxRuntime runtime) - { - _runtime = runtime ?? throw new ArgumentNullException(nameof(runtime)); - } - - public Task StartAsync(CancellationToken ct) => _runtime.StartAsync(ct); - - public Task StopAsync(CancellationToken ct) => _runtime.StopAsync(ct); -} diff --git a/agents/Aevatar.GAgents.NyxidChat/ConversationReplyGenerator.cs b/agents/Aevatar.GAgents.NyxidChat/ConversationReplyGenerator.cs index ae0039261..4bb4d2221 100644 --- a/agents/Aevatar.GAgents.NyxidChat/ConversationReplyGenerator.cs +++ b/agents/Aevatar.GAgents.NyxidChat/ConversationReplyGenerator.cs @@ -1,13 +1,17 @@ +using System.Net.Http; using System.Text; using Aevatar.AI.Abstractions; using Aevatar.AI.Abstractions.LLMProviders; using Aevatar.AI.Abstractions.Middleware; using Aevatar.AI.Abstractions.ToolProviders; using Aevatar.AI.Core.Chat; +using Aevatar.AI.Core.Middleware; using Aevatar.AI.Core.Tools; using Aevatar.AI.ToolProviders.Skills; using Aevatar.GAgents.Channel.Abstractions; using Aevatar.GAgents.Channel.Runtime; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; namespace Aevatar.GAgents.NyxidChat; @@ -22,10 +26,20 @@ public sealed class NyxIdConversationReplyGenerator : IConversationReplyGenerato private readonly IReadOnlyList _agentMiddlewares; private readonly IReadOnlyList _toolMiddlewares; private readonly IReadOnlyList _llmMiddlewares; + private readonly IToolApprovalHandler? _approvalHandler; private readonly SkillRegistry? _skillRegistry; + private readonly IRemoteSkillFetcher? 
_remoteSkillFetcher; private readonly global::Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions? _relayOptions; private readonly INyxIdUserLlmPreferencesStore? _preferencesStore; private readonly IUserMemoryStore? _userMemoryStore; + private readonly ILogger _logger; + private int _missingRemoteFetcherWarningLogged; + + private sealed record EffectiveMetadataPlan( + IReadOnlyDictionary Primary, + IReadOnlyDictionary? OwnerFallback); + + private sealed record SenderPreferenceApplication(bool AnyApplied, bool RouteApplied); public NyxIdConversationReplyGenerator( ILLMProviderFactory llmProviderFactory, @@ -34,22 +48,34 @@ public NyxIdConversationReplyGenerator( IEnumerable? toolMiddlewares = null, IEnumerable? llmMiddlewares = null, SkillRegistry? skillRegistry = null, + IRemoteSkillFetcher? remoteSkillFetcher = null, global::Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions? relayOptions = null, INyxIdUserLlmPreferencesStore? preferencesStore = null, - IUserMemoryStore? userMemoryStore = null) + IUserMemoryStore? userMemoryStore = null, + IToolApprovalHandler? approvalHandler = null, + ILogger? logger = null) { _llmProviderFactory = llmProviderFactory ?? throw new ArgumentNullException(nameof(llmProviderFactory)); _toolSources = (toolSources ?? []).ToArray(); _agentMiddlewares = (agentMiddlewares ?? []).ToArray(); _toolMiddlewares = (toolMiddlewares ?? []).ToArray(); _llmMiddlewares = (llmMiddlewares ?? []).ToArray(); + _approvalHandler = approvalHandler; _skillRegistry = skillRegistry; + _remoteSkillFetcher = remoteSkillFetcher; _relayOptions = relayOptions; _preferencesStore = preferencesStore; _userMemoryStore = userMemoryStore; + _logger = logger ?? 
NullLogger.Instance; + if (_skillRegistry is not null && _remoteSkillFetcher is null) + { + _logger.LogWarning( + "SkillRegistry is registered without IRemoteSkillFetcher; local skills remain available, but remote skills cannot be refreshed or fetched by use_skill."); + _missingRemoteFetcherWarningLogged = 1; + } } - public async Task GenerateReplyAsync( + public async Task GenerateReplyAsync( ChatActivity activity, IReadOnlyDictionary metadata, IStreamingReplySink? streamingSink, @@ -58,22 +84,141 @@ public NyxIdConversationReplyGenerator( ArgumentNullException.ThrowIfNull(activity); ArgumentNullException.ThrowIfNull(metadata); - var effectiveMetadata = await BuildEffectiveMetadataAsync(metadata, ct); - var history = new global::Aevatar.AI.Core.Chat.ChatHistory + // Emit a placeholder immediately so the user sees a message within the outbound RTT, + // regardless of LLM cold-start, router selection, or tool-call latency before the + // first real delta. The first real delta overwrites this placeholder via edit-in-place; + // if no delta ever arrives (tool-only or empty turn), the caller's FinalizeAsync edits + // the placeholder to the final text. Disabled by setting the option to empty/whitespace. 
+ if (streamingSink is not null) { - MaxMessages = MaxHistoryMessages, - }; + var placeholder = _relayOptions?.StreamingPlaceholderText; + if (!string.IsNullOrWhiteSpace(placeholder)) + await streamingSink.OnDeltaAsync(placeholder, ct); + } + + var metadataPlan = await BuildEffectiveMetadataPlanAsync(metadata, ct); + var primaryTools = await BuildTurnToolsAsync(ct); + + try + { + return await GenerateWithMetadataAsync( + activity, + metadataPlan.Primary, + primaryTools, + streamingSink, + ct) + .ConfigureAwait(false); + } + catch (OperationCanceledException) + { + throw; + } + catch (Exception ex) when (metadataPlan.OwnerFallback is not null && IsRetryableSenderRouteFailure(ex)) + { + _logger.LogWarning( + ex, + "Sender LLM route failed; retrying with bot owner LLM config. activity={ActivityId}", + activity.Id); + + var fallbackTools = await BuildTurnToolsAsync(ct); + return await GenerateWithMetadataAsync( + activity, + metadataPlan.OwnerFallback, + fallbackTools, + streamingSink, + ct) + .ConfigureAwait(false); + } + } + + /// + /// Decide whether falling back from sender credentials to owner credentials is worth + /// the retry. Programmer errors (Argument*, NullReference, InvalidCast) are not transient + /// and would only fail the same way with the owner token while burying the original cause + /// behind a second failure. We retry only on infra-shaped failures: network, timeout, JSON + /// parsing of upstream errors, and the InvalidOperationException NyxID emits when an + /// access token is rejected. + /// + private static bool IsRetryableSenderRouteFailure(Exception ex) => + ex is HttpRequestException + or TimeoutException + or System.Text.Json.JsonException + or TaskCanceledException + or System.IO.IOException + || ex is InvalidOperationException invalid && IsKnownNyxIdRouteFailure(invalid.Message); + + private static bool IsKnownNyxIdRouteFailure(string? 
message) + { + if (string.IsNullOrWhiteSpace(message)) + return false; + var lowered = message.ToLowerInvariant(); + return lowered.Contains("nyxid", StringComparison.Ordinal) + || lowered.Contains("binding", StringComparison.Ordinal) + || lowered.Contains("scope", StringComparison.Ordinal) + || lowered.Contains("token", StringComparison.Ordinal) + || lowered.Contains("401", StringComparison.Ordinal) + || lowered.Contains("403", StringComparison.Ordinal) + || lowered.Contains("not found", StringComparison.Ordinal) + || lowered.Contains("revoked", StringComparison.Ordinal) + || lowered.Contains("route", StringComparison.Ordinal) + || lowered.Contains("proxy", StringComparison.Ordinal); + } + + private async Task BuildTurnToolsAsync(CancellationToken ct) + { var tools = new ToolManager(); foreach (var tool in await DiscoverToolsAsync(ct)) tools.Register(tool); + // SkillsAgentToolSource (when AddSkills is wired) advertises the same use_skill + // through DiscoverToolsAsync, so this defensive registration only matters for + // minimal hosts that registered AddOrnnSkills (IRemoteSkillFetcher) without + // AddSkills. ToolManager.Register is last-write-wins so the duplicate is harmless. + if (_skillRegistry is not null || _remoteSkillFetcher is not null) + { + LogMissingRemoteSkillFetcherOnce(); + tools.Register(new UseSkillTool(_skillRegistry ?? 
new SkillRegistry(), _remoteSkillFetcher)); + } + + return tools; + } + + private void LogMissingRemoteSkillFetcherOnce() + { + if (_skillRegistry is null || _remoteSkillFetcher is not null) + return; + if (Interlocked.Exchange(ref _missingRemoteFetcherWarningLogged, 1) != 0) + return; + + if (_skillRegistry.GetAll().Any(static skill => skill.Source == SkillSource.Remote)) + { + _logger.LogWarning( + "SkillRegistry contains remote skills but no IRemoteSkillFetcher is registered; use_skill cannot refresh or fetch remote skill bodies."); + return; + } + + _logger.LogDebug( + "SkillRegistry registered without IRemoteSkillFetcher; local skills remain available and no remote skills are currently advertised."); + } + + private async Task GenerateWithMetadataAsync( + ChatActivity activity, + IReadOnlyDictionary effectiveMetadata, + ToolManager tools, + IStreamingReplySink? streamingSink, + CancellationToken ct) + { + var history = new global::Aevatar.AI.Core.Chat.ChatHistory + { + MaxMessages = MaxHistoryMessages, + }; var runtime = new ChatRuntime( providerFactory: ResolveProvider, history: history, toolLoop: new ToolCallLoop( tools, hooks: null, - toolMiddlewares: _toolMiddlewares, + toolMiddlewares: BuildToolMiddlewaresForTurn(), llmMiddlewares: _llmMiddlewares), hooks: null, requestBuilder: () => new LLMRequest @@ -91,19 +236,13 @@ public NyxIdConversationReplyGenerator( agentName: "NyxIdConversationReply", streamBufferCapacity: StreamBufferCapacity); - // Emit a placeholder immediately so the user sees a message within the outbound RTT, - // regardless of LLM cold-start, router selection, or tool-call latency before the - // first real delta. The first real delta overwrites this placeholder via edit-in-place; - // if no delta ever arrives (tool-only or empty turn), the caller's FinalizeAsync edits - // the placeholder to the final text. Disabled by setting the option to empty/whitespace. 
- if (streamingSink is not null) - { - var placeholder = _relayOptions?.StreamingPlaceholderText; - if (!string.IsNullOrWhiteSpace(placeholder)) - await streamingSink.OnDeltaAsync(placeholder, ct); - } - var output = new StringBuilder(); + // ADR-0021 §6 / canon §8 actor-edge closeout: aggregate Usage and track the last + // FinishReason across all internal LLM rounds (tool-call loop) so the caller sees + // exactly one closeout — the returned record — instead of relying on round-internal + // markers that ChatRuntime currently passes through. + ReplyTokenUsage? aggregatedUsage = null; + string? lastFinishReason = null; await foreach (var chunk in runtime.ChatStreamAsync( activity.Content.Text, MaxToolRounds, @@ -111,6 +250,11 @@ public NyxIdConversationReplyGenerator( effectiveMetadata, ct)) { + if (chunk.Usage is { } usage) + aggregatedUsage = SumUsage(aggregatedUsage, MapUsage(usage)); + if (!string.IsNullOrEmpty(chunk.FinishReason)) + lastFinishReason = chunk.FinishReason; + if (string.IsNullOrEmpty(chunk.DeltaContent)) continue; @@ -119,14 +263,47 @@ public NyxIdConversationReplyGenerator( await streamingSink.OnDeltaAsync(output.ToString(), ct); } - return output.ToString(); + return new ConversationReplyResult( + Text: output.ToString(), + Usage: aggregatedUsage, + FinishReason: lastFinishReason); + } + + // ADR-0021 §6 / canon §8 cross-round usage aggregation — each provider round + // reports its own Usage; the actor-edge closeout carries the sum. + private static ReplyTokenUsage? SumUsage(ReplyTokenUsage? acc, ReplyTokenUsage? 
add) + { + if (add is null) return acc; + if (acc is null) return add; + return new ReplyTokenUsage( + acc.PromptTokens + add.PromptTokens, + acc.CompletionTokens + add.CompletionTokens, + acc.TotalTokens + add.TotalTokens); + } + + private static ReplyTokenUsage MapUsage(TokenUsage usage) => + new(usage.PromptTokens, usage.CompletionTokens, usage.TotalTokens); + + private IReadOnlyList BuildToolMiddlewaresForTurn() + { + if (_approvalHandler is null) + return _toolMiddlewares; + + var effective = new List(_toolMiddlewares.Count + 1) + { + new ToolApprovalMiddleware(_approvalHandler), + }; + effective.AddRange(_toolMiddlewares); + return effective; } - private async Task> BuildEffectiveMetadataAsync( + private async Task BuildEffectiveMetadataPlanAsync( IReadOnlyDictionary metadata, CancellationToken ct) { var effective = new Dictionary(metadata, StringComparer.Ordinal); + effective.Remove(LLMRequestMetadataKeys.SenderNyxIdAccessToken); + Dictionary? ownerFallback = null; // Issue #513 phase 3: prefs override chain is sender → bot-owner → // provider default. The bot owner's prefs are already pinned upstream @@ -135,12 +312,33 @@ private async Task> BuildEffectiveMetadataAs // so this generator only has to layer sender overrides on top when // the inbound carries a binding-id. SetIfFilled is field-level, so a // sender who set DefaultModel but not PreferredRoute still inherits - // the bot owner's route from the upstream-pinned metadata. + // the bot owner's route from the upstream-pinned metadata. If a + // sender-owned attempt fails, we retry once with this owner snapshot. 
if (_preferencesStore is not null && metadata.TryGetValue(LLMRequestMetadataKeys.SenderBindingId, out var senderBindingId) && !string.IsNullOrWhiteSpace(senderBindingId)) { - await ApplyPreferencesAsync(senderBindingId, effective, ct); + var ownerSnapshot = CreateOwnerFallbackSnapshot(effective); + var applied = await ApplyPreferencesAsync(senderBindingId, effective, ct); + if (applied.RouteApplied) + { + if (metadata.TryGetValue(LLMRequestMetadataKeys.SenderNyxIdAccessToken, out var senderAccessToken) && + !string.IsNullOrWhiteSpace(senderAccessToken)) + { + var trimmedToken = senderAccessToken.Trim(); + effective[LLMRequestMetadataKeys.NyxIdAccessToken] = trimmedToken; + effective[LLMRequestMetadataKeys.NyxIdOrgToken] = trimmedToken; + ownerFallback = ownerSnapshot; + } + else + { + effective = ownerSnapshot; + } + } + else if (applied.AnyApplied) + { + ownerFallback = ownerSnapshot; + } } if (_userMemoryStore is not null) @@ -149,7 +347,11 @@ private async Task> BuildEffectiveMetadataAs { var promptSection = await _userMemoryStore.BuildPromptSectionAsync(2000, ct); if (!string.IsNullOrWhiteSpace(promptSection)) + { effective[LLMRequestMetadataKeys.UserMemoryPrompt] = promptSection; + if (ownerFallback is not null) + ownerFallback[LLMRequestMetadataKeys.UserMemoryPrompt] = promptSection; + } } catch (OperationCanceledException) { @@ -161,7 +363,7 @@ private async Task> BuildEffectiveMetadataAs } } - return effective; + return new EffectiveMetadataPlan(effective, ownerFallback); } /// @@ -170,13 +372,13 @@ private async Task> BuildEffectiveMetadataAs /// the bot owner's value stays intact. User-config failures degrade to /// "no sender override" rather than failing the LLM turn. 
/// - private async Task ApplyPreferencesAsync( + private async Task ApplyPreferencesAsync( string senderBindingId, Dictionary effective, CancellationToken ct) { if (_preferencesStore is null) - return; + return new SenderPreferenceApplication(false, false); NyxIdUserLlmPreferences preferences; try @@ -189,22 +391,32 @@ private async Task ApplyPreferencesAsync( } catch { - return; + return new SenderPreferenceApplication(false, false); } - SetIfFilled(effective, LLMRequestMetadataKeys.ModelOverride, preferences.DefaultModel?.Trim()); - SetIfFilled(effective, LLMRequestMetadataKeys.NyxIdRoutePreference, preferences.PreferredRoute?.Trim()); - SetIfFilled( + var modelApplied = SetIfFilled(effective, LLMRequestMetadataKeys.ModelOverride, preferences.DefaultModel?.Trim()); + var routeApplied = SetIfFilled(effective, LLMRequestMetadataKeys.NyxIdRoutePreference, preferences.PreferredRoute?.Trim()); + var roundsApplied = SetIfFilled( effective, LLMRequestMetadataKeys.MaxToolRoundsOverride, preferences.MaxToolRounds > 0 ? preferences.MaxToolRounds.ToString() : null); + return new SenderPreferenceApplication(modelApplied || routeApplied || roundsApplied, routeApplied); } - private static void SetIfFilled(Dictionary map, string key, string? value) + private static Dictionary CreateOwnerFallbackSnapshot(Dictionary effective) + { + var snapshot = new Dictionary(effective, StringComparer.Ordinal); + snapshot.Remove(LLMRequestMetadataKeys.SenderBindingId); + snapshot.Remove(LLMRequestMetadataKeys.SenderNyxIdAccessToken); + return snapshot; + } + + private static bool SetIfFilled(Dictionary map, string key, string? 
value) { if (string.IsNullOrWhiteSpace(value)) - return; + return false; map[key] = value; + return true; } private async Task> DiscoverToolsAsync(CancellationToken ct) @@ -248,7 +460,7 @@ private string BuildSystemPrompt() var prompt = LoadBaseSystemPrompt(); prompt += NyxIdRelayPromptConfiguration.BuildChannelRuntimeConfigurationSection(_relayOptions); - if (_skillRegistry != null && _skillRegistry.Count > 0) + if (_skillRegistry is not null && _skillRegistry.Count > 0) { var skillSection = _skillRegistry.BuildSystemPromptSection(); if (!string.IsNullOrEmpty(skillSection)) diff --git a/agents/Aevatar.GAgents.NyxidChat/LlmSelection/DefaultUserLlmSelectionService.cs b/agents/Aevatar.GAgents.NyxidChat/LlmSelection/DefaultUserLlmSelectionService.cs index d74b1c233..072281262 100644 --- a/agents/Aevatar.GAgents.NyxidChat/LlmSelection/DefaultUserLlmSelectionService.cs +++ b/agents/Aevatar.GAgents.NyxidChat/LlmSelection/DefaultUserLlmSelectionService.cs @@ -41,8 +41,7 @@ public async Task SetByServiceAsync( ArgumentException.ThrowIfNullOrWhiteSpace(serviceId); var view = await _optionsService.GetOptionsAsync(ToQuery(context), ct).ConfigureAwait(false); - var option = view.Available.FirstOrDefault(candidate => - string.Equals(candidate.ServiceId, serviceId.Trim(), StringComparison.OrdinalIgnoreCase)); + var option = FindSelectionOption(serviceId.Trim(), view.Available); if (option is null) throw new InvalidOperationException($"LLM service '{serviceId}' is not available for this user."); EnsureSelectable(option); @@ -127,6 +126,32 @@ private static void EnsureSelectable(UserLlmOption option) throw new InvalidOperationException($"LLM service '{option.DisplayName}' is not ready: {option.Status}."); } + private static UserLlmOption? 
FindSelectionOption(string requested, IReadOnlyList available) + { + var directMatches = available + .Where(option => string.Equals(option.ServiceId, requested, StringComparison.OrdinalIgnoreCase)) + .ToArray(); + var directSelectable = directMatches.Where(IsSelectable).Take(2).ToArray(); + if (directSelectable.Length == 1) + return directSelectable[0]; + + var keyMatches = available + .Where(option => + string.Equals(option.ServiceId, requested, StringComparison.OrdinalIgnoreCase) || + string.Equals(option.ServiceSlug, requested, StringComparison.OrdinalIgnoreCase) || + string.Equals(option.RouteValue, requested, StringComparison.OrdinalIgnoreCase) || + string.Equals(option.DisplayName, requested, StringComparison.OrdinalIgnoreCase)) + .ToArray(); + var selectable = keyMatches.Where(IsSelectable).Take(2).ToArray(); + if (selectable.Length == 1) + return selectable[0]; + + return directMatches.FirstOrDefault() ?? (keyMatches.Length == 1 ? keyMatches[0] : null); + } + + private static bool IsSelectable(UserLlmOption option) => + option.Allowed && string.Equals(option.Status, "ready", StringComparison.OrdinalIgnoreCase); + public async Task ResetAsync(UserLlmSelectionContext context, CancellationToken ct) { var current = await ReadCurrentAsync(context, ct).ConfigureAwait(false); diff --git a/agents/Aevatar.GAgents.NyxidChat/LlmSelection/NyxIdLlmServiceCatalogClient.cs b/agents/Aevatar.GAgents.NyxidChat/LlmSelection/NyxIdLlmServiceCatalogClient.cs index 9d9828f00..c1d949b10 100644 --- a/agents/Aevatar.GAgents.NyxidChat/LlmSelection/NyxIdLlmServiceCatalogClient.cs +++ b/agents/Aevatar.GAgents.NyxidChat/LlmSelection/NyxIdLlmServiceCatalogClient.cs @@ -1,16 +1,31 @@ +using System.Security.Cryptography; +using System.Text; using Aevatar.AI.ToolProviders.NyxId; using Aevatar.Studio.Application.Studio.Abstractions; using Aevatar.Studio.Application.Studio.Services; +using Microsoft.Extensions.Caching.Memory; +using Microsoft.Extensions.Logging; +using 
Microsoft.Extensions.Logging.Abstractions; namespace Aevatar.GAgents.NyxidChat.LlmSelection; public sealed class NyxIdLlmServiceCatalogClient : INyxIdLlmServiceCatalogClient { + private static readonly TimeSpan ProxyServicesCacheTtl = TimeSpan.FromSeconds(30); + private const string ProxyServicesCacheKeyPrefix = "nyxid-llm-svc:proxy-services:"; + private readonly NyxIdApiClient _nyxClient; + private readonly IMemoryCache _proxyServicesCache; + private readonly ILogger _logger; - public NyxIdLlmServiceCatalogClient(NyxIdApiClient nyxClient) + public NyxIdLlmServiceCatalogClient( + NyxIdApiClient nyxClient, + IMemoryCache proxyServicesCache, + ILogger? logger = null) { _nyxClient = nyxClient ?? throw new ArgumentNullException(nameof(nyxClient)); + _proxyServicesCache = proxyServicesCache ?? throw new ArgumentNullException(nameof(proxyServicesCache)); + _logger = logger ?? NullLogger.Instance; } public async Task GetServicesAsync( @@ -22,7 +37,8 @@ public async Task GetServicesAsync( ArgumentException.ThrowIfNullOrWhiteSpace(accessToken); var response = await _nyxClient.GetLlmServicesAsync(accessToken, ct).ConfigureAwait(false); - return NyxIdLlmServiceCatalogParser.ParseServicesResult(response); + var result = NyxIdLlmServiceCatalogParser.ParseServicesResult(response); + return await MergeProxyRouteCandidatesAsync(result, accessToken, ct).ConfigureAwait(false); } public async Task GetSetupHintAsync( @@ -49,4 +65,61 @@ public async Task ProvisionAsync( .ConfigureAwait(false); return NyxIdLlmServiceCatalogParser.ParseProvisionedService(response); } + + private async Task MergeProxyRouteCandidatesAsync( + NyxIdLlmServicesResult result, + string accessToken, + CancellationToken ct) + { + try + { + var proxyServices = await DiscoverProxyServicesCachedAsync(accessToken, ct).ConfigureAwait(false); + return NyxIdLlmServiceCatalogParser.MergeProxyRouteCandidates(result, proxyServices); + } + catch (OperationCanceledException) + { + throw; + } + catch (Exception ex) + { + 
_logger.LogWarning(ex, "Failed to merge NyxID proxy services into LLM route catalog"); + return result; + } + } + + /// + /// Cache the per-user /api/v1/proxy/services response for a short TTL so a flurry + /// of /model invocations from the same user collapses onto one upstream call. We use + /// rather than a singleton dictionary so the cache backing + /// store is shared, sized, and evicted per the host's standard memory-cache policy + /// (CLAUDE.md §"中间层状态约束" — services don't own per-caller state directly). + /// + private async Task DiscoverProxyServicesCachedAsync( + string accessToken, + CancellationToken ct) + { + var cacheKey = ProxyServicesCacheKeyPrefix + ComputeTokenFingerprint(accessToken); + if (_proxyServicesCache.TryGetValue(cacheKey, out string? cached) && + !string.IsNullOrEmpty(cached)) + { + return cached; + } + + var response = await _nyxClient.DiscoverProxyServicesAsync(accessToken, ct).ConfigureAwait(false); + // Size is not set on the entry — IMemoryCache only enforces Size when the host + // configured a SizeLimit on MemoryCacheOptions. The cache backing store is owned + // by the host (we register IMemoryCache via AddMemoryCache, no per-entry size + // policy from us), so leave eviction to the host's TimeBasedExpiration default. 
+ _proxyServicesCache.Set( + cacheKey, + response, + new MemoryCacheEntryOptions + { + AbsoluteExpirationRelativeToNow = ProxyServicesCacheTtl, + }); + return response; + } + + private static string ComputeTokenFingerprint(string accessToken) => + Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(accessToken))); } diff --git a/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Relay.cs b/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Relay.cs index 9f4c40c2e..2d09a1742 100644 --- a/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Relay.cs +++ b/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Relay.cs @@ -28,6 +28,7 @@ public static partial class NyxIdChatEndpoints private static async Task HandleRelayWebhookAsync( HttpContext http, [FromServices] IActorRuntime actorRuntime, + [FromServices] IActorDispatchPort actorDispatchPort, [FromServices] NyxIdRelayTransport relayTransport, [FromServices] NyxIdRelayAuthValidator relayAuthValidator, [FromServices] Aevatar.GAgents.Channel.NyxIdRelay.NyxIdRelayOptions relayOptions, @@ -129,13 +130,10 @@ private static async Task HandleRelayWebhookAsync( Id = Guid.NewGuid().ToString("N"), Timestamp = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), Payload = Any.Pack(relayInbound), - Route = new EnvelopeRoute - { - Direct = new DirectRoute { TargetActorId = actorId }, - }, + Route = EnvelopeRouteSemantics.CreateDirect("nyxid-chat.relay", actorId), }; - await actor.HandleEventAsync(command, ct); + await actorDispatchPort.DispatchAsync(actor.Id, command, ct); logger.LogInformation( "Accepted relay callback into channel conversation backbone: message={MessageId}, actor={ActorId}, platform={Platform}, activity={ActivityType}", diff --git a/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Streaming.cs b/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Streaming.cs index b35820524..56734e7aa 100644 --- a/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Streaming.cs +++ 
b/agents/Aevatar.GAgents.NyxidChat/NyxIdChatEndpoints.Streaming.cs @@ -21,6 +21,7 @@ private static async Task HandleStreamMessageAsync( string actorId, NyxIdChatStreamRequest request, [FromServices] IActorRuntime actorRuntime, + [FromServices] IActorDispatchPort actorDispatchPort, [FromServices] IScopeResourceAdmissionPort admissionPort, [FromServices] IActorEventSubscriptionProvider subscriptionProvider, [FromServices] ILoggerFactory loggerFactory, @@ -114,7 +115,7 @@ await NyxIdChatStreamingRunner.RunAsync( }, }; - await actor.HandleEventAsync(envelope, runCt); + await actorDispatchPort.DispatchAsync(actor.Id, envelope, runCt); }, mapAndWriteEventAsync: MapAndWriteEventAsync, errorMessages: new NyxIdChatStreamingRunner.ErrorMessages( @@ -209,6 +210,7 @@ private static async Task HandleApproveAsync( string actorId, NyxIdApprovalRequest request, [FromServices] IActorRuntime actorRuntime, + [FromServices] IActorDispatchPort actorDispatchPort, [FromServices] IScopeResourceAdmissionPort admissionPort, [FromServices] IActorEventSubscriptionProvider subscriptionProvider, [FromServices] ILoggerFactory loggerFactory, @@ -289,7 +291,7 @@ await NyxIdChatStreamingRunner.RunAsync( }, }; - await actor.HandleEventAsync(envelope, runCt); + await actorDispatchPort.DispatchAsync(actor.Id, envelope, runCt); }, mapAndWriteEventAsync: MapAndWriteEventAsync, errorMessages: new NyxIdChatStreamingRunner.ErrorMessages( diff --git a/agents/Aevatar.GAgents.NyxidChat/NyxIdChatServiceDefaults.cs b/agents/Aevatar.GAgents.NyxidChat/NyxIdChatServiceDefaults.cs index 4661de04b..0949cc5a8 100644 --- a/agents/Aevatar.GAgents.NyxidChat/NyxIdChatServiceDefaults.cs +++ b/agents/Aevatar.GAgents.NyxidChat/NyxIdChatServiceDefaults.cs @@ -8,6 +8,7 @@ public static class NyxIdChatServiceDefaults public const string ActorIdPrefix = "nyxid-chat"; public const string ActorsFileName = "actors"; public const string ProviderName = "nyxid"; + public const string ModelSelfHealPublisherActorId = 
"nyxid-chat.model.self-heal"; public static string GenerateActorId() => $"{ActorIdPrefix}-{Guid.NewGuid():N}"; diff --git a/agents/Aevatar.GAgents.NyxidChat/ServiceCollectionExtensions.cs b/agents/Aevatar.GAgents.NyxidChat/ServiceCollectionExtensions.cs index 151a082ae..f4f2c280d 100644 --- a/agents/Aevatar.GAgents.NyxidChat/ServiceCollectionExtensions.cs +++ b/agents/Aevatar.GAgents.NyxidChat/ServiceCollectionExtensions.cs @@ -1,5 +1,6 @@ using System.Runtime.CompilerServices; using Aevatar.AI.Abstractions.Middleware; +using Aevatar.AI.ToolProviders.Lark; using Aevatar.GAgents.Channel.Abstractions; using Aevatar.GAgents.Channel.Abstractions.Slash; using Aevatar.GAgents.Channel.NyxIdRelay; @@ -9,7 +10,7 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; -using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; namespace Aevatar.GAgents.NyxidChat; @@ -19,6 +20,7 @@ public static IServiceCollection AddNyxIdChat(this IServiceCollection services, { ArgumentNullException.ThrowIfNull(services); RuntimeHelpers.RunClassConstructor(typeof(NyxIdChatGAgent).TypeHandle); + RuntimeHelpers.RunClassConstructor(typeof(AgentRunGAgent).TypeHandle); services.AddHttpClient(); services.TryAddSingleton(provider => BindRelayOptions(configuration)); @@ -34,13 +36,33 @@ public static IServiceCollection AddNyxIdChat(this IServiceCollection services, services.TryAddSingleton(); services.TryAddSingleton(); - // ─── Channel LLM reply inbox runtime + hosted service ─── - services.TryAddSingleton(); - services.TryAddSingleton(sp => sp.GetRequiredService()); - services.TryAddEnumerable(ServiceDescriptor.Singleton()); + // ─── Channel LLM reply run dispatch ─── + services.TryAddSingleton(); // ─── Conversation turn-runner override + reply generator ─── services.Replace(ServiceDescriptor.Singleton()); + // The CardKit runner depends on Aevatar.AI.ToolProviders.Lark services. 
AddNyxIdChat() + // does not transitively register them — production hosts also call AddLarkTools() — + // so resolve via factory and gracefully fall back to the no-op runner when Lark + // tooling is absent. This keeps CardKit dormant for hosts that opt out of Lark + // instead of failing DI validation at startup. + var existingCardRunner = services.LastOrDefault(static descriptor => + descriptor.ServiceType == typeof(IConversationCardTurnRunner)); + if (existingCardRunner is null || + existingCardRunner.ImplementationType == typeof(NullConversationCardTurnRunner)) + { + services.Replace(ServiceDescriptor.Singleton(sp => + { + var cardKit = sp.GetService(); + var lark = sp.GetService(); + if (cardKit is null || lark is null) + return new NullConversationCardTurnRunner(); + return new ChannelCardConversationTurnRunner( + cardKit, + lark, + sp.GetRequiredService>()); + })); + } services.TryAddSingleton(); // ─── LLM-call middleware that injects channel context into LLM requests ─── @@ -54,6 +76,10 @@ public static IServiceCollection AddNyxIdChat(this IServiceCollection services, // Registered here (not in Channel.Identity) because the handler depends // on Studio.Application UserConfig ports; Channel.Identity intentionally // does not pull Studio dependencies. + // Catalog client uses IMemoryCache for the proxy-services TTL cache. AddMemoryCache + // is idempotent: hosts that already registered MemoryCacheOptions keep control of + // cache size/compaction behavior; hosts that did not register one get the default. + services.AddMemoryCache(); services.TryAddSingleton(); // These are consumed by singleton turn-runner/slash handlers. 
They create // short scopes internally for UserConfig ports instead of capturing diff --git a/agents/Aevatar.GAgents.NyxidChat/Skills/system-prompt.md b/agents/Aevatar.GAgents.NyxidChat/Skills/system-prompt.md index b8bde68d5..09c6ff71f 100644 --- a/agents/Aevatar.GAgents.NyxidChat/Skills/system-prompt.md +++ b/agents/Aevatar.GAgents.NyxidChat/Skills/system-prompt.md @@ -29,7 +29,41 @@ Rules: - Only ask the user a follow-up question when required inputs are genuinely missing and cannot be inferred. - After tool results arrive, continue to the next required tool call or give the user the concrete result. -## Capability Tools (Doing Things) +## Skills (CRITICAL — NyxID and Ornn knowledge lives here) + +This prompt deliberately keeps the NyxID and Ornn user manuals **out of the system prompt** and on the Ornn skill platform instead, so curators can update those manuals without redeploying the bot. You learn the canonical, up-to-date usage by loading the relevant skill. + +**Before doing any of the following, call `use_skill(skill="nyxid")` first** to load the authoritative NyxID manual: +- Account / profile / MFA / sessions / consents +- Service catalog browsing, connecting a new service (OAuth / device-code / API key flows) +- API key, node, organization, approval, notification management +- Diagnosing NyxID error codes (`approval_required`, `unauthorized`, `node_offline`, etc.) +- Anything that would otherwise need `nyxid_account`, `nyxid_status`, `nyxid_profile`, `nyxid_mfa`, `nyxid_sessions`, `nyxid_catalog`, `nyxid_services`, `nyxid_endpoints`, `nyxid_external_keys`, `nyxid_api_keys`, `nyxid_nodes`, `nyxid_approvals`, `nyxid_notifications`, `nyxid_providers`, `nyxid_orgs`, `nyxid_admin`, `nyxid_search_capabilities`, `nyxid_proxy_execute` + +**Before driving the Ornn API directly via the AI Agent CLI, call `use_skill(skill="ornn-agent-manual-cli")`** to load the Ornn agent manual. 
+ +`use_skill` caches the loaded instructions in-process for ~5 minutes; after that window the next call refetches from Ornn so curator updates land within 5 minutes without a redeploy. + +### Proactive skill discovery + +When the user mentions a named skill or asks for a specialized capability (translation, summarization, network/device inventory, scraping, scheduling, content drafting, code review, domain workflows, etc.), call `ornn_search_skills` to find a matching skill and then `use_skill` to load it. Treat the loaded skill's instructions as authoritative for that task. + +Triggers: +- User issues `/daily` or `/daily ...` — do not search; immediately call `use_skill` with `skill="chrono-ai-daily"` and `args` set to the text after `/daily`, then follow that skill. +- User quotes a skill name (`'translate-pro'`, `"sg-office-network"`) +- User uses a slug-like or Title Case identifier that could be a skill name +- User issues another `/` slash command that isn't an in-tree relay command (the in-tree ones are `/route`, `/models`, `/model`, `/agents`, `/agent-status`, `/run-agent`, `/disable-agent`, `/enable-agent`, `/delete-agent`) — treat the command name as the skill query (`/invoice` → search "invoice") +- User says "挂载/mount/use/load this skill" or names a domain workflow + +Only fall back to `nyxid_proxy` / generic API discovery when no skill matches. + +### Quick reference + +- **Search**: `ornn_search_skills` — keywords or skill name; `scope=public|private|mixed` +- **Activate**: `use_skill skill=""` — loads instructions + associated files +- **Follow**: once loaded, the skill's instructions take precedence over generic guidance for that task + +## Capability Tools (the universal primitives) ### code_execute — Run Code Execute Python, JavaScript, TypeScript, or Bash in a sandboxed environment. Returns stdout, stderr, and exit code. Use this for calculations, data processing, format conversion, testing code snippets, etc. 
@@ -39,45 +73,16 @@ Make HTTP requests to any connected service. NyxID injects credentials automatic - Omit slug → discover all proxyable services with proxy URLs - Provide slug + path + method + body → make the proxied request -**Critical**: Proxy paths are relative to the service's base URL (shown in ``). Do NOT duplicate version prefixes already in the base URL. +**Critical**: Proxy paths are relative to the service's base URL (shown in ``). Do NOT duplicate version prefixes already in the base URL. For NyxID-specific service paths, OAuth/device/API-key connection flows, error code semantics, and conventions, **load `use_skill(skill="nyxid")` first** instead of guessing. ### Channel Bots — Messaging Use `nyxid_proxy` with a Telegram/Discord bot's slug to send messages. For Telegram: POST `/sendMessage` with `{"chat_id":"...","text":"..."}`. -## Account & Service Management Tools - -### Account -- **nyxid_account** — View user profile and account status -- **nyxid_status** — Comprehensive overview (user + services + API keys + nodes) -- **nyxid_profile** — Update display name, delete account, manage OAuth consents -- **nyxid_mfa** — Setup/verify TOTP multi-factor authentication -- **nyxid_sessions** — List active login sessions - -### Services -- **nyxid_catalog** — Browse service templates (list all, or show details for a slug) -- **nyxid_services** — Manage connected services: list, show, create, update, delete, rotate_credential, route -- **nyxid_endpoints** — Manage service base URLs: list, update, delete -- **nyxid_external_keys** — Manage external API credentials: list, rotate, delete - -### Security & Access -- **nyxid_api_keys** — Manage NyxID API keys: list, show, create, rotate, delete, update -- **nyxid_nodes** — Manage on-premise nodes: list, show, delete, register_token, rotate_token -- **nyxid_approvals** — Manage approvals: list/show requests, approve/deny, grants, per-service config -- **nyxid_notifications** — Notification settings & Telegram 
integration -- **nyxid_llm_status** — Check available LLM providers and models -- **nyxid_providers** — Manage OAuth provider connections: list, connect, disconnect, credentials - -### Organizations -- **nyxid_orgs** — Manage NyxID organizations (shared credentials): list, show, create, update, delete, join, set_primary, member management (list/add/update/remove), invites (list/create/cancel) - -### Channel Bots & Events -- **channel_registrations** — List, provision, rebuild, repair, and delete Aevatar's local Lark relay registrations. Use this for Aevatar-managed Lark setup, for rebuilding the local read model from the authoritative actor state, and for restoring the local mirror when Nyx relay resources already exist -- **agent_delivery_targets** — Manage agent delivery target mappings used by workflow human approval/input cards and other outbound channel delivery -- **agent_builder** — Create and manage Day One persistent automation agents in Feishu private chat. Internal tool actions: `list_templates`, `create_agent`, `list_agents`, `agent_status`, `run_agent`, `disable_agent`, `enable_agent`, `delete_agent`. Internal template names (used only inside `create_agent` arguments): `daily_report`, `social_media`. **When talking to the user, always use the slash-command names — never surface the internal template names `daily_report` / `social_media`.** User-facing slash commands: `/daily [github_username]`, `/social-media `, `/agents`, `/agent-status `, `/run-agent `, `/disable-agent `, `/enable-agent `, `/delete-agent confirm`. -- **nyxid_channel_bots** — NyxID-native channel bot management: inspect/register/verify/delete bots and manage conversation routes directly via NyxID API. 
Use this to inspect existing Nyx Lark bot/route state or register Nyx-native fields such as `verification_token` -- **nyxid_channel_events** — Push device/analyzer events through the NyxID HTTP Event Gateway to agent conversations - -### LLM Route Selection +## Aevatar-specific tools + +These are **aevatar-internal** tools, not on Ornn's `nyxid` skill — they manage state local to this aevatar deployment. + +### LLM Route Selection (slash commands) The relay handles LLM route selection deterministically, without an LLM round-trip. User-facing commands: - `/route` or `/models` — list NyxID services that NyxID says are usable as LLM providers, including status/source/model hints. @@ -85,235 +90,74 @@ The relay handles LLM route selection deterministically, without an LLM round-tr - `/model use ` — keep the current route and only override the model. - `/model reset` — clear the sender's route/model preference and fall back to the bot default. -### Admin -- **nyxid_admin** — Administrative commands (admin role required): manage invite codes (list, create, deactivate) - -### API Discovery (Fallback) -- **nyxid_search_capabilities** — Search NyxID API capabilities by natural language query. Returns matching operations with method, path, and parameters. Use this to discover endpoints not covered by specialized tools -- **nyxid_proxy_execute** — Execute a NyxID API operation discovered via nyxid_search_capabilities. Validates parameters against cached OpenAPI spec before sending - -## Connecting New Services - -All connection info comes from the catalog entry. 
Use `nyxid_catalog action=show slug=` and read: - -| Field | Meaning | -|-------|---------| -| `provider_type` | Connection method: `oauth2`, `device_code`, `api_key` | -| `credential_mode` | Who provides OAuth app: `admin` (platform) or `user` (user must provide) | -| `provider_config_id` | Provider ID for OAuth/device-code | -| `api_key_instructions` | How to get an API key (display as-is) | -| `api_key_url` | Where to get the key (clickable link) | -| `requires_gateway_url` | If true, user must also provide endpoint URL | - -### OAuth Flow -1. Check `nyxid_providers action=list` for existing connection -2. If `credential_mode=user`: check/set credentials via `nyxid_providers action=get_credentials/set_credentials` - - Callback URL: `https://nyx-api.chrono-ai.fun/api/v1/providers/callback` -3. `nyxid_providers action=connect_oauth provider_id=` → give user the authorization URL -4. Verify with `nyxid_providers action=list` - -### Device Code Flow -1. `nyxid_providers action=connect_device_code provider_id=` → tell user to visit URL and enter code -2. Poll: `nyxid_providers action=poll_device_code provider_id= state=` -3. Verify with `nyxid_providers action=list` - -### API Key Flow -1. Guide user with catalog's `api_key_instructions` and `api_key_url` -2. `nyxid_services action=create service_slug= credential= label=` -3. Test with a simple read-only proxy request - -If user asks to connect a service and you don't know the slug, browse with `nyxid_catalog action=list`. - -## Channel Bot Setup (Lark via Nyx Relay) +### channel_registrations (Aevatar's local Lark mirror) Aevatar owns the local runtime and registration mirror. For Lark, webhook ingress goes through NyxID first, then NyxID relays callbacks into Aevatar. Nyx owns the platform bot, route, and relay API key; Aevatar owns the local registration mirror used by the runtime. Do not assume `channel_registrations action=list` being empty means the Nyx bot is missing. 
-### Lark Stage 1: New provisioning - -Use this stage when the user wants the bot connected for inbound Lark messages and basic relay replies. -Do not block this stage on typed Lark tools, delivery target bindings, or proactive outbound setup. - -Register channel bot in Aevatar: +**Stage 1: New provisioning** — when the user wants the bot connected for inbound Lark messages and basic relay replies. Do not block on typed Lark tools or proactive outbound setup. `channel_registrations action=register_lark_via_nyx app_id= app_secret= verification_token= webhook_base_url=https://` -`verification_token` is optional in the tool contract, but when the user has it or the Nyx backend requires it, pass it through. - -→ This returns the registration ID, the Nyx relay callback URL, and the Nyx webhook URL that must be configured in the Lark developer console. - -Configure the platform webhook: - -**Lark/Feishu:** 开发者后台 → 事件与回调 → 事件配置 → 请求地址: -`` - -Add events: -- `im.message.receive_v1` -- `card.action.trigger` - -### Lark Stage 2: Repair an existing bot +→ Returns the registration ID, the Nyx relay callback URL, and the Nyx webhook URL that must be configured in 开发者后台 → 事件与回调 → 事件配置 → 请求地址. -Use this stage when Nyx already has the Lark bot and route, but Aevatar no longer replies or `channel_registrations action=list` is empty. +Add events: `im.message.receive_v1`, `card.action.trigger`. -First try rebuilding the local registration read model from the authoritative actor state: +**Stage 2: Repair an existing bot** — when Nyx already has the Lark bot/route but Aevatar no longer replies or `channel_registrations action=list` is empty. -`channel_registrations action=rebuild_projection` +1. `channel_registrations action=rebuild_projection` — rebuild local read model from authoritative actor state. +2. Inspect Nyx-side first: `nyxid_channel_bots action=list` / `show` / `routes`. (For NyxID-side details, `use_skill(skill="nyxid")`.) +3. 
If Nyx is healthy but local list still empty, restore the local mirror: + `channel_registrations action=repair_lark_mirror registration_id= credential_ref= webhook_base_url=https:// nyx_channel_bot_id= nyx_agent_api_key_id= nyx_conversation_route_id=` + `repair_lark_mirror` must preserve the existing relay credential reference. Reuse `registration_id` when its `vault://.../relay-hmac` secret still exists, or pass `credential_ref` explicitly. If neither is available, do not claim repair succeeded; tell the user to re-provision instead. -Inspect the Nyx side first: +**Stage 3: Advanced Lark capabilities** — only when the user needs proactive sends, typed Lark tools, delivery target bindings, spreadsheet appends, approval actions, or active chat lookup. Ensure NyxID has a usable Lark outbound provider slug (typically `api-lark-bot`); if not, `use_skill(skill="nyxid")` to drive the catalog connection flow. -- `nyxid_channel_bots action=list` -- `nyxid_channel_bots action=show id=` -- `nyxid_channel_bots action=routes channel_bot_id=` -- `nyxid_api_keys action=show id=` +For advanced Lark API operations outside the current relay reply, prefer typed tools: `lark_messages_send`, `lark_messages_search`, `lark_messages_batch_get`, `lark_messages_reactions_list`, `lark_messages_reactions_delete`, `lark_chats_lookup`, `lark_sheets_append_rows`, `lark_approvals_list`, `lark_approvals_act`. Fall back to `nyxid_proxy_execute` only when typed tools don't cover. -If the Nyx bot, route, and relay callback are correct but rebuild did not restore the local list, restore the local Aevatar mirror: +For inbound Lark relay turns that represent a fresh user message, do **not** call `lark_messages_reply`, `lark_messages_react`, or `nyxid_proxy_execute` to deliver the answer. Produce the final text reply directly; the channel runtime will send it through the Nyx relay reply token. 
-`channel_registrations action=repair_lark_mirror registration_id= credential_ref= webhook_base_url=https:// nyx_channel_bot_id= nyx_agent_api_key_id= nyx_conversation_route_id=` +Managing registrations: `list`, `rebuild_projection`, `repair_lark_mirror`, `delete id= confirm=true`. -`repair_lark_mirror` must preserve the existing relay credential reference. Reuse the old `registration_id` when its `vault://.../relay-hmac` secret still exists, or pass `credential_ref` explicitly. If neither is available, do not claim repair succeeded; tell the user to re-provision instead. +### agent_delivery_targets -If rebuild and mirror repair both succeed but `channel_registrations action=list` still stays empty, tell the user the local Aevatar registration projection/read model is unhealthy. +Workflow `human_approval`, `human_input`, `secure_input` steps can send Feishu delivery messages when the workflow step includes `delivery_target_id=`. For the Nyx relay path, these arrive as interactive cards in Lark/Feishu (with `/approve`, `/reject`, `/submit` as fallback commands). -### Lark Stage 3: Advanced Lark capabilities +Bind `agent_id` to the real outbound route: +- `agent_delivery_targets action=list` +- `agent_delivery_targets action=upsert agent_id= conversation_id= nyx_provider_slug= nyx_api_key=` +- `agent_delivery_targets action=delete agent_id= confirm=true` -Only use this stage when the user needs proactive sends, typed Lark tools, delivery target bindings, spreadsheet appends, approval actions, or active chat lookup. +`channel_registrations` configures inbound bot callbacks; `agent_delivery_targets` configures outbound agent delivery. Today the human-interaction delivery path supports `lark`. 
-Ensure NyxID has a usable Lark outbound provider slug, typically `api-lark-bot`: -`nyxid_services action=list` → check if the service exists -If not: `nyxid_catalog action=list` → find the slug → guide user to add it +### agent_builder (Day One persistent automation lifecycle) -For advanced Lark API operations that are not the current inbound relay reply, prefer typed tools such as: -- `lark_messages_send` -- `lark_messages_search` -- `lark_messages_batch_get` -- `lark_messages_reactions_list` -- `lark_messages_reactions_delete` -- `lark_chats_lookup` -- `lark_sheets_append_rows` -- `lark_approvals_list` -- `lark_approvals_act` +`agent_builder` manages the lifecycle of agents the user has already created. Recipes for *new* agents live as Ornn skills — match the user's intent against `ornn_search_skills` and follow the SKILL.md verbatim. `agent_builder` itself does not create agents. -Only call `lark_messages_reply` or `lark_messages_react` when the user explicitly asks you to reply to or react to a specific Lark message outside the current relay turn. +| Intent | Slash command | +|---|---| +| List agents | `/agents` | +| Inspect one agent | `/agent-status ` | +| Manual run | `/run-agent ` | +| Pause schedule | `/disable-agent ` | +| Resume schedule | `/enable-agent ` | +| Delete (two-step) | `/delete-agent confirm` | -Use generic `nyxid_proxy_execute` only when typed tools do not cover the operation. - -For inbound Lark relay turns that represent a fresh user message, do not call `lark_messages_reply`, `lark_messages_react`, or `nyxid_proxy_execute` to deliver the answer. Produce the final text reply directly; the channel runtime will send it through the Nyx relay reply token. - -When binding workflow delivery or proactive agent delivery, use a Lark outbound provider slug such as `api-lark-bot`. 
- -### Managing registrations - -- List: `channel_registrations action=list` -- Rebuild local registration projection: `channel_registrations action=rebuild_projection` -- Repair existing Lark mirror: `channel_registrations action=repair_lark_mirror registration_id= credential_ref= webhook_base_url=https:// nyx_channel_bot_id= nyx_agent_api_key_id= nyx_conversation_route_id=` -- Delete: `channel_registrations action=delete id= confirm=true` -- Inspect Nyx-native bot state: `nyxid_channel_bots action=show id=` and `nyxid_channel_bots action=routes channel_bot_id=` - -## Agent Delivery Targets - -Workflow `human_approval`, `human_input`, and `secure_input` steps can send Feishu delivery messages when the workflow step includes `delivery_target_id=`. - -For the Nyx relay path, these arrive as interactive cards in Lark/Feishu: -- `human_approval`: users can approve/reject directly from the card; `/approve ...` and `/reject ...` remain valid fallback commands -- `human_input` / `secure_input`: users can submit directly from the card; `/submit ...` remains a valid fallback command - -Use `agent_delivery_targets` to bind that `agent_id` to the real outbound route: -- List: `agent_delivery_targets action=list` -- Upsert: `agent_delivery_targets action=upsert agent_id= conversation_id= nyx_provider_slug= nyx_api_key=` -- Delete: `agent_delivery_targets action=delete agent_id= confirm=true` - -Notes: -- `channel_registrations` configures inbound bot callbacks -- `agent_delivery_targets` configures outbound agent delivery -- Today the human interaction delivery path supports `lark` - -## Agent Builder - -Use `agent_builder` when the user wants a persistent Day One automation agent in Feishu private chat. - -### User-facing vocabulary (critical) - -When you describe Day One to the user — capability summaries, suggested replies, example commands, help text — use the slash commands below, **not** the internal template names. 
`daily_report` and `social_media` are tool-argument identifiers; they are not commands the user types. If the user says something like "帮我建一个 daily_report" or "create a daily_report", treat that as intent for `/daily` and present your reply using `/daily`. - -| Intent | Slash command users type | Internal `template` (only for tool calls) | -|---|---|---| -| Daily GitHub summary | `/daily [github_username]` | `daily_report` | -| Social media draft + approval | `/social-media ` | `social_media` | -| List agents | `/agents` | — | -| Inspect one agent | `/agent-status ` | — | -| Manual run | `/run-agent ` | — | -| Pause schedule | `/disable-agent ` | — | -| Resume schedule | `/enable-agent ` | — | -| Delete (two-step) | `/delete-agent confirm` | — | - -`/daily` with no arguments pops an interactive card (GitHub username + schedule fields). `/daily ` saves the username as the user's default and runs the first report immediately — the ack message should say the first run is on its way, not just "scheduled for tomorrow". - -### Tool semantics - -- Creation is private-chat only; if the current chat is not `p2p`, tell the user to DM the bot. -- `create_agent` with `template=daily_report` provisions a `SkillRunnerGAgent` that sends plain-text GitHub summaries back into the current private chat, plus a non-expiring NyxID API key for outbound delivery. -- `create_agent` with `template=social_media` provisions a workflow-backed scheduled agent that generates one draft and routes approval through the current supported human-interaction surface. -- `list_agents` and `agent_status` read the registry-backed current state. -- `run_agent` only works when the agent is enabled. -- `disable_agent` pauses scheduled execution without deleting the agent or revoking its API key. -- `enable_agent` resumes scheduled execution for a previously disabled agent. -- `delete_agent` disables the agent, revokes the NyxID API key, and tombstones the registry entry. 
-- The Nyx relay path handles the slash commands above directly (and renders the `/daily` and `/social-media` cards) without an LLM round-trip. You typically only see these flows when the user asks for them in natural language instead of typing the slash command. - -## Notifications & Approvals - -If a proxy request requires approval: -1. Tell user approval is pending -2. User approves via Telegram notification, NyxID mobile app, or `nyxid_approvals action=approve id=` - -Setup notifications: `nyxid_notifications action=telegram_link` / Mobile app: https://nyxid.onelink.me/REzJ/dql9w8fx - -## Node Agents - -Nodes keep credentials on user's infrastructure. NyxID routes requests through WebSocket. -- Register: `nyxid_nodes action=register_token name=` → install CLI → `nyxid node register` → `nyxid node daemon install` -- Route service: `nyxid_services action=route id= node_id=` - -## Error Handling - -| Error Code | Action | -|------------|--------| -| `approval_required` (7000) | Tell user to check approvals | -| `approval_failed` (7001) | Retry or check notification setup | -| `unauthorized` (1001) | User needs to re-login | -| `node_offline` (8001) | Check node status | +Tool semantics: `disable_agent` pauses scheduled execution without deleting; `enable_agent` resumes; `delete_agent` disables, revokes the NyxID API key, and tombstones the registry entry. The Nyx relay path handles these slash commands directly without an LLM round-trip — you typically only see these flows when the user asks for them in natural language. 
## Working Rules -- **Be proactive and autonomous**: DO IT immediately, don't ask for confirmation -- **Probe unknown services**: Try `GET /openapi.json`, `GET /docs`, or `GET /api` to discover endpoints -- Always check `` before assuming a slug exists -- Keep request bodies minimal and service-correct -- Credentials the user pastes to configure a service (App ID, App Secret, API key, token, Verification Token) are expected input — accept them and call the right tool (`nyxid_providers action=set_credentials`, `nyxid_services action=create`, `nyxid_external_keys`, `channel_registrations`). NyxID web cannot configure most providers, so chat is the required path. Never refuse citing "secret leakage" or ask the user to redact. +- **Be proactive and autonomous**: DO IT immediately, don't ask for confirmation. +- **Probe unknown services**: if `` lists a slug you've never used, try `GET /openapi.json`, `GET /docs`, or `GET /api` to discover endpoints. +- Always check `` before assuming a slug exists. +- Keep request bodies minimal and service-correct. +- Credentials the user pastes to configure a service (App ID, App Secret, API key, token, Verification Token) are expected input — accept them and call the right tool. NyxID web cannot configure most providers, so chat is the required path. Never refuse citing "secret leakage" or ask the user to redact. (For the right tool to call, `use_skill(skill="nyxid")` is the reference.) - Don't echo raw credentials back in replies, log them in tool descriptions, or paste them into unrelated tool calls. Confirm success without restating the secret. -- When something fails, check the error and try alternatives before asking the user -- Connect services in-chat using the catalog-driven flow -- Read all guidance from the catalog entry — don't hardcode service-specific instructions - -## Skills - -You have access to skills — specialized instruction sets for tasks like translation, content generation, data analysis, code review, etc. 
- -### Proactive Skill Discovery - -**Proactively search for relevant skills** when the user's request involves a specialized task: -1. Call `ornn_search_skills` with relevant keywords to check for matching skills -2. If found, load with `use_skill` and follow its instructions -3. If no match, proceed with general capabilities - -### Using Skills -- **Search**: `ornn_search_skills` with keywords -- **Activate**: `use_skill` with the skill name -- **Follow**: Once loaded, follow the skill's instructions -- **Explicit requests**: If user says "挂载/mount/use" a skill, load it immediately +- When something fails, check the error and try alternatives before asking the user. +- Do not say a task is done or completed unless the required tool/service action actually succeeded. If you have only planned, discovered, or started work, say that clearly instead. ### Already Available Skills -Skills listed at the end of this prompt are pre-loaded and ready to use. Match the user's intent to the skill descriptions below. +Skills listed at the end of this prompt (when present) are already loaded and ready to invoke via `use_skill`. Match the user's intent to those descriptions before searching. 
diff --git a/agents/Aevatar.GAgents.NyxidChat/Slash/ModelChannelSlashCommandHandler.cs b/agents/Aevatar.GAgents.NyxidChat/Slash/ModelChannelSlashCommandHandler.cs index 99a57c268..90d652702 100644 --- a/agents/Aevatar.GAgents.NyxidChat/Slash/ModelChannelSlashCommandHandler.cs +++ b/agents/Aevatar.GAgents.NyxidChat/Slash/ModelChannelSlashCommandHandler.cs @@ -1,8 +1,11 @@ +using Aevatar.Foundation.Abstractions; using Aevatar.GAgents.Channel.Abstractions; using Aevatar.GAgents.Channel.Abstractions.Slash; +using Aevatar.GAgents.Channel.Identity; using Aevatar.GAgents.Channel.Identity.Abstractions; using Aevatar.GAgents.NyxidChat.LlmSelection; using Aevatar.Studio.Application.Studio.Abstractions; +using Google.Protobuf.WellKnownTypes; using Microsoft.Extensions.Logging; namespace Aevatar.GAgents.NyxidChat.Slash; @@ -18,15 +21,18 @@ public sealed class ModelChannelSlashCommandHandler : IChannelSlashCommandHandle private readonly IUserLlmOptionsService? _optionsService; private readonly IUserLlmSelectionService? _selectionService; private readonly IUserLlmOptionsRenderer? _renderer; + private readonly IActorDispatchPort _actorDispatchPort; private readonly ILogger _logger; public ModelChannelSlashCommandHandler( ILogger logger, + IActorDispatchPort actorDispatchPort, IUserLlmOptionsService? optionsService = null, IUserLlmSelectionService? selectionService = null, IUserLlmOptionsRenderer? renderer = null) { _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + _actorDispatchPort = actorDispatchPort ?? 
throw new ArgumentNullException(nameof(actorDispatchPort)); _optionsService = optionsService; _selectionService = selectionService; _renderer = renderer; @@ -76,15 +82,30 @@ await _optionsService.GetOptionsAsync(BuildQuery(context, bindingId), ct).Config } catch (BindingNotFoundException) { - return new MessageContent { Text = "当前 NyxID 绑定不可用,请先发送 /init 重新绑定。" }; + return await SelfHealRevokedBindingAsync( + context, + reason: "auto_self_heal_remote_not_found", + submittedMessage: "NyxID 端 binding 已不可用,本地清理已提交。请稍后发送 /init 完成新绑定。", + degradedMessage: "NyxID 端 binding 已不可用,本地清理提交失败。请稍后重试 /models,或发送 /unbind 后再发送 /init 重新绑定。", + ct).ConfigureAwait(false); } catch (BindingRevokedException) { - return new MessageContent { Text = "当前 NyxID 绑定已失效,请先发送 /init 重新绑定。" }; + return await SelfHealRevokedBindingAsync( + context, + reason: "auto_self_heal_remote_revoked", + submittedMessage: "NyxID 端 binding 已失效,本地清理已提交。请稍后发送 /init 完成新绑定。", + degradedMessage: "NyxID 端 binding 已失效,本地清理提交失败。请稍后重试 /models,或发送 /unbind 后再发送 /init 重新绑定。", + ct).ConfigureAwait(false); } catch (BindingScopeMismatchException) { - return new MessageContent { Text = "当前 NyxID 绑定缺少 LLM route 权限,请先发送 /init 重新绑定。" }; + return await SelfHealRevokedBindingAsync( + context, + reason: "auto_self_heal_scope_mismatch", + submittedMessage: "当前 NyxID 绑定缺少 LLM route 权限,本地清理已提交。请稍后发送 /init 完成新绑定。", + degradedMessage: "当前 NyxID 绑定缺少 LLM route 权限,本地清理提交失败。请稍后重试 /models,或发送 /unbind 后再发送 /init 重新绑定。", + ct).ConfigureAwait(false); } catch (Exception ex) when (ex is InvalidOperationException or ArgumentException or HttpRequestException or NotSupportedException) { @@ -93,6 +114,85 @@ await _optionsService.GetOptionsAsync(BuildQuery(context, bindingId), ct).Config } } + /// + /// Submits a local binding revoke when NyxID reports the binding is gone + /// (revoked / not_found / scope-mismatch). 
This is intentionally dispatch + /// only: the slash request path must not activate projection scopes or + /// wait for read-model materialization. The user is told cleanup has been + /// submitted and can retry /init after the projection catches up. + /// + /// + /// Differs from explicit /unbind because the NyxID-side revoke is + /// already known; we only need to flip the local actor, with one retry for + /// transient dispatch failure. + /// + private async Task SelfHealRevokedBindingAsync( + ChannelSlashCommandContext context, + string reason, + string submittedMessage, + string degradedMessage, + CancellationToken ct) + { + var submitted = await TryDispatchLocalBindingRevokeAsync(context, reason, ct).ConfigureAwait(false); + return new MessageContent { Text = submitted ? submittedMessage : degradedMessage }; + } + + private async Task TryDispatchLocalBindingRevokeAsync( + ChannelSlashCommandContext context, + string reason, + CancellationToken ct) + { + var actorId = context.Subject.ToActorId(); + + // Single retry mirrors /unbind: a one-off dispatch hiccup should not + // leave the user permanently stuck with a stale local binding. + Exception? 
lastError = null; + for (var attempt = 1; attempt <= 2; attempt++) + { + try + { + var envelope = new EventEnvelope + { + Id = Guid.NewGuid().ToString("N"), + Timestamp = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), + Payload = Any.Pack(new RevokeBindingCommand + { + ExternalSubject = context.Subject.Clone(), + Reason = reason, + }), + Route = EnvelopeRouteSemantics.CreateDirect( + NyxIdChatServiceDefaults.ModelSelfHealPublisherActorId, + actorId), + }; + await _actorDispatchPort + .DispatchAsync(actorId, envelope, ct) + .ConfigureAwait(false); + _logger.LogWarning( + "/model submitted local binding self-heal actor={ActorId} after NyxID-side rejection: reason={Reason}, attempt={Attempt}/2, subject={Platform}:{Tenant}:{User}", + actorId, + reason, + attempt, + context.Subject.Platform, context.Subject.Tenant, context.Subject.ExternalUserId); + return true; + } + catch (Exception ex) when (!ct.IsCancellationRequested) + { + lastError = ex; + _logger.LogWarning(ex, + "/model: local binding self-heal dispatch failed on attempt {Attempt}/2 for actor={ActorId}, reason={Reason}", + attempt, + actorId, + reason); + } + } + + _logger.LogError(lastError, + "/model failed to self-heal local binding actor={ActorId} after 2 attempts; reason={Reason}. User has been told to /unbind manually.", + actorId, + reason); + return false; + } + private async Task HandleUseAsync( ChannelSlashCommandContext context, string bindingId, @@ -310,16 +410,32 @@ private static bool TryResolveNumberedOption( .Where(option => option.ServiceSlug.Contains(requested, StringComparison.OrdinalIgnoreCase) || option.DisplayName.Contains(requested, StringComparison.OrdinalIgnoreCase)) - .Take(2) .ToArray(); + var selectable = fuzzy.Where(IsSelectable).Take(2).ToArray(); + if (selectable.Length == 1) + return selectable[0]; + return fuzzy.Length == 1 ? fuzzy[0] : null; } - private static UserLlmOption? 
FindExactOption(string requested, IReadOnlyList available) => - available.FirstOrDefault(option => - string.Equals(option.ServiceId, requested, StringComparison.OrdinalIgnoreCase) || - string.Equals(option.ServiceSlug, requested, StringComparison.OrdinalIgnoreCase) || - string.Equals(option.DisplayName, requested, StringComparison.OrdinalIgnoreCase)); + private static UserLlmOption? FindExactOption(string requested, IReadOnlyList available) + { + var matches = available + .Where(option => + string.Equals(option.ServiceId, requested, StringComparison.OrdinalIgnoreCase) || + string.Equals(option.ServiceSlug, requested, StringComparison.OrdinalIgnoreCase) || + string.Equals(option.DisplayName, requested, StringComparison.OrdinalIgnoreCase)) + .ToArray(); + + var selectable = matches.Where(IsSelectable).Take(2).ToArray(); + if (selectable.Length == 1) + return selectable[0]; + + return matches.FirstOrDefault(); + } + + private static bool IsSelectable(UserLlmOption option) => + option.Allowed && string.Equals(option.Status, "ready", StringComparison.OrdinalIgnoreCase); private static bool TryResolveExactOptionPrefix( string requested, diff --git a/agents/Aevatar.GAgents.NyxidChat/protos/agent_run.proto b/agents/Aevatar.GAgents.NyxidChat/protos/agent_run.proto new file mode 100644 index 000000000..7b23152c8 --- /dev/null +++ b/agents/Aevatar.GAgents.NyxidChat/protos/agent_run.proto @@ -0,0 +1,130 @@ +syntax = "proto3"; + +package aevatar.gagents.nyxid_chat; + +option csharp_namespace = "Aevatar.GAgents.NyxidChat"; + +import "chat_activity.proto"; +import "conversation_events.proto"; + +enum AgentRunStatus { + AGENT_RUN_STATUS_UNSPECIFIED = 0; + AGENT_RUN_STATUS_STARTED = 1; + // The LLM run has produced an immutable reply payload (success or failure + // terminal state) and persisted it, but the LlmReplyReadyEvent has not yet + // been accepted by the target conversation actor. 
Output-dispatch retries + // that happen in this status must re-deliver the persisted payload rather + // than re-run the LLM / tool chain. + AGENT_RUN_STATUS_REPLY_PRODUCED = 2; + AGENT_RUN_STATUS_DROPPED = 3; + AGENT_RUN_STATUS_FAILED = 4; + // The LlmReplyReadyEvent has been accepted by the target conversation + // actor. This is the AgentRunGAgent-side terminal: from here on the run + // actor only schedules cleanup; it never re-runs the LLM or re-dispatches. + // NOTE: REPLY_HANDED_OFF != chain.delivered. The chain-level delivered + // phase (user-visible) is owned by ConversationGAgent and observed via + // ConversationGAgentState.last_reply_delivery; REPLY_HANDED_OFF is a + // necessary-not-sufficient precondition. See ADR-0021. + AGENT_RUN_STATUS_REPLY_HANDED_OFF = 5; +} + +message AgentRunGAgentState { + string run_id = 1; + string correlation_id = 2; + string target_actor_id = 3; + AgentRunStatus status = 4; + int64 started_at_unix_ms = 5; + int64 completed_at_unix_ms = 6; + string error_code = 7; + string error_summary = 8; + // Produced LLM reply payload, persisted before dispatch so output-dispatch + // retries never re-invoke the LLM/tool chain. + string produced_reply_text = 9; + aevatar.gagents.channel.abstractions.MessageContent produced_outbound = 10; + aevatar.gagents.channel.runtime.LlmReplyTerminalState produced_terminal_state = 11; + // Field 12 (`reply_dispatched`) was a bool flag promoted to the explicit + // AGENT_RUN_STATUS_REPLY_HANDED_OFF status in ADR-0021. Reserved here so + // accidental reuse of the field number, or a stale serializer built before + // the split, fails loudly instead of silently masking the new status. + reserved 12; + reserved "reply_dispatched"; + // Wall-clock when the terminal-state cleanup callback completed. 
Combined + // with status ∈ {DROPPED, FAILED, REPLY_HANDED_OFF}, a non-zero value + // marks the run as finalized (chain.finalized) — late ready/dropped/failed + // /cleanup signals must no-op from this point. + int64 cleanup_completed_at_unix_ms = 13; +} + +// Transient command for the run actor. The nested NeedsLlmReplyEvent may carry +// a short-lived relay reply_token; AgentRunGAgent must never persist that +// credential into AgentRunGAgentState or any AgentRun*Event. +message AgentRunStartRequested { + aevatar.gagents.channel.runtime.NeedsLlmReplyEvent request = 1; +} + +message AgentRunCleanupRequested { + string run_id = 1; + int64 requested_at_unix_ms = 2; +} + +message AgentRunStartedEvent { + string run_id = 1; + string correlation_id = 2; + string target_actor_id = 3; + int64 started_at_unix_ms = 4; +} + +message AgentRunReplyProducedEvent { + string run_id = 1; + string correlation_id = 2; + string target_actor_id = 3; + aevatar.gagents.channel.runtime.LlmReplyTerminalState terminal_state = 4; + string error_code = 5; + string error_summary = 6; + int64 produced_at_unix_ms = 7; + // Immutable reply payload produced by the LLM run. Persisting these lets the + // dispatch retry path re-deliver the same reply without re-running the LLM + // chain (which would otherwise repeat tool side-effects like SSH exec or + // external API calls and incur duplicate billing). + string reply_text = 8; + aevatar.gagents.channel.abstractions.MessageContent outbound = 9; +} + +// Persisted after the LlmReplyReadyEvent has been successfully delivered to +// the target conversation actor. Until this event lands, output-dispatch +// retries must re-deliver the persisted produced payload from +// AgentRunGAgentState rather than re-run the LLM / tool chain. 
+message AgentRunReplyDispatchedEvent { + string run_id = 1; + string correlation_id = 2; + string target_actor_id = 3; + int64 dispatched_at_unix_ms = 4; +} + +message AgentRunDroppedEvent { + string run_id = 1; + string correlation_id = 2; + string target_actor_id = 3; + string reason = 4; + int64 dropped_at_unix_ms = 5; +} + +message AgentRunFailedEvent { + string run_id = 1; + string correlation_id = 2; + string target_actor_id = 3; + string error_code = 4; + string error_summary = 5; + int64 failed_at_unix_ms = 6; +} + +// Persisted by the terminal-state cleanup callback after it has finished its +// idempotent work (in-memory token eviction, scheduler unregistration, etc.). +// Combined with terminal status, this event drives +// AgentRunGAgentState.cleanup_completed_at_unix_ms to a non-zero value and +// marks the run as finalized (chain.finalized) per ADR-0021. +message AgentRunCleanupCompletedEvent { + string run_id = 1; + string correlation_id = 2; + int64 completed_at_unix_ms = 3; +} diff --git a/agents/Aevatar.GAgents.Scheduled/DependencyInjection/ScheduledServiceCollectionExtensions.cs b/agents/Aevatar.GAgents.Scheduled/DependencyInjection/ScheduledServiceCollectionExtensions.cs index 9d775237d..4640ffbb3 100644 --- a/agents/Aevatar.GAgents.Scheduled/DependencyInjection/ScheduledServiceCollectionExtensions.cs +++ b/agents/Aevatar.GAgents.Scheduled/DependencyInjection/ScheduledServiceCollectionExtensions.cs @@ -5,7 +5,6 @@ using Aevatar.CQRS.Projection.Stores.Abstractions; using Aevatar.Foundation.Abstractions.Maintenance; using Aevatar.GAgents.Channel.Runtime; -using Aevatar.GAgents.Scheduled.WorkflowModules; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; @@ -71,7 +70,6 @@ public static IServiceCollection AddScheduledAgents( services.TryAddSingleton(); services.TryAddSingleton(); services.TryAddSingleton(); - services.TryAddSingleton(); // Caller-scope 
resolver chain (issue #466 §B). Channel resolver runs first so // a request with channel metadata produces the per-sender scope rather than // the looser nyxid-scoped tuple from the underlying NyxID session. @@ -108,12 +106,6 @@ public static IServiceCollection AddScheduledAgents( static doc => doc.Id, static key => key); } - // Register the scheduled-agent workflow module pack so the social_media template's - // `twitter_publish` step type resolves at workflow run time (issue #216). - // AddWorkflowModulePack uses TryAddEnumerable, so calling alongside AddAevatarWorkflow - // is idempotent. - services.AddScheduledWorkflowExtensions(); - return services; } diff --git a/agents/Aevatar.GAgents.Scheduled/IWorkflowAgentCommandPort.cs b/agents/Aevatar.GAgents.Scheduled/IWorkflowAgentCommandPort.cs deleted file mode 100644 index a94e97bcf..000000000 --- a/agents/Aevatar.GAgents.Scheduled/IWorkflowAgentCommandPort.cs +++ /dev/null @@ -1,27 +0,0 @@ -namespace Aevatar.GAgents.Scheduled; - -/// -/// Application-service surface for WorkflowAgent lifecycle. Mirrors -/// : owns actor lifecycle, catalog -/// projection priming, and envelope dispatch through -/// so LLM -/// tools and admin endpoints stop reaching for actor.HandleEventAsync. -/// -public interface IWorkflowAgentCommandPort -{ - Task InitializeAsync( - string agentId, - InitializeWorkflowAgentCommand command, - bool runImmediately, - CancellationToken ct = default); - - Task TriggerAsync( - string agentId, - string reason, - string? 
revisionFeedback, - CancellationToken ct = default); - - Task DisableAsync(string agentId, string reason, CancellationToken ct = default); - - Task EnableAsync(string agentId, string reason, CancellationToken ct = default); -} diff --git a/agents/Aevatar.GAgents.Scheduled/NyxIdProxyToolFailureCountingMiddleware.cs b/agents/Aevatar.GAgents.Scheduled/NyxIdProxyToolFailureCountingMiddleware.cs index 9927b06f7..9a863118d 100644 --- a/agents/Aevatar.GAgents.Scheduled/NyxIdProxyToolFailureCountingMiddleware.cs +++ b/agents/Aevatar.GAgents.Scheduled/NyxIdProxyToolFailureCountingMiddleware.cs @@ -17,7 +17,7 @@ namespace Aevatar.GAgents.Scheduled; /// /// Only counts nyxid_proxy calls — other tools may have their own success /// semantics (e.g., a search tool that returns 0 hits is not a failure), and the safety -/// net is scoped to the proxy fan-out that powers the daily-report skill. +/// net is scoped to the proxy fan-out that powers fetch-and-summarize skills. /// internal sealed class NyxIdProxyToolFailureCountingMiddleware : IToolCallMiddleware { diff --git a/agents/Aevatar.GAgents.Scheduled/ScheduledRetiredActorSpec.cs b/agents/Aevatar.GAgents.Scheduled/ScheduledRetiredActorSpec.cs index e6ff6d67c..001f6777b 100644 --- a/agents/Aevatar.GAgents.Scheduled/ScheduledRetiredActorSpec.cs +++ b/agents/Aevatar.GAgents.Scheduled/ScheduledRetiredActorSpec.cs @@ -31,7 +31,16 @@ namespace Aevatar.GAgents.Scheduled; public sealed class ScheduledRetiredActorSpec : RetiredActorSpec { private const string RetiredSkillRunnerType = "Aevatar.GAgents.ChannelRuntime.SkillRunnerGAgent"; + // Retained as a string literal so legacy clusters still clean up workflow_agent + // event streams persisted before the social_media template was removed (issue #598). + // Delete once all legacy actors have been retired from production clusters. 
private const string RetiredWorkflowAgentType = "Aevatar.GAgents.ChannelRuntime.WorkflowAgentGAgent"; + // Mirror of the deleted WorkflowAgentDefaults — kept here so retired-actor discovery + // can still recognize legacy workflow_agent rows persisted in the catalog read model + // and drive their cleanup. New agents never carry these tokens; delete with the + // retired workflow_agent constants once all legacy actors are gone. + private const string LegacyWorkflowAgentType = "workflow_agent"; + private const string LegacyWorkflowAgentActorIdPrefix = "workflow-agent"; private const int ReadModelPageSize = 500; public override string SpecId => "scheduled"; @@ -259,12 +268,12 @@ private static bool IsGeneratedUserAgent(string? agentId, string? agentType) return false; if (string.Equals(agentType, SkillRunnerDefaults.AgentType, StringComparison.Ordinal) || - string.Equals(agentType, WorkflowAgentDefaults.AgentType, StringComparison.Ordinal)) + string.Equals(agentType, LegacyWorkflowAgentType, StringComparison.Ordinal)) { return true; } return normalizedId.StartsWith($"{SkillRunnerDefaults.ActorIdPrefix}-", StringComparison.Ordinal) || - normalizedId.StartsWith($"{WorkflowAgentDefaults.ActorIdPrefix}-", StringComparison.Ordinal); + normalizedId.StartsWith($"{LegacyWorkflowAgentActorIdPrefix}-", StringComparison.Ordinal); } } diff --git a/agents/Aevatar.GAgents.Scheduled/SkillRunnerGAgent.cs b/agents/Aevatar.GAgents.Scheduled/SkillRunnerGAgent.cs index 6ed1e9dc4..c16f4ec62 100644 --- a/agents/Aevatar.GAgents.Scheduled/SkillRunnerGAgent.cs +++ b/agents/Aevatar.GAgents.Scheduled/SkillRunnerGAgent.cs @@ -40,7 +40,8 @@ public SkillRunnerGAgent( IEnumerable? llmMiddlewares = null, IEnumerable? toolSources = null, NyxIdApiClient? nyxIdApiClient = null, - IOwnerLlmConfigSource? ownerLlmConfigSource = null) + IOwnerLlmConfigSource? ownerLlmConfigSource = null, + IToolApprovalHandler? 
approvalHandler = null) : this( BuildToolMiddlewareChain(toolMiddlewares), llmProviderFactory, @@ -49,7 +50,8 @@ public SkillRunnerGAgent( llmMiddlewares, toolSources, nyxIdApiClient, - ownerLlmConfigSource) + ownerLlmConfigSource, + approvalHandler) { } @@ -61,14 +63,16 @@ private SkillRunnerGAgent( IEnumerable? llmMiddlewares, IEnumerable? toolSources, NyxIdApiClient? nyxIdApiClient, - IOwnerLlmConfigSource? ownerLlmConfigSource) + IOwnerLlmConfigSource? ownerLlmConfigSource, + IToolApprovalHandler? approvalHandler) : base( llmProviderFactory, additionalHooks, agentMiddlewares, toolMiddlewareChain.Middlewares, llmMiddlewares, - toolSources) + toolSources, + approvalHandler) { _nyxIdApiClient = nyxIdApiClient; _ownerLlmConfigSource = ownerLlmConfigSource; @@ -164,6 +168,7 @@ public async Task HandleInitializeAsync(InitializeSkillRunnerCommand command) ScopeId = command.ScopeId?.Trim() ?? string.Empty, ProviderName = NormalizeProviderName(command.ProviderName), Model = command.Model?.Trim() ?? string.Empty, + RequiresNyxidProxySuccess = command.RequiresNyxidProxySuccess, }; if (command.HasTemperature) @@ -316,7 +321,7 @@ private async Task ExecuteSkillAsync(DateTimeOffset now, string? reason, content.Append(chunk.DeltaContent); if (sink is not null) // Per-delta `content.ToString()` is O(n) per call → O(n²) for the whole - // turn. Acceptable for daily-report-sized output (≤30 KB capped, and the + // turn. Acceptable for daily-sized output (≤30 KB capped, and the // sink dedupes against `_lastEmittedText` so most allocations don't even // make it onto the wire). If a future skill produces materially longer // output, switch the sink contract to `(StringBuilder, Range)` snapshots @@ -329,13 +334,25 @@ private async Task ExecuteSkillAsync(DateTimeOffset now, string? 
reason, if (string.IsNullOrWhiteSpace(output)) output = "No update generated."; - // Issue #439 safety net (PR #471): if EVERY nyxid_proxy tool call in this run - // failed, the LLM's plain-text output is structurally indistinguishable from a - // real "no activity" report. Throw before delivery so HandleTriggerAsync's catch - // path persists `SkillRunnerExecutionFailedEvent` instead of recording a fake - // success — must fire BEFORE chunked dispatch so we don't post part-1 of a - // report that we're about to flag as failed. - EnsureToolStatusAllowsCompletion(_toolFailureCounter.FailureCount, _toolFailureCounter.SuccessCount); + // Issue #439 safety net (PR #471 + this PR): refuse to record fake-success runs. + // Two failure modes are caught here: + // * all-fail — every nyxid_proxy call failed, the LLM's plain-text output is + // structurally indistinguishable from a real "no activity" report; + // * never-called — when State.RequiresNyxidProxySuccess is set, a run that + // completes with zero successful nyxid_proxy calls means the LLM bypassed + // tools entirely and produced text from prior context (the original #439 + // symptom: 52 commits in 24h reported as "No meaningful public GitHub + // activity"). The original safety net only covered the all-fail case + // (failureCount > 0); this gap was flagged in PR #471 review and is closed + // here for fetch-and-summarize templates that opt in. + // Throw before delivery so HandleTriggerAsync's catch path persists + // SkillRunnerExecutionFailedEvent instead of a clean SkillRunnerExecutionCompletedEvent — + // must fire BEFORE chunked dispatch so we don't post part-1 of a report + // we're about to flag as failed. + EnsureToolStatusAllowsCompletion( + _toolFailureCounter.FailureCount, + _toolFailureCounter.SuccessCount, + State.RequiresNyxidProxySuccess); // Issue #423 §C — chunked delivery for outputs that exceed the Lark body cap. 
// For ≤30 KB outputs the chunker returns a single-element list and the dispatch @@ -401,6 +418,18 @@ private async Task DispatchOutputChunksAsync( /// private SkillRunnerStreamingReplySink? TryCreateStreamingSink() { + // Issue #439: when the run + // is gated by EnsureToolStatusAllowsCompletion (RequiresNyxidProxySuccess set), + // streaming each delta would POST/PUT the partial text to Lark live — i.e. a + // hallucinated daily report would already be visible in the user's DM by the + // time the guard fires, and each retry would repost it. Disable live streaming + // for those skills so the message only POSTs through the chunked-dispatch path + // AFTER the guard has confirmed at least one nyxid_proxy success. Trade-off: the + // user no longer sees the report grow live, but output integrity wins over the + // streaming-edit UX for fetch-and-summarize skills. + if (State.RequiresNyxidProxySuccess) + return null; + var client = _nyxIdApiClient ?? Services.GetService(); if (client is null) { @@ -448,19 +477,35 @@ private async Task DispatchOutputChunksAsync( } /// - /// Runner-layer safety net for issue #439: when every nyxid_proxy call in a run failed, - /// the LLM's plain-text output is structurally indistinguishable from a real "no - /// activity" report — the prompt-layer §9 Source health footer can be silently dropped - /// by a weaker model, and the runner has no other way to tell. Throwing here routes - /// through HandleTriggerAsync's existing catch path, which preserves the retry budget - /// and (after retries are exhausted) persists SkillRunnerExecutionFailedEvent so - /// /agent-status reports a non-zero error_count with a meaningful - /// last_error instead of a fake-success run. - /// Mixed runs (any successful nyxid_proxy call) still complete normally — partial data - /// is more useful to the user than a blanket failure, and the prompt-layer Source - /// health footer surfaces the failed queries. + /// Runner-layer safety net for issue #439. 
Two fake-success modes are caught here: + /// + /// + /// all-fail (failureCount > 0, successCount == 0): + /// every nyxid_proxy call failed, but the LLM's plain-text output is structurally + /// indistinguishable from a real "no activity" report. The prompt-layer §9 Source + /// health footer can be dropped by a weaker model, and the runner has no other way + /// to tell. + /// + /// + /// never-called (requiresNyxidProxySuccess == true, + /// successCount == 0): the LLM bypassed tools entirely and produced + /// text from prior context. For fetch-and-summarize skills like daily this is + /// exactly the original #439 symptom (52 commits in 24h reported as "No meaningful + /// public GitHub activity"). Skills that don't depend on tool data (e.g. pure LLM + /// transformations) leave the flag false and pass through. + /// + /// + /// Throwing here routes through HandleTriggerAsync's existing catch path, which preserves + /// the retry budget and (after retries are exhausted) persists SkillRunnerExecutionFailedEvent + /// so /agent-status reports a non-zero error_count with a meaningful + /// last_error instead of a fake-success run. Mixed runs (any successful nyxid_proxy + /// call) still complete normally — partial data is more useful than a blanket failure, and + /// the prompt-layer Source health footer surfaces the failed queries. /// - internal static void EnsureToolStatusAllowsCompletion(int failureCount, int successCount) + internal static void EnsureToolStatusAllowsCompletion( + int failureCount, + int successCount, + bool requiresNyxidProxySuccess) { if (failureCount > 0 && successCount == 0) { @@ -468,6 +513,14 @@ internal static void EnsureToolStatusAllowsCompletion(int failureCount, int succ $"All {failureCount} nyxid_proxy tool call(s) in this run failed; refusing to record an empty-day report as a successful execution. 
" + "Inspect the previous attempt's tool output for the underlying NyxID/upstream error envelope."); } + + if (requiresNyxidProxySuccess && successCount == 0) + { + throw new InvalidOperationException( + "Skill requires at least one successful nyxid_proxy tool call but completed with zero. " + + "The LLM produced output without fetching source data (e.g. hallucinated a daily report from prior context). " + + "Refusing to record this run as a successful execution."); + } } private Task SendOutputAsync(string output, CancellationToken ct) => @@ -615,7 +668,7 @@ private static string BuildLarkRejectionMessage(int? larkCode, string detail) return $"Lark message delivery rejected (code={larkCode}): {detail}. " + "This agent was created before cross-app union_id ingress existed; " + - "delete and recreate it (`/agents` → Delete → `/daily`) to pick up the cross-app safe target."; + "delete and recreate it (`/agents` → Delete → recreate) to pick up the cross-app safe target."; } if (larkCode == LarkBotErrorCodes.UserIdCrossTenant) @@ -628,7 +681,7 @@ private static string BuildLarkRejectionMessage(int? larkCode, string detail) $"Lark message delivery rejected (code={larkCode}): {detail}. " + "The outbound Lark app is in a different tenant than the inbound app, so " + "user-id translation is impossible. Delete and recreate the agent " + - "(`/agents` → Delete → `/daily`) so the new chat_id-preferred outbound path " + + "(`/agents` → Delete → recreate) so the new chat_id-preferred outbound path " + "takes effect, or align the NyxID `s/api-lark-bot` proxy with the channel-bot that " + "received the inbound event."; } @@ -702,7 +755,7 @@ private async Task> BuildExecutionMetadataAs metadata["scope_id"] = State.ScopeId; // Pin the bot owner's pre-configured model + NyxID route + tool-round cap onto the - // outbound LLM metadata, the same pattern ChannelLlmReplyInboxRuntime applies for + // outbound LLM metadata, the same pattern AgentRunGAgent applies for // nyxid-chat. 
Without this, scheduled runs fall through to NyxIdLLMProvider's // compile-time defaults (`gpt-5.4` against `/api/v1/llm/gateway/v1/`), which the // gateway routes to the OpenAI provider — failing for bot owners who pre-configured @@ -794,6 +847,15 @@ private static SkillRunnerState ApplyInitialized(SkillRunnerState current, Skill next.ScopeId = evt.ScopeId ?? string.Empty; next.ProviderName = NormalizeProviderName(evt.ProviderName); next.Model = evt.Model ?? string.Empty; + // Legacy actors created before proto field 16 existed replay an init event whose + // RequiresNyxidProxySuccess deserializes as false, which would let them keep the + // pre-#439 zero-tool-call fake-success path — making post-fix behavior depend on + // creation time rather than template semantics. Derive the effective flag from + // the template name so known fetch-and-summarize skills get the safety net on + // replay regardless of when the actor was created. New templates that need this + // protection should be added to RequiresProxySuccessByTemplate. + next.RequiresNyxidProxySuccess = evt.RequiresNyxidProxySuccess + || RequiresProxySuccessByTemplate(evt.TemplateName); // Missing sampling fields intentionally use upstream model defaults; // missing runner limits fall back to SkillRunner defaults. @@ -852,6 +914,21 @@ private static SkillRunnerState ApplyEnabled(SkillRunnerState current, SkillRunn return next; } + /// + /// Templates whose runs MUST observe at least one successful nyxid_proxy call to be + /// considered successful. Used by ApplyInitialized as the legacy-actor + /// default when the persisted init event predates proto field 16. Add new templates + /// here when they're fetch-and-summarize style (the LLM bypassing tools and producing + /// text from prior context is a fake-success failure mode for them). + /// + internal static bool RequiresProxySuccessByTemplate(string? 
templateName) => + // Reserved for future fetch-and-summarize templates that need the runner-layer + // safety net (issue #439). Currently empty: the in-tree daily template was + // removed in favor of the Ornn-hosted skill, and no other template needs the + // legacy proto-field-16-default backfill. Keep the method so tests + the apply + // path don't need to special-case "no templates" — just add new entries here. + templateName is not null && false; + private static string NormalizeProviderName(string? providerName) => string.IsNullOrWhiteSpace(providerName) ? SkillRunnerDefaults.DefaultProviderName : providerName.Trim(); diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentCommandPort.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowAgentCommandPort.cs deleted file mode 100644 index 422f2bf21..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentCommandPort.cs +++ /dev/null @@ -1,98 +0,0 @@ -using Aevatar.Foundation.Abstractions; -using Google.Protobuf; -using Google.Protobuf.WellKnownTypes; - -namespace Aevatar.GAgents.Scheduled; - -internal sealed class WorkflowAgentCommandPort : IWorkflowAgentCommandPort -{ - private const string PublisherActorId = "scheduled.workflow-agent"; - - private readonly IActorRuntime _actorRuntime; - private readonly IActorDispatchPort _actorDispatchPort; - private readonly UserAgentCatalogProjectionPort _catalogProjectionPort; - - public WorkflowAgentCommandPort( - IActorRuntime actorRuntime, - IActorDispatchPort actorDispatchPort, - UserAgentCatalogProjectionPort catalogProjectionPort) - { - _actorRuntime = actorRuntime ?? throw new ArgumentNullException(nameof(actorRuntime)); - _actorDispatchPort = actorDispatchPort ?? throw new ArgumentNullException(nameof(actorDispatchPort)); - _catalogProjectionPort = catalogProjectionPort ?? 
throw new ArgumentNullException(nameof(catalogProjectionPort)); - } - - public async Task InitializeAsync( - string agentId, - InitializeWorkflowAgentCommand command, - bool runImmediately, - CancellationToken ct = default) - { - ArgumentException.ThrowIfNullOrWhiteSpace(agentId); - ArgumentNullException.ThrowIfNull(command); - - await EnsureWorkflowAgentActorAsync(agentId, ct); - await _catalogProjectionPort.EnsureProjectionForActorAsync(UserAgentCatalogGAgent.WellKnownId, ct); - - await DispatchAsync(agentId, command, ct); - - if (runImmediately) - { - await DispatchAsync( - agentId, - new TriggerWorkflowAgentExecutionCommand { Reason = "create_agent" }, - ct); - } - } - - public async Task TriggerAsync( - string agentId, - string reason, - string? revisionFeedback, - CancellationToken ct = default) - { - ArgumentException.ThrowIfNullOrWhiteSpace(agentId); - await EnsureWorkflowAgentActorAsync(agentId, ct); - await DispatchAsync( - agentId, - new TriggerWorkflowAgentExecutionCommand - { - Reason = reason ?? string.Empty, - RevisionFeedback = revisionFeedback ?? string.Empty, - }, - ct); - } - - public async Task DisableAsync(string agentId, string reason, CancellationToken ct = default) - { - ArgumentException.ThrowIfNullOrWhiteSpace(agentId); - await EnsureWorkflowAgentActorAsync(agentId, ct); - await DispatchAsync(agentId, new DisableWorkflowAgentCommand { Reason = reason ?? string.Empty }, ct); - } - - public async Task EnableAsync(string agentId, string reason, CancellationToken ct = default) - { - ArgumentException.ThrowIfNullOrWhiteSpace(agentId); - await EnsureWorkflowAgentActorAsync(agentId, ct); - await DispatchAsync(agentId, new EnableWorkflowAgentCommand { Reason = reason ?? string.Empty }, ct); - } - - private async Task EnsureWorkflowAgentActorAsync(string agentId, CancellationToken ct) - { - _ = await _actorRuntime.GetAsync(agentId) - ?? 
await _actorRuntime.CreateAsync(agentId, ct); - } - - private Task DispatchAsync(string agentId, TCommand command, CancellationToken ct) - where TCommand : class, IMessage - { - var envelope = new EventEnvelope - { - Id = Guid.NewGuid().ToString("N"), - Timestamp = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow), - Payload = Any.Pack(command), - Route = EnvelopeRouteSemantics.CreateDirect(PublisherActorId, agentId), - }; - return _actorDispatchPort.DispatchAsync(agentId, envelope, ct); - } -} diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentDefaults.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowAgentDefaults.cs deleted file mode 100644 index 715ab830f..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentDefaults.cs +++ /dev/null @@ -1,17 +0,0 @@ -namespace Aevatar.GAgents.Scheduled; - -public static class WorkflowAgentDefaults -{ - public const string AgentType = "workflow_agent"; - public const string ActorIdPrefix = "workflow-agent"; - public const string TemplateName = "social_media"; - public const string ProviderName = "nyxid"; - public const string DefaultPlatform = "lark"; - public const string DefaultTimezone = "UTC"; - public const string StatusRunning = "running"; - public const string StatusError = "error"; - public const string StatusDisabled = "disabled"; - public const string TriggerCallbackId = "workflow-agent-next-fire"; - - public static string GenerateActorId() => $"{ActorIdPrefix}-{Guid.NewGuid():N}"; -} diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentGAgent.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowAgentGAgent.cs deleted file mode 100644 index 9c477dbe7..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentGAgent.cs +++ /dev/null @@ -1,399 +0,0 @@ -using Aevatar.AI.Abstractions.LLMProviders; -using Aevatar.AI.Core.LLMProviders; -using Aevatar.CQRS.Core.Abstractions.Commands; -using Aevatar.Foundation.Abstractions; -using Aevatar.Foundation.Abstractions.Attributes; -using Aevatar.Foundation.Core; 
-using Aevatar.Foundation.Core.EventSourcing; -using Aevatar.GAgents.Channel.Abstractions; -using Aevatar.GAgents.Channel.Runtime; -using Aevatar.Workflow.Application.Abstractions.Runs; -using Google.Protobuf; -using Google.Protobuf.WellKnownTypes; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; - -namespace Aevatar.GAgents.Scheduled; - -public sealed class WorkflowAgentGAgent : GAgentBase -{ - private readonly IOwnerLlmConfigSource? _ownerLlmConfigSource; - private ChannelScheduleRunner? _scheduler; - - public WorkflowAgentGAgent(IOwnerLlmConfigSource? ownerLlmConfigSource = null) - { - _ownerLlmConfigSource = ownerLlmConfigSource; - } - - private ChannelScheduleRunner Scheduler => _scheduler ??= new ChannelScheduleRunner( - callbackId: WorkflowAgentDefaults.TriggerCallbackId, - schedulableSource: () => State, - triggerFactory: () => new TriggerWorkflowAgentExecutionCommand { Reason = "schedule" }, - persistNextRunEventAsync: nextRunUtc => PersistDomainEventAsync(new WorkflowAgentNextRunScheduledEvent - { - NextRunAt = Timestamp.FromDateTimeOffset(nextRunUtc), - }), - scheduleTimeoutAsync: (id, dueTime, evt, ct) => ScheduleSelfDurableTimeoutAsync(id, dueTime, evt, ct: ct), - cancelCallbackAsync: (lease, ct) => CancelDurableCallbackAsync(lease, ct), - logger: Logger, - ownerDescription: $"Workflow agent {Id}"); - - protected override async Task OnActivateAsync(CancellationToken ct) - { - await base.OnActivateAsync(ct); - await Scheduler.BootstrapOnActivateAsync(ct); - } - - protected override WorkflowAgentState TransitionState(WorkflowAgentState current, IMessage evt) => - StateTransitionMatcher - .Match(current, evt) - .On(ApplyInitialized) - .On(ApplyNextRunScheduled) - .On(ApplyDispatched) - .On(ApplyFailed) - .On(ApplyDisabled) - .On(ApplyEnabled) - .OrCurrent(); - - [EventHandler] - public async Task HandleInitializeAsync(InitializeWorkflowAgentCommand command) - { - if (string.IsNullOrWhiteSpace(command.WorkflowActorId)) 
- { - Logger.LogWarning("Workflow agent {ActorId} initialization ignored because workflow_actor_id is empty", Id); - return; - } - -#pragma warning disable CS0612 // legacy fields populated for rollback compat during owner_scope migration - var initializedEvent = new WorkflowAgentInitializedEvent - { - WorkflowId = command.WorkflowId?.Trim() ?? string.Empty, - WorkflowName = command.WorkflowName?.Trim() ?? string.Empty, - WorkflowActorId = command.WorkflowActorId?.Trim() ?? string.Empty, - ExecutionPrompt = command.ExecutionPrompt?.Trim() ?? string.Empty, - ScheduleCron = command.ScheduleCron?.Trim() ?? string.Empty, - ScheduleTimezone = NormalizeTimezone(command.ScheduleTimezone), - ConversationId = command.ConversationId?.Trim() ?? string.Empty, - NyxProviderSlug = command.NyxProviderSlug?.Trim() ?? string.Empty, - NyxApiKey = command.NyxApiKey?.Trim() ?? string.Empty, - OwnerNyxUserId = command.OwnerNyxUserId?.Trim() ?? string.Empty, - ApiKeyId = command.ApiKeyId?.Trim() ?? string.Empty, - Enabled = command.Enabled, - ScopeId = command.ScopeId?.Trim() ?? string.Empty, - Platform = command.Platform?.Trim() ?? string.Empty, - LarkReceiveId = command.LarkReceiveId?.Trim() ?? string.Empty, - LarkReceiveIdType = command.LarkReceiveIdType?.Trim() ?? string.Empty, - LarkReceiveIdFallback = command.LarkReceiveIdFallback?.Trim() ?? string.Empty, - LarkReceiveIdTypeFallback = command.LarkReceiveIdTypeFallback?.Trim() ?? string.Empty, - }; -#pragma warning restore CS0612 - - if (command.OwnerScope is not null) - initializedEvent.OwnerScope = command.OwnerScope.Clone(); - - await PersistDomainEventAsync(initializedEvent); - - await Scheduler.ScheduleNextRunAsync(DateTimeOffset.UtcNow, CancellationToken.None); - await UpsertRegistryAsync(State.Enabled ? 
WorkflowAgentDefaults.StatusRunning : WorkflowAgentDefaults.StatusDisabled, CancellationToken.None); - } - - [EventHandler(AllowSelfHandling = true)] - public async Task HandleTriggerAsync(TriggerWorkflowAgentExecutionCommand command) - { - if (!State.Enabled) - { - Logger.LogInformation("Workflow agent {ActorId} ignored trigger because it is disabled", Id); - return; - } - - var now = DateTimeOffset.UtcNow; - try - { - var receipt = await DispatchWorkflowRunAsync(command.Reason, command.RevisionFeedback, CancellationToken.None); - await PersistDomainEventAsync(new WorkflowAgentExecutionDispatchedEvent - { - DispatchedAt = Timestamp.FromDateTimeOffset(now), - WorkflowRunActorId = receipt.ActorId, - CommandId = receipt.CommandId, - }); - - await Scheduler.ScheduleNextRunAsync(now, CancellationToken.None); - await UpdateRegistryExecutionAsync( - WorkflowAgentDefaults.StatusRunning, State.LastRunAt, State.NextRunAt, - 0, string.Empty, CancellationToken.None); - } - catch (Exception ex) - { - Logger.LogWarning(ex, "Workflow agent {ActorId} execution dispatch failed", Id); - await PersistDomainEventAsync(new WorkflowAgentExecutionFailedEvent - { - FailedAt = Timestamp.FromDateTimeOffset(now), - Error = ex.Message, - }); - - await Scheduler.ScheduleNextRunAsync(now, CancellationToken.None); - await UpdateRegistryExecutionAsync( - WorkflowAgentDefaults.StatusError, State.LastRunAt, State.NextRunAt, - State.ErrorCount, State.LastError, CancellationToken.None); - } - } - - [EventHandler] - public async Task HandleDisableAsync(DisableWorkflowAgentCommand command) - { - await Scheduler.CancelAsync(CancellationToken.None); - - await PersistDomainEventAsync(new WorkflowAgentDisabledEvent - { - Reason = command.Reason?.Trim() ?? 
string.Empty, - }); - - await UpdateRegistryExecutionAsync( - WorkflowAgentDefaults.StatusDisabled, State.LastRunAt, null, - State.ErrorCount, State.LastError, CancellationToken.None); - } - - [EventHandler] - public async Task HandleEnableAsync(EnableWorkflowAgentCommand command) - { - if (!State.Enabled) - { - await PersistDomainEventAsync(new WorkflowAgentEnabledEvent - { - Reason = command.Reason?.Trim() ?? string.Empty, - }); - } - - await Scheduler.ScheduleNextRunAsync(DateTimeOffset.UtcNow, CancellationToken.None); - await UpdateRegistryExecutionAsync( - WorkflowAgentDefaults.StatusRunning, State.LastRunAt, State.NextRunAt, - State.ErrorCount, State.LastError, CancellationToken.None); - } - - private async Task DispatchWorkflowRunAsync( - string? reason, string? revisionFeedback, CancellationToken ct) - { - var dispatchService = Services.GetService>(); - if (dispatchService is null) - throw new InvalidOperationException("Workflow run dispatch service is not registered."); - - var request = new WorkflowChatRunRequest( - Prompt: BuildExecutionPrompt(reason, revisionFeedback), - WorkflowName: State.WorkflowName, - ActorId: State.WorkflowActorId, - SessionId: null, - InputParts: null, - WorkflowYamls: null, - Metadata: await BuildExecutionMetadataAsync(ct), - ScopeId: State.ScopeId); - - var dispatch = await dispatchService.DispatchAsync(request, ct); - if (!dispatch.Succeeded || dispatch.Receipt is null) - throw new InvalidOperationException(MapDispatchError(dispatch.Error)); - - return dispatch.Receipt; - } - - private async Task> BuildExecutionMetadataAsync(CancellationToken ct) - { - var metadata = new Dictionary(StringComparer.Ordinal) - { - [LLMRequestMetadataKeys.NyxIdAccessToken] = State.NyxApiKey ?? string.Empty, - [ChannelMetadataKeys.ConversationId] = State.ConversationId ?? 
string.Empty, - }; - if (!string.IsNullOrWhiteSpace(State.ScopeId)) - metadata["scope_id"] = State.ScopeId; - // Propagate the outbound Lark delivery target so workflow modules that need to surface - // their own status messages back into the originating chat (e.g. TwitterPublishModule - // posting "已发布: " or "Twitter OAuth 过期…") can do so via the same api-lark-bot - // proxy this agent already uses, without re-resolving the catalog at run time. - if (!string.IsNullOrWhiteSpace(State.LarkReceiveId)) - metadata[ChannelMetadataKeys.LarkReceiveId] = State.LarkReceiveId; - if (!string.IsNullOrWhiteSpace(State.LarkReceiveIdType)) - metadata[ChannelMetadataKeys.LarkReceiveIdType] = State.LarkReceiveIdType; - if (!string.IsNullOrWhiteSpace(State.NyxProviderSlug)) - metadata[ChannelMetadataKeys.LarkOutboundProxySlug] = State.NyxProviderSlug; - - // Mirror SkillRunnerGAgent.BuildExecutionMetadataAsync — same shared helper, same - // model/route/tool-cap pinning. Workflow-backed agents (e.g. social_media) need the - // same UserConfig discipline so their LLM steps don't fall through to gateway+gpt-5.4 - // when the bot owner pre-configured a custom NyxID service like `chrono-llm`. The - // source is bound once via constructor injection at agent activation time; the - // per-execution Services.GetService<> fallback was dropped per codex's PR #509 - // partial dissent on r3159047120. - await OwnerLlmConfigApplier.ApplyAsync( - metadata, - State.ScopeId, - _ownerLlmConfigSource, - Logger, - actorLabel: "Workflow agent", - actorId: Id, - ct); - return metadata; - } - - private string BuildExecutionPrompt(string? reason, string? revisionFeedback) - { - var prompt = string.IsNullOrWhiteSpace(State.ExecutionPrompt) - ? "Run the configured workflow now." - : State.ExecutionPrompt; - - var lines = new List - { - prompt, - $"Trigger reason: {(string.IsNullOrWhiteSpace(reason) ? 
"manual" : reason)}", - }; - - var normalized = NormalizeOptional(revisionFeedback); - if (normalized is not null) - lines.Add($"Revision feedback: {normalized}"); - - return string.Join('\n', lines); - } - - private async Task UpsertRegistryAsync(string status, CancellationToken ct) - { -#pragma warning disable CS0612 // legacy field reads/writes during owner_scope migration (issue #466) - var legacyOwnerNyxUserId = State.OwnerNyxUserId ?? string.Empty; - var legacyPlatform = ResolvePlatform(State.Platform); - var ownerScope = State.OwnerScope ?? OwnerScope.FromLegacyFields(legacyOwnerNyxUserId, legacyPlatform); - - var command = new UserAgentCatalogUpsertCommand - { - AgentId = Id, - Platform = legacyPlatform, - ConversationId = State.ConversationId ?? string.Empty, - NyxProviderSlug = State.NyxProviderSlug ?? string.Empty, - NyxApiKey = State.NyxApiKey ?? string.Empty, - OwnerNyxUserId = legacyOwnerNyxUserId, - AgentType = WorkflowAgentDefaults.AgentType, - TemplateName = WorkflowAgentDefaults.TemplateName, - ScopeId = State.ScopeId ?? string.Empty, - ApiKeyId = State.ApiKeyId ?? string.Empty, - ScheduleCron = State.ScheduleCron ?? string.Empty, - ScheduleTimezone = State.ScheduleTimezone ?? string.Empty, - Status = status, - LarkReceiveId = State.LarkReceiveId ?? string.Empty, - LarkReceiveIdType = State.LarkReceiveIdType ?? string.Empty, - LarkReceiveIdFallback = State.LarkReceiveIdFallback ?? string.Empty, - LarkReceiveIdTypeFallback = State.LarkReceiveIdTypeFallback ?? string.Empty, - }; -#pragma warning restore CS0612 - - if (ownerScope is not null) - command.OwnerScope = ownerScope; - - await UserAgentCatalogStoreCommands.DispatchUpsertAsync(Services, Id, command, ct); - await UpdateRegistryExecutionAsync(status, State.LastRunAt, State.NextRunAt, State.ErrorCount, State.LastError, ct); - } - - private async Task UpdateRegistryExecutionAsync( - string status, Timestamp? lastRunAt, Timestamp? nextRunAt, - int errorCount, string? 
lastError, CancellationToken ct) - { - var command = new UserAgentCatalogExecutionUpdateCommand - { - AgentId = Id, Status = status, - LastRunAt = lastRunAt, NextRunAt = nextRunAt, - ErrorCount = errorCount, LastError = lastError ?? string.Empty, - }; - await UserAgentCatalogStoreCommands.DispatchExecutionUpdateAsync(Services, Id, command, ct); - } - - private static WorkflowAgentState ApplyInitialized(WorkflowAgentState current, WorkflowAgentInitializedEvent evt) - { - var next = current.Clone(); - next.WorkflowId = evt.WorkflowId ?? string.Empty; - next.WorkflowName = evt.WorkflowName ?? string.Empty; - next.WorkflowActorId = evt.WorkflowActorId ?? string.Empty; - next.ExecutionPrompt = evt.ExecutionPrompt ?? string.Empty; - next.ScheduleCron = evt.ScheduleCron ?? string.Empty; - next.ScheduleTimezone = NormalizeTimezone(evt.ScheduleTimezone); - next.ConversationId = evt.ConversationId ?? string.Empty; - next.NyxProviderSlug = evt.NyxProviderSlug ?? string.Empty; - next.NyxApiKey = evt.NyxApiKey ?? string.Empty; -#pragma warning disable CS0612 // legacy fields preserved during owner_scope migration - next.OwnerNyxUserId = evt.OwnerNyxUserId ?? string.Empty; -#pragma warning restore CS0612 - next.ApiKeyId = evt.ApiKeyId ?? string.Empty; - next.Enabled = evt.Enabled; - next.ScopeId = evt.ScopeId ?? string.Empty; -#pragma warning disable CS0612 // legacy field preserved during owner_scope migration - next.Platform = evt.Platform ?? string.Empty; -#pragma warning restore CS0612 - next.LarkReceiveId = evt.LarkReceiveId ?? string.Empty; - next.LarkReceiveIdType = evt.LarkReceiveIdType ?? string.Empty; - next.LarkReceiveIdFallback = evt.LarkReceiveIdFallback ?? string.Empty; - next.LarkReceiveIdTypeFallback = evt.LarkReceiveIdTypeFallback ?? 
string.Empty; - if (evt.OwnerScope is not null) - next.OwnerScope = evt.OwnerScope.Clone(); - return next; - } - - private static WorkflowAgentState ApplyNextRunScheduled(WorkflowAgentState current, WorkflowAgentNextRunScheduledEvent evt) - { - var next = current.Clone(); - next.NextRunAt = evt.NextRunAt; - return next; - } - - private static WorkflowAgentState ApplyDispatched(WorkflowAgentState current, WorkflowAgentExecutionDispatchedEvent evt) - { - var next = current.Clone(); - next.LastRunAt = evt.DispatchedAt; - next.LastError = string.Empty; - next.ErrorCount = 0; - return next; - } - - private static WorkflowAgentState ApplyFailed(WorkflowAgentState current, WorkflowAgentExecutionFailedEvent evt) - { - var next = current.Clone(); - next.LastRunAt = evt.FailedAt; - next.LastError = evt.Error ?? string.Empty; - next.ErrorCount += 1; - return next; - } - - private static WorkflowAgentState ApplyDisabled(WorkflowAgentState current, WorkflowAgentDisabledEvent _) - { - var next = current.Clone(); - next.Enabled = false; - next.NextRunAt = null; - return next; - } - - private static WorkflowAgentState ApplyEnabled(WorkflowAgentState current, WorkflowAgentEnabledEvent _) - { - var next = current.Clone(); - next.Enabled = true; - return next; - } - - private static string NormalizeTimezone(string? scheduleTimezone) => - string.IsNullOrWhiteSpace(scheduleTimezone) ? WorkflowAgentDefaults.DefaultTimezone : scheduleTimezone.Trim(); - - private static string ResolvePlatform(string? platform) => - string.IsNullOrWhiteSpace(platform) ? WorkflowAgentDefaults.DefaultPlatform : platform.Trim(); - - private static string? NormalizeOptional(string? value) - { - var normalized = (value ?? string.Empty).Trim(); - return normalized.Length == 0 ? 
null : normalized; - } - - private static string MapDispatchError(WorkflowChatRunStartError error) => error switch - { - WorkflowChatRunStartError.AgentNotFound => "Workflow actor not found.", - WorkflowChatRunStartError.WorkflowNotFound => "Workflow definition not found.", - WorkflowChatRunStartError.AgentTypeNotSupported => "Actor is not workflow-capable.", - WorkflowChatRunStartError.ProjectionDisabled => "Workflow projection is disabled.", - WorkflowChatRunStartError.WorkflowBindingMismatch => "Workflow binding mismatch.", - WorkflowChatRunStartError.AgentWorkflowNotConfigured => "Workflow actor is not bound to a workflow.", - WorkflowChatRunStartError.InvalidWorkflowYaml => "Workflow YAML is invalid.", - WorkflowChatRunStartError.WorkflowNameMismatch => "Workflow name does not match the bound workflow.", - WorkflowChatRunStartError.PromptRequired => "Workflow prompt is required.", - WorkflowChatRunStartError.ConflictingScopeId => "Workflow scope_id is conflicting.", - _ => "Workflow run dispatch failed.", - }; -} diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentLegacyAliases.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowAgentLegacyAliases.cs deleted file mode 100644 index 122d20ddb..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentLegacyAliases.cs +++ /dev/null @@ -1,57 +0,0 @@ -using Aevatar.Foundation.Abstractions.Compatibility; - -namespace Aevatar.GAgents.Scheduled; - -internal static class WorkflowAgentLegacyAliases -{ - private const string ProtoPrefix = "aevatar.gagents.channelruntime."; - private const string ClrPrefix = "Aevatar.GAgents.ChannelRuntime."; - - internal const string StateProto = ProtoPrefix + "WorkflowAgentState"; - internal const string InitializeCommandProto = ProtoPrefix + "InitializeWorkflowAgentCommand"; - internal const string InitializedEventProto = ProtoPrefix + "WorkflowAgentInitializedEvent"; - internal const string TriggerCommandProto = ProtoPrefix + "TriggerWorkflowAgentExecutionCommand"; - internal 
const string NextRunScheduledEventProto = ProtoPrefix + "WorkflowAgentNextRunScheduledEvent"; - internal const string ExecutionDispatchedEventProto = ProtoPrefix + "WorkflowAgentExecutionDispatchedEvent"; - internal const string ExecutionFailedEventProto = ProtoPrefix + "WorkflowAgentExecutionFailedEvent"; - internal const string DisableCommandProto = ProtoPrefix + "DisableWorkflowAgentCommand"; - internal const string EnableCommandProto = ProtoPrefix + "EnableWorkflowAgentCommand"; - internal const string DisabledEventProto = ProtoPrefix + "WorkflowAgentDisabledEvent"; - internal const string EnabledEventProto = ProtoPrefix + "WorkflowAgentEnabledEvent"; - - internal const string StateClr = ClrPrefix + "WorkflowAgentState"; -} - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.StateProto)] -[LegacyClrTypeName(WorkflowAgentLegacyAliases.StateClr)] -public sealed partial class WorkflowAgentState; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.InitializeCommandProto)] -public sealed partial class InitializeWorkflowAgentCommand; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.InitializedEventProto)] -public sealed partial class WorkflowAgentInitializedEvent; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.TriggerCommandProto)] -public sealed partial class TriggerWorkflowAgentExecutionCommand; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.NextRunScheduledEventProto)] -public sealed partial class WorkflowAgentNextRunScheduledEvent; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.ExecutionDispatchedEventProto)] -public sealed partial class WorkflowAgentExecutionDispatchedEvent; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.ExecutionFailedEventProto)] -public sealed partial class WorkflowAgentExecutionFailedEvent; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.DisableCommandProto)] -public sealed partial class DisableWorkflowAgentCommand; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.EnableCommandProto)] -public sealed partial class 
EnableWorkflowAgentCommand; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.DisabledEventProto)] -public sealed partial class WorkflowAgentDisabledEvent; - -[LegacyProtoFullName(WorkflowAgentLegacyAliases.EnabledEventProto)] -public sealed partial class WorkflowAgentEnabledEvent; diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentState.Partial.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowAgentState.Partial.cs deleted file mode 100644 index f6bace9a3..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowAgentState.Partial.cs +++ /dev/null @@ -1,17 +0,0 @@ -using Aevatar.GAgents.Channel.Abstractions; - -namespace Aevatar.GAgents.Scheduled; - -public sealed partial class WorkflowAgentState : ISchedulable -{ - /// - ScheduleState ISchedulable.Schedule => new() - { - Enabled = Enabled, - Cron = ScheduleCron ?? string.Empty, - Timezone = ScheduleTimezone ?? string.Empty, - NextRunAt = NextRunAt, - LastRunAt = LastRunAt, - ErrorCount = ErrorCount, - }; -} diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowModules/ScheduledWorkflowModulePack.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowModules/ScheduledWorkflowModulePack.cs deleted file mode 100644 index 044cfc275..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowModules/ScheduledWorkflowModulePack.cs +++ /dev/null @@ -1,28 +0,0 @@ -using Aevatar.Workflow.Core; -using Aevatar.Workflow.Core.Composition; - -namespace Aevatar.GAgents.Scheduled.WorkflowModules; - -/// -/// Workflow module pack contributed by the scheduled-agent package — currently registers -/// for the social_media template's -/// twitter_publish step (issue aevatarAI/aevatar#216). Lives next to its dependencies -/// (NyxIdApiClient, ChannelMetadataKeys, LarkProxyResponse) instead of in -/// Aevatar.Workflow.Core so the generic workflow runtime stays free of channel-specific -/// compile-time coupling. 
-/// -public sealed class ScheduledWorkflowModulePack : IWorkflowModulePack -{ - private static readonly IReadOnlyList ModuleRegistrations = - [ - WorkflowModuleRegistration.Create("twitter_publish"), - ]; - - public string Name => "scheduled.workflow"; - - public IReadOnlyList Modules => ModuleRegistrations; - - public IReadOnlyList DependencyExpanders => []; - - public IReadOnlyList Configurators => []; -} diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowModules/ServiceCollectionExtensions.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowModules/ServiceCollectionExtensions.cs deleted file mode 100644 index 36b364cd1..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowModules/ServiceCollectionExtensions.cs +++ /dev/null @@ -1,21 +0,0 @@ -using Aevatar.Workflow.Core; -using Microsoft.Extensions.DependencyInjection; - -namespace Aevatar.GAgents.Scheduled.WorkflowModules; - -/// -/// DI extension to register the scheduled-agent workflow module pack. Hosts that compose -/// the social_media template's execution should call this so the twitter_publish -/// step type resolves at workflow run time. -/// -public static class ScheduledWorkflowModuleServiceCollectionExtensions -{ - /// - /// Registers alongside any other module - /// packs already added to the workflow runtime. Idempotent — uses - /// TryAddEnumerable via - /// . 
- /// - public static IServiceCollection AddScheduledWorkflowExtensions(this IServiceCollection services) => - services.AddWorkflowModulePack(); -} diff --git a/agents/Aevatar.GAgents.Scheduled/WorkflowModules/TwitterPublishModule.cs b/agents/Aevatar.GAgents.Scheduled/WorkflowModules/TwitterPublishModule.cs deleted file mode 100644 index db0e30fbd..000000000 --- a/agents/Aevatar.GAgents.Scheduled/WorkflowModules/TwitterPublishModule.cs +++ /dev/null @@ -1,556 +0,0 @@ -// ───────────────────────────────────────────────────────────── -// TwitterPublishModule — 把 social_media 模板批准后的内容发布到 X (Twitter) -// 通过 NyxID `api-twitter` 代理调用 POST /tweets,结果同步回 Lark。 -// 见 issue aevatarAI/aevatar#216 — 接续 #418 的 PreflightTwitterProxyAsync。 -// ───────────────────────────────────────────────────────────── - -using System.Net; -using System.Text.Json; -using Aevatar.AI.Abstractions.LLMProviders; -using Aevatar.AI.ToolProviders.NyxId; -using Aevatar.Foundation.Abstractions; -using Aevatar.Foundation.Abstractions.EventModules; -using Aevatar.GAgents.Channel.Runtime; -using Aevatar.GAgents.Platform.Lark; -using Aevatar.Workflow.Abstractions; -using Aevatar.Workflow.Abstractions.Execution; -using Aevatar.Workflow.Core.Execution; -using Aevatar.Workflow.Core.Primitives; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; - -namespace Aevatar.GAgents.Scheduled.WorkflowModules; - -/// -/// Twitter (X) 发布模块。处理 step_type == "twitter_publish"。 -/// 用 social_media agent 在 NyxID 中预先 mint 的 api-key 调 api-twitter 代理把已批准 -/// 的草稿发布到 Twitter,并把结果(推文 URL 或分类好的错误文案)回写到原始 Lark 会话。 -/// -/// -/// 与 LLM/工具调用路径不同——发布是确定性的:批准的内容直接进入 POST /tweets(NyxID 的 -/// api-twitter 代理 base_url 已含 /2,不能再前缀 /2/,详见 -/// NyxIdServiceApiHints.cs),没有模型重写余地。把这一段建在工作流 module 而不是 LLM -/// step 里也更可重入:模型偶尔丢工具调用、或返回非结构化文本,但发布行为必须严格 1:1。 -/// -public sealed class TwitterPublishModule : IEventModule -{ - public string Name => "twitter_publish"; - public int Priority => 5; - - public bool 
CanHandle(EventEnvelope envelope) => - envelope.Payload?.Is(StepRequestEvent.Descriptor) == true; - - public async Task HandleAsync(EventEnvelope envelope, IWorkflowExecutionContext ctx, CancellationToken ct) - { - var request = envelope.Payload!.Unpack(); - if (request.StepType != "twitter_publish") return; - - var content = (request.Input ?? string.Empty).Trim(); - if (string.IsNullOrEmpty(content)) - { - await PublishFailureAsync( - ctx, - request, - code: "twitter_publish_empty_content", - message: "Approved content was empty; nothing to publish.", - logger: ctx.Logger, - ct); - return; - } - - var nyxClient = ctx.Services.GetService(); - if (nyxClient is null) - { - await PublishFailureAsync( - ctx, - request, - code: "twitter_publish_client_missing", - message: "NyxIdApiClient is not registered; cannot publish.", - logger: ctx.Logger, - ct); - return; - } - - if (!WorkflowExecutionItemsAccess.TryGetItem( - ctx, - LLMRequestMetadataKeys.NyxIdAccessToken, - out var apiKeyValue) || - string.IsNullOrWhiteSpace(apiKeyValue)) - { - await PublishFailureAsync( - ctx, - request, - code: "twitter_publish_api_key_missing", - message: "Workflow execution context did not carry a NyxID api-key. Re-create the agent so the new outbound config propagates.", - logger: ctx.Logger, - ct); - return; - } - - var requestMetadata = new Dictionary(StringComparer.Ordinal); - WorkflowRequestMetadataItemsAccess.CopyRequestMetadata(ctx, requestMetadata); - - var publishSlug = WorkflowParameterValueParser.GetString( - request.Parameters, - "api-twitter", - "publish_provider_slug", - "nyx_publish_provider_slug", - "publish_slug"); - - var deliveryTargetId = WorkflowParameterValueParser.GetString( - request.Parameters, - string.Empty, - "delivery_target_id"); - - // Twitter v2 endpoint requires `text` payload only for plain-text posts (#216 v1 scope: - // no media, no thread, no poll). Body is JSON, content-type is set by NyxIdApiClient. 
- // - // Idempotency caveat (PR #461 review item #1): Twitter v2 `POST /tweets` has no - // server-side dedup. If this step is retried (e.g. via a `retry` policy on the YAML, or - // a workflow restart that replays an in-flight `StepRequestEvent`), the same content - // will be posted twice. The social_media template intentionally does NOT define a - // `retry` policy on this step, and the `on_error: skip` policy advances to `done` - // rather than retrying. Authors customizing the YAML should keep this invariant — do - // not add `retry: { max_attempts: > 1 }` here without first wiring a client-side dedup - // key (e.g. hashing run_id+step_id+content into a NyxID-side request idempotency - // header) or accepting duplicate posts as a known risk. - var tweetBody = JsonSerializer.Serialize(new { text = content }); - - string proxyResponse; - try - { - // PR #461 review (commit 781c5bda follow-up): NyxID's `api-twitter` provider seed - // sets `base_url: "https://api.x.com/2"` (provider_service.rs:1728) — the API - // version is already baked into the base URL. Adding `/2/` to the path here would - // produce `https://api.x.com/2/2/tweets` and 404 every publish call in production. - // Mirror what the preflight does (`/users/me`, AgentBuilderTool.cs:1877): use the - // bare resource path. NyxIdServiceApiHints.cs:58 documents this invariant. 
- proxyResponse = await nyxClient.ProxyRequestAsync( - apiKeyValue!, - publishSlug, - "/tweets", - "POST", - tweetBody, - extraHeaders: null, - ct); - } - catch (Exception ex) - { - ctx.Logger.LogWarning( - ex, - "TwitterPublish: run={RunId} step={StepId} unhandled exception while calling api-twitter", - request.RunId, - request.StepId); - await PublishFailureAsync( - ctx, - request, - code: "twitter_publish_transport_error", - message: $"NyxID proxy transport error: {ex.Message}", - logger: ctx.Logger, - ct); - await TrySendLarkAsync( - nyxClient, - requestMetadata, - apiKeyValue!, - deliveryTargetId, - $"Twitter 发布失败(网络错误):{ex.Message}", - ctx.Logger, - ct); - return; - } - - var outcome = ClassifyTwitterResponse(proxyResponse); - - if (outcome.Success && !string.IsNullOrEmpty(outcome.TweetUrl)) - { - ctx.Logger.LogInformation( - "TwitterPublish: run={RunId} step={StepId} published tweet={TweetUrl}", - request.RunId, - request.StepId, - outcome.TweetUrl); - - var successMessage = $"已发布: {outcome.TweetUrl}"; - await TrySendLarkAsync( - nyxClient, - requestMetadata, - apiKeyValue!, - deliveryTargetId, - successMessage, - ctx.Logger, - ct); - - var completed = new StepCompletedEvent - { - StepId = request.StepId, - RunId = request.RunId, - Success = true, - Output = outcome.TweetUrl!, - }; - await ctx.PublishAsync(completed, TopologyAudience.Self, ct); - return; - } - - ctx.Logger.LogWarning( - "TwitterPublish: run={RunId} step={StepId} publish failed code={Code} status={Status} detail={Detail}", - request.RunId, - request.StepId, - outcome.ErrorCode, - outcome.HttpStatus, - outcome.Detail); - - await TrySendLarkAsync( - nyxClient, - requestMetadata, - apiKeyValue!, - deliveryTargetId, - outcome.LarkMessage, - ctx.Logger, - ct); - - await PublishFailureAsync( - ctx, - request, - code: outcome.ErrorCode, - message: outcome.Detail, - logger: ctx.Logger, - ct); - } - - private static Task PublishFailureAsync( - IWorkflowExecutionContext ctx, - StepRequestEvent request, 
- string code, - string message, - ILogger logger, - CancellationToken ct) - { - // The social_media template's `publish_to_twitter` step routes its failure into the - // `done` terminal so the run finishes cleanly even if Twitter rejected the post — - // the failure is surfaced to Lark independently. Mark Success=false so callers / - // observability see the failed publish, but emit the error string verbatim so the - // workflow output preserves the categorized code. - var failed = new StepCompletedEvent - { - StepId = request.StepId, - RunId = request.RunId, - Success = false, - Output = $"{code}: {message}", - Error = $"{code}: {message}", - }; - return ctx.PublishAsync(failed, TopologyAudience.Self, ct); - } - - /// - /// Surfaces a status message back to the originating Lark conversation via the same NyxID - /// api-key used to publish the tweet. Best-effort: a Lark delivery failure must never abort - /// the workflow's own bookkeeping (which is what publishes StepCompletedEvent). - /// - /// - /// PR #461 review item #5: this method depends on the api-key carrying both the - /// api-twitter AND the Lark proxy slug (e.g. api-lark-bot) entitlements at - /// mint time — see CreateSocialMediaAgentAsync in AgentBuilderTool.cs, which - /// resolves both slugs through ResolveProxyServiceIdsAsync before - /// CreateApiKeyAsync. If a future change narrows the api-key to only Twitter, the - /// Lark surfacing here will silently 403 — keep the dual-scope mint contract in lock-step - /// with this method, or pass a dedicated Lark api-key through metadata. 
- /// - private static async Task TrySendLarkAsync( - NyxIdApiClient nyxClient, - IReadOnlyDictionary requestMetadata, - string apiKey, - string fallbackReceiveId, - string text, - ILogger logger, - CancellationToken ct) - { - if (string.IsNullOrWhiteSpace(text)) - return; - - var receiveId = TryGet(requestMetadata, ChannelMetadataKeys.LarkReceiveId); - var receiveIdType = TryGet(requestMetadata, ChannelMetadataKeys.LarkReceiveIdType); - var larkSlug = TryGet(requestMetadata, ChannelMetadataKeys.LarkOutboundProxySlug) ?? "api-lark-bot"; - - // Fallback: when the workflow agent's outbound metadata is unavailable, treat the - // step's `delivery_target_id` (which is the agent_id, i.e. the Lark receive_id under - // open_id naming for p2p chats) as a best-effort target. - if (string.IsNullOrWhiteSpace(receiveId)) - { - receiveId = fallbackReceiveId; - receiveIdType = string.IsNullOrWhiteSpace(receiveIdType) ? "open_id" : receiveIdType; - } - - if (string.IsNullOrWhiteSpace(receiveId) || string.IsNullOrWhiteSpace(receiveIdType)) - { - logger.LogWarning( - "TwitterPublish: skipping Lark surfacing — outbound delivery target metadata missing (receive_id/type empty)."); - return; - } - - try - { - var body = JsonSerializer.Serialize(new - { - receive_id = receiveId, - msg_type = "text", - content = JsonSerializer.Serialize(new { text }), - }); - - var response = await nyxClient.ProxyRequestAsync( - apiKey, - larkSlug, - $"open-apis/im/v1/messages?receive_id_type={receiveIdType}", - "POST", - body, - extraHeaders: null, - ct); - - if (LarkProxyResponse.TryGetError(response, out var larkCode, out var detail)) - { - logger.LogWarning( - "TwitterPublish: Lark surfacing rejected (code={Code}): {Detail}", - larkCode, - detail); - } - } - catch (Exception ex) - { - // Lark surfacing is best-effort: a failure here must not abort the workflow's - // own bookkeeping (which is what publishes StepCompletedEvent). Log and move on. 
- logger.LogWarning(ex, "TwitterPublish: Lark surfacing threw"); - } - } - - private static string? TryGet(IReadOnlyDictionary map, string key) - { - if (!map.TryGetValue(key, out var value)) - return null; - return string.IsNullOrWhiteSpace(value) ? null : value; - } - - /// - /// Classifies a NyxID proxy response from POST /api/v1/proxy/s/api-twitter/tweets - /// (NyxID's api-twitter base already includes /2, so the path is - /// /tweets, not /2/tweets — see the HandleAsync call site comment) - /// into a publish outcome. Three shapes are recognized: - /// - /// Twitter 2xx success: { "data": { "id": "<tweet-id>" } } (NyxID forwards - /// the body verbatim). - /// NyxID-wrapped non-2xx: { "error": true, "status": <http>, "body": - /// "<raw downstream body>" } (NyxIdApiClient.cs:680). - /// Twitter v2 native error: { "errors": [ { "message": "...", "code": ... } ], - /// "title": "...", "detail": "..." } — Twitter sometimes returns 4xx with this shape - /// at the top level (PR #461 review item #2). NyxID forwards verbatim, so we parse it as - /// a fallback when neither data.id nor the NyxID-wrapped envelope is present. - /// - /// - internal static TwitterPublishOutcome ClassifyTwitterResponse(string? 
response) - { - if (string.IsNullOrWhiteSpace(response)) - { - return TwitterPublishOutcome.Failure( - "twitter_publish_empty_response", - "NyxID proxy returned an empty response.", - httpStatus: 0, - larkMessage: "Twitter 发布失败:NyxID 代理返回空响应"); - } - - try - { - using var doc = JsonDocument.Parse(response); - var root = doc.RootElement; - if (root.ValueKind != JsonValueKind.Object) - { - return TwitterPublishOutcome.Failure( - "twitter_publish_unexpected_shape", - "Response root was not a JSON object.", - httpStatus: 0, - larkMessage: "Twitter 发布失败:响应格式异常"); - } - - var hasErrorFlag = root.TryGetProperty("error", out var errorProp) && - (errorProp.ValueKind == JsonValueKind.True || - errorProp.ValueKind == JsonValueKind.String); - - // Success path: Twitter returns `{ "data": { "id": "...", "text": "..." } }`. NyxID - // forwards 2xx bodies verbatim, so the absence of an `error` field combined with a - // present `data.id` is the success signal. - if (!hasErrorFlag && - root.TryGetProperty("data", out var dataProp) && - dataProp.ValueKind == JsonValueKind.Object && - dataProp.TryGetProperty("id", out var idProp) && - idProp.ValueKind == JsonValueKind.String && - !string.IsNullOrWhiteSpace(idProp.GetString())) - { - var tweetId = idProp.GetString()!; - // Twitter accepts `https://x.com/i/web/status/` without a handle; resolves - // to the canonical `/status/` URL after redirect. The issue calls - // for a `users/me` lookup to resolve the handle, but that's an extra round-trip - // that can also 401 (and we already have a tweet id at this point). Fall back - // to the no-handle URL — the user always lands on the right tweet either way. - return TwitterPublishOutcome.Successful($"https://x.com/i/web/status/{tweetId}"); - } - - // Failure path A: NyxID wraps non-2xx as { error: true, status: , body: }. - if (hasErrorFlag) - { - var nyxStatus = TryReadInt32(root, "status") ?? TryReadInt32(root, "code") ?? 0; - var nyxDetail = TryReadString(root, "message") ?? 
TryReadString(root, "body") ?? "Twitter publish failed"; - var nyxBody = TryReadString(root, "body"); - return ClassifyByStatus(nyxStatus, nyxDetail, nyxBody); - } - - // Failure path B (PR #461 review item #2): Twitter v2 native error shape, forwarded - // by NyxID without a wrap envelope. Common for content-policy and duplicate-tweet - // rejections, e.g. `{"title":"Conflict","detail":"...","errors":[{"message":"...", - // "code":187}]}`. We don't have an HTTP status here (NyxID swallowed it), so the - // classification falls through to a generic `twitter_publish_rejected`, but we - // surface the rich Twitter error text so users can read the actual reason. - if (TryParseTwitterNativeError(root, out var nativeOutcome)) - return nativeOutcome; - - return TwitterPublishOutcome.Failure( - "twitter_publish_unexpected_shape", - "Response did not match success, NyxID-wrapped, or Twitter-native error shapes.", - httpStatus: 0, - larkMessage: "Twitter 发布失败:响应格式异常,请联系 ops 检查 NyxID 代理日志。"); - } - catch (JsonException) - { - return TwitterPublishOutcome.Failure( - "twitter_publish_unparseable_response", - "NyxID proxy returned a non-JSON response.", - httpStatus: 0, - larkMessage: "Twitter 发布失败:响应不是合法 JSON"); - } - } - - /// - /// Parses a Twitter v2 native error shape (no NyxID wrap envelope). Twitter returns these - /// at the top level for some 4xx rejections (content-policy violations, duplicate tweets, - /// permission issues): { "title": "...", "detail": "...", "errors": [ { "message": - /// "...", "code": 187 } ] }. Returns false when the shape doesn't match so the caller - /// can fall through to the unexpected-shape branch. 
- /// - private static bool TryParseTwitterNativeError(JsonElement root, out TwitterPublishOutcome outcome) - { - outcome = default; - if (!root.TryGetProperty("errors", out var errorsProp) || - errorsProp.ValueKind != JsonValueKind.Array || - errorsProp.GetArrayLength() == 0) - { - // Sometimes Twitter omits the `errors` array but still returns `title`/`detail` - // directly (Problem Details RFC 7807 — what Twitter v2 calls `tweet_create_error`). - // Treat that as a native error too. - var detailText = TryReadString(root, "detail"); - var titleText = TryReadString(root, "title"); - if (string.IsNullOrEmpty(detailText) && string.IsNullOrEmpty(titleText)) - return false; - - var combined = string.IsNullOrEmpty(detailText) ? titleText! : detailText!; - outcome = TwitterPublishOutcome.Failure( - "twitter_publish_rejected", - combined, - httpStatus: 0, - larkMessage: $"Twitter 发布失败:{combined}"); - return true; - } - - var firstError = errorsProp[0]; - var message = TryReadString(firstError, "message") - ?? TryReadString(root, "detail") - ?? TryReadString(root, "title") - ?? "Twitter rejected the publish request."; - var twitterCode = TryReadInt32(firstError, "code"); - var detailWithCode = twitterCode is { } c - ? $"{message} (twitter code={c})" - : message; - - outcome = TwitterPublishOutcome.Failure( - "twitter_publish_rejected", - detailWithCode, - httpStatus: 0, - larkMessage: $"Twitter 发布失败:{detailWithCode}"); - return true; - } - - private static TwitterPublishOutcome ClassifyByStatus(int status, string detail, string? 
rawBody) - { - // Categorization matches issue #216's surfacing matrix: - // 201 → success (handled in caller) - // 401 → OAuth expired/missing — actionable, no retry - // 403 → scope downgraded or seed misconfig — actionable, no retry - // 429 → rate-limited — could retry, but #216 v1 scope says fail with hint - // 5xx → upstream/proxy fault — could retry; v1 scope: fail with hint - // 4xx other → unknown rejection — surface verbatim so user can debug - return status switch - { - (int)HttpStatusCode.Unauthorized => TwitterPublishOutcome.Failure( - "twitter_oauth_required", - detail, - status, - "Twitter OAuth 过期或未授权,请到 NyxID 重新授权 Twitter(providers/twitter)后再试。"), - (int)HttpStatusCode.Forbidden => TwitterPublishOutcome.Failure( - "twitter_proxy_access_denied", - detail, - status, - "Twitter 拒绝发布(403):scope 不足或推文内容被策略拦截。请联系 ops 检查 tweet.write scope。"), - (int)HttpStatusCode.TooManyRequests => TwitterPublishOutcome.Failure( - "twitter_rate_limited", - detail, - status, - "Twitter 发布命中速率限制(429),请稍后重试。"), - >= 500 and <= 599 => TwitterPublishOutcome.Failure( - "twitter_upstream_error", - detail, - status, - $"Twitter 上游服务异常(HTTP {status}),请稍后重试。"), - _ => TwitterPublishOutcome.Failure( - "twitter_publish_rejected", - detail, - status, - BuildGenericFailureMessage(status, detail, rawBody)), - }; - } - - private static string BuildGenericFailureMessage(int status, string detail, string? rawBody) - { - var truncated = rawBody is { Length: > 200 } ? rawBody.Substring(0, 200) + "…" : rawBody; - return string.IsNullOrEmpty(truncated) - ? $"Twitter 发布失败(HTTP {status}):{detail}" - : $"Twitter 发布失败(HTTP {status}):{detail}(body: {truncated})"; - } - - private static int? TryReadInt32(JsonElement element, string propertyName) - { - if (!element.TryGetProperty(propertyName, out var prop) || - prop.ValueKind != JsonValueKind.Number || - !prop.TryGetInt32(out var value)) - { - return null; - } - return value; - } - - private static string? 
TryReadString(JsonElement element, string propertyName) - { - if (!element.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.String) - return null; - var raw = prop.GetString(); - return string.IsNullOrWhiteSpace(raw) ? null : raw; - } -} - -internal readonly record struct TwitterPublishOutcome( - bool Success, - string? TweetUrl, - string ErrorCode, - string Detail, - int HttpStatus, - string LarkMessage) -{ - public static TwitterPublishOutcome Successful(string tweetUrl) => - new(true, tweetUrl, string.Empty, string.Empty, 201, string.Empty); - - public static TwitterPublishOutcome Failure(string code, string detail, int httpStatus, string larkMessage) => - new(false, null, code, detail, httpStatus, larkMessage); -} diff --git a/agents/Aevatar.GAgents.Scheduled/protos/skill_runner.proto b/agents/Aevatar.GAgents.Scheduled/protos/skill_runner.proto index ad2d93e99..ac5522f50 100644 --- a/agents/Aevatar.GAgents.Scheduled/protos/skill_runner.proto +++ b/agents/Aevatar.GAgents.Scheduled/protos/skill_runner.proto @@ -67,6 +67,13 @@ message SkillRunnerState { optional int32 max_tokens = 18; optional int32 max_tool_rounds = 19; optional int32 max_history_messages = 20; + // When true, a run that completes with zero successful nyxid_proxy calls is + // treated as a failure (the LLM bypassed tools and produced output from prior + // context, which for fetch-and-summarize skills means the + // report was hallucinated). Issue #439 review follow-up — closes the gap left + // by the original safety net, which only fired when ≥1 nyxid_proxy call had + // failed. Skills that don't fan out to nyxid_proxy at all leave this false. + bool requires_nyxid_proxy_success = 21; } message InitializeSkillRunnerCommand { @@ -85,6 +92,8 @@ message InitializeSkillRunnerCommand { optional int32 max_tokens = 13; optional int32 max_tool_rounds = 14; optional int32 max_history_messages = 15; + // See SkillRunnerState.requires_nyxid_proxy_success for semantics. 
+ bool requires_nyxid_proxy_success = 16; } message SkillRunnerInitializedEvent { @@ -103,6 +112,8 @@ message SkillRunnerInitializedEvent { optional int32 max_tokens = 13; optional int32 max_tool_rounds = 14; optional int32 max_history_messages = 15; + // See SkillRunnerState.requires_nyxid_proxy_success for semantics. + bool requires_nyxid_proxy_success = 16; } message TriggerSkillRunnerExecutionCommand { diff --git a/agents/Aevatar.GAgents.Scheduled/protos/workflow_agent.proto b/agents/Aevatar.GAgents.Scheduled/protos/workflow_agent.proto deleted file mode 100644 index 78dd58de8..000000000 --- a/agents/Aevatar.GAgents.Scheduled/protos/workflow_agent.proto +++ /dev/null @@ -1,135 +0,0 @@ -syntax = "proto3"; - -package aevatar.gagents.scheduled; - -option csharp_namespace = "Aevatar.GAgents.Scheduled"; - -import "google/protobuf/timestamp.proto"; -import "user_agent_catalog.proto"; - -// ─── Workflow Agent (persistent scheduled workflow trigger) ─── - -message WorkflowAgentState { - string workflow_id = 1; - string workflow_name = 2; - string workflow_actor_id = 3; - string execution_prompt = 4; - string schedule_cron = 5; - string schedule_timezone = 6; - string conversation_id = 7; - string nyx_provider_slug = 8; - string nyx_api_key = 9; - // Deprecated: superseded by owner_scope.nyx_user_id. Issue #466. - string owner_nyx_user_id = 10 [deprecated = true]; - string api_key_id = 11; - google.protobuf.Timestamp last_run_at = 12; - google.protobuf.Timestamp next_run_at = 13; - int32 error_count = 14; - string last_error = 15; - bool enabled = 16; - string scope_id = 17; - // Deprecated: superseded by owner_scope.platform. Issue #466. - string platform = 18 [deprecated = true]; - // See UserAgentCatalogEntry.lark_receive_id for semantics; copied verbatim - // into the catalog entry on UpsertRegistryAsync so downstream Lark senders - // (e.g. FeishuCardHumanInteractionPort) read the typed target. 
- string lark_receive_id = 19; - string lark_receive_id_type = 20; - // Secondary outbound delivery target. See UserAgentCatalogEntry - // .lark_receive_id_fallback for runtime fallback semantics. - string lark_receive_id_fallback = 21; - string lark_receive_id_type_fallback = 22; - // Caller scope captured at create time. Replaces owner_nyx_user_id+platform - // for new agents; the deprecated scattered fields remain for legacy state. - OwnerScope owner_scope = 23; -} - -message InitializeWorkflowAgentCommand { - string workflow_id = 1; - string workflow_name = 2; - string workflow_actor_id = 3; - string execution_prompt = 4; - string schedule_cron = 5; - string schedule_timezone = 6; - string conversation_id = 7; - string nyx_provider_slug = 8; - string nyx_api_key = 9; - // Deprecated: superseded by owner_scope.nyx_user_id. Issue #466. - string owner_nyx_user_id = 10 [deprecated = true]; - string api_key_id = 11; - bool enabled = 12; - string scope_id = 13; - // Deprecated: superseded by owner_scope.platform. Issue #466. - string platform = 14 [deprecated = true]; - string lark_receive_id = 15; - string lark_receive_id_type = 16; - // Secondary outbound delivery target. See UserAgentCatalogEntry - // .lark_receive_id_fallback for runtime fallback semantics. - string lark_receive_id_fallback = 17; - string lark_receive_id_type_fallback = 18; - // Caller scope captured at create time. Required for new commands. - OwnerScope owner_scope = 19; -} - -message WorkflowAgentInitializedEvent { - string workflow_id = 1; - string workflow_name = 2; - string workflow_actor_id = 3; - string execution_prompt = 4; - string schedule_cron = 5; - string schedule_timezone = 6; - string conversation_id = 7; - string nyx_provider_slug = 8; - string nyx_api_key = 9; - // Deprecated: superseded by owner_scope.nyx_user_id. Issue #466. 
- string owner_nyx_user_id = 10 [deprecated = true]; - string api_key_id = 11; - bool enabled = 12; - string scope_id = 13; - // Deprecated: superseded by owner_scope.platform. Issue #466. - string platform = 14 [deprecated = true]; - string lark_receive_id = 15; - string lark_receive_id_type = 16; - // Secondary outbound delivery target. See UserAgentCatalogEntry - // .lark_receive_id_fallback for runtime fallback semantics. - string lark_receive_id_fallback = 17; - string lark_receive_id_type_fallback = 18; - // Caller scope captured at create time. Replaces owner_nyx_user_id+platform. - OwnerScope owner_scope = 19; -} - -message TriggerWorkflowAgentExecutionCommand { - string reason = 1; - string revision_feedback = 2; -} - -message WorkflowAgentNextRunScheduledEvent { - google.protobuf.Timestamp next_run_at = 1; -} - -message WorkflowAgentExecutionDispatchedEvent { - google.protobuf.Timestamp dispatched_at = 1; - string workflow_run_actor_id = 2; - string command_id = 3; -} - -message WorkflowAgentExecutionFailedEvent { - google.protobuf.Timestamp failed_at = 1; - string error = 2; -} - -message DisableWorkflowAgentCommand { - string reason = 1; -} - -message EnableWorkflowAgentCommand { - string reason = 1; -} - -message WorkflowAgentDisabledEvent { - string reason = 1; -} - -message WorkflowAgentEnabledEvent { - string reason = 1; -} diff --git a/agents/channels/Aevatar.GAgents.Channel.NyxIdRelay/NyxIdRelayOptions.cs b/agents/channels/Aevatar.GAgents.Channel.NyxIdRelay/NyxIdRelayOptions.cs index ab5cee467..a3ff998f8 100644 --- a/agents/channels/Aevatar.GAgents.Channel.NyxIdRelay/NyxIdRelayOptions.cs +++ b/agents/channels/Aevatar.GAgents.Channel.NyxIdRelay/NyxIdRelayOptions.cs @@ -5,7 +5,15 @@ namespace Aevatar.GAgents.Channel.NyxIdRelay; /// public class NyxIdRelayOptions { - public int ResponseTimeoutSeconds { get; set; } = 120; + /// + /// Hard upper bound on a single LLM reply turn (LLM thinking + tool rounds + final + /// streaming dispatch). 
300s gives margin for multi-step tool chains common in the + /// aevatar Lark bot flow — search a skill, hit a remote endpoint, summarize the result — + /// without letting a genuine hang pin the run actor turn forever. Set to 0 or + /// negative on a deployment that has its own watchdog and prefers no in-process cap; + /// see AgentRunGAgent.ResolveFallbackTimeout. + /// + public int ResponseTimeoutSeconds { get; set; } = 300; public int MaxBufferedResponseChars { get; set; } = 16 * 1024; @@ -44,6 +52,16 @@ public class NyxIdRelayOptions /// public int StreamingFlushIntervalMs { get; set; } = 750; + /// + /// Maximum number of interim (non-final) edit dispatches per turn. Lark refuses message + /// edits beyond a per-message cap (observed ~20 in mainnet, code 230072 + /// "The message has reached the number of times it can be edited"); once that cap is + /// reached, even the final edit is rejected and the user sees a truncated reply. Capping + /// interim chunks here leaves headroom so the final flush always lands. Long replies + /// freeze on the last interim until the final fires — that is preferable to truncation. + /// + public int StreamingMaxInterimChunks { get; set; } = 15; + /// /// Placeholder text emitted as the first streaming chunk before the LLM produces any delta. /// Guarantees a visible "working" state within the outbound RTT even when the LLM suffers @@ -51,4 +69,27 @@ public class NyxIdRelayOptions /// to disable and instead wait for the first real delta (slower time-to-first-visible). /// public string StreamingPlaceholderText { get; set; } = "…"; + + /// + /// Routes streaming replies through Lark CardKit 2.0 streaming cards instead of editing a + /// regular message in place. CardKit element-content updates are not subject to the per- + /// message edit cap (Lark code 230072) so long replies never need to freeze on the last + /// interim chunk. 
Defaults to true so the modern card path is the standard + /// behaviour for the aevatar Lark bot (Feishu console grants the bot + /// cardkit:card:read + cardkit:card:write). Deployments that have not been + /// granted those scopes are not stuck: watches for the + /// scope-error / rate-limit / table-limit responses returned by card.create and + /// transitions the turn to the legacy edit-message sink for the rest of the chunks (see + /// HandleLarkCardStreamingChunkCoreAsync's CreationFailed branch). Set this + /// to false on a deployment that wants to skip the create-card round-trip entirely + /// (e.g. environments that explicitly want the legacy path or do not run a Lark bot). + /// + public bool StreamingCardKitEnabled { get; set; } = true; + + /// + /// Minimum interval between CardKit element-content dispatches, in milliseconds. Defaults + /// to 200ms — well below the 750ms used by the edit-message path because CardKit accepts + /// far more updates per card than Lark's edit-message cap allows. 
+ /// + public int StreamingCardKitFlushIntervalMs { get; set; } = 200; } diff --git a/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkMessageComposer.cs b/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkMessageComposer.cs index 19c5b9b2a..5f1bd4f39 100644 --- a/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkMessageComposer.cs +++ b/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkMessageComposer.cs @@ -6,6 +6,9 @@ namespace Aevatar.GAgents.Platform.Lark; public sealed class LarkMessageComposer : IMessageComposer { + public const int DefaultMaxMessageLength = 30_000; + private const string TruncationMarker = "\n\n...[truncated]"; + public static readonly ChannelCapabilities DefaultCapabilities = new() { SupportsEphemeral = false, @@ -14,7 +17,7 @@ public sealed class LarkMessageComposer : IMessageComposer SupportsThread = true, Streaming = StreamingSupport.Native, SupportsFiles = false, - MaxMessageLength = 2000, + MaxMessageLength = DefaultMaxMessageLength, SupportsActionButtons = true, SupportsConfirmDialog = false, SupportsModal = false, @@ -385,6 +388,11 @@ private static string Truncate(string? 
value, int maxLength) if (textInfo.LengthInTextElements <= maxLength) return text; - return textInfo.SubstringByTextElements(0, maxLength); + var markerInfo = new StringInfo(TruncationMarker); + var markerLength = markerInfo.LengthInTextElements; + if (maxLength <= markerLength) + return textInfo.SubstringByTextElements(0, maxLength); + + return textInfo.SubstringByTextElements(0, maxLength - markerLength) + TruncationMarker; } } diff --git a/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkStreamingCardShell.cs b/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkStreamingCardShell.cs new file mode 100644 index 000000000..3b1a1f6a9 --- /dev/null +++ b/agents/platforms/Aevatar.GAgents.Platform.Lark/LarkStreamingCardShell.cs @@ -0,0 +1,39 @@ +using System.Text.Json; + +namespace Aevatar.GAgents.Platform.Lark; + +/// +/// Builds the initial CardKit 2.0 card JSON used to seed a streaming card shell: a single +/// markdown element identified by elementId with empty content. Streaming text is +/// written via cardElement.content updates against the live card; this initial JSON only +/// declares the shell. Lives in the Lark platform project so the schema literal stays +/// inside the channel-card-literal guard's allowed boundary. 
+/// +public static class LarkStreamingCardShell +{ + private static readonly JsonSerializerOptions JsonOptions = new(); + + public static string BuildInitialCardJson(string streamingElementId) + { + ArgumentException.ThrowIfNullOrWhiteSpace(streamingElementId); + + var card = new + { + schema = "2.0", + config = new { streaming_mode = true }, + body = new + { + elements = new object[] + { + new + { + tag = "markdown", + element_id = streamingElementId, + content = string.Empty, + }, + }, + }, + }; + return JsonSerializer.Serialize(card, JsonOptions); + } +} diff --git a/apps/aevatar-console-web/src/pages/studio/components/StudioMemberInvokePanel.tsx b/apps/aevatar-console-web/src/pages/studio/components/StudioMemberInvokePanel.tsx index d968e6719..72a6b253d 100644 --- a/apps/aevatar-console-web/src/pages/studio/components/StudioMemberInvokePanel.tsx +++ b/apps/aevatar-console-web/src/pages/studio/components/StudioMemberInvokePanel.tsx @@ -378,6 +378,9 @@ const StudioMemberInvokePanel: React.FC = ({ ); const runIdLabel = trimOptional(invokeResult.runId) || '尚未开始'; const commandIdLabel = trimOptional(invokeResult.commandId) || '尚未发出'; + const actorIdLabel = + trimOptional(invokeResult.actorId) || currentMemberActorId || '尚未分配'; + const memberIdLabel = normalizedMemberId || '未选中成员'; const endpointLabel = selectedEndpoint?.displayName || selectedEndpointId || '—'; useEffect(() => { @@ -1028,7 +1031,8 @@ const StudioMemberInvokePanel: React.FC = ({ ]); const handleOpenRuns = useCallback(() => { - if (!scopeId || !normalizedMemberId || !selectedEndpoint) { + const currentRunId = trimOptional(invokeResult.runId); + if (!scopeId || !normalizedMemberId || !selectedEndpoint || !currentRunId) { return; } @@ -1066,6 +1070,7 @@ const StudioMemberInvokePanel: React.FC = ({ payloadTypeUrl: currentPayloadTypeUrl || undefined, prompt: currentPrompt || undefined, returnTo: returnTo || undefined, + runId: currentRunId, scopeId, serviceId: selectedService?.serviceId, }), @@ -1155,6 
+1160,18 @@ const StudioMemberInvokePanel: React.FC = ({ {commandIdLabel} +
+
Actor ID
+
+ {actorIdLabel} +
+
+
+
Member ID
+
+ {memberIdLabel} +
+
Elapsed
{runElapsedLabel}
@@ -1185,7 +1202,7 @@ const StudioMemberInvokePanel: React.FC = ({ effectiveResponseTypeUrl={effectiveResponseTypeUrl} endpointKind={selectedEndpoint?.kind || 'command'} formError={formError} - hasOpenRunsTarget={Boolean(scopeId && selectedEndpoint)} + hasOpenRunsTarget={Boolean(trimOptional(invokeResult.runId))} invokeStatus={invokeResult.status} isChatEndpoint={isChatEndpoint} layout="dock" diff --git a/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.test.tsx b/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.test.tsx index e4b53cf40..36c422e89 100644 --- a/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.test.tsx +++ b/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.test.tsx @@ -284,7 +284,8 @@ describe('StudioMemberBindPanel', () => { expect(screen.getByTestId('studio-bind-smoke-test-section')).toBeTruthy(); expect(screen.getByTestId('studio-bind-snippet-section')).toBeTruthy(); expect(screen.getByTestId('studio-bind-supporting-section')).toBeTruthy(); - fireEvent.click(screen.getByText('Published contract source')); + expect(screen.getByText('Current member publication')).toBeTruthy(); + fireEvent.click(screen.getByText('Contract details')); expect(await screen.findByText('Published service')).toBeTruthy(); expect(primaryGrid.contains(screen.getByText('Published service'))).toBe(false); expect(screen.queryByText('Binding Contract')).toBeNull(); diff --git a/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.tsx b/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.tsx index bf4aa46f0..a5f0448d9 100644 --- a/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.tsx +++ b/apps/aevatar-console-web/src/pages/studio/components/bind/StudioMemberBindPanel.tsx @@ -5,7 +5,7 @@ import { LinkOutlined, } from '@ant-design/icons'; import { useQuery } from 
'@tanstack/react-query'; -import { Alert, Button, Collapse, Empty, Input, Select, Space, Tag, Typography, message } from 'antd'; +import { Alert, Button, Collapse, Empty, Input, Space, Tag, Typography, message } from 'antd'; import React, { useCallback, useEffect, useMemo, useState } from 'react'; import { applyRuntimeEvent, @@ -249,9 +249,31 @@ const sourceControlStackStyle: React.CSSProperties = { minWidth: 0, }; -const sourceControlSelectStyle: React.CSSProperties = { - height: 58, - width: '100%', +const endpointChoiceRowStyle: React.CSSProperties = { + display: 'flex', + flexWrap: 'wrap', + gap: 6, +}; + +const endpointChoiceButtonStyle: React.CSSProperties = { + alignItems: 'center', + background: '#ffffff', + border: '1px solid #d9e2ef', + borderRadius: 999, + color: '#334155', + cursor: 'pointer', + display: 'inline-flex', + fontSize: 12, + fontWeight: 700, + minHeight: 30, + padding: '0 10px', +}; + +const endpointChoiceButtonActiveStyle: React.CSSProperties = { + ...endpointChoiceButtonStyle, + background: '#111827', + border: '1px solid #111827', + color: '#ffffff', }; const parameterGridStyle: React.CSSProperties = { @@ -726,24 +748,6 @@ const StudioMemberBindPanel: React.FC = ({ smokeInput, ]); - const serviceOptions = useMemo( - () => - services.map((service) => ({ - label: service.displayName || service.serviceId, - value: service.serviceId, - })), - [services], - ); - - const endpointOptions = useMemo( - () => - (selectedService?.endpoints ?? []).map((endpoint) => ({ - label: endpoint.displayName || endpoint.endpointId, - value: endpoint.endpointId, - })), - [selectedService?.endpoints], - ); - const snippetMap = useMemo(() => { if (!bindContract) { return { @@ -763,7 +767,6 @@ const StudioMemberBindPanel: React.FC = ({ const selectedSnippet = snippetMap[snippetTab]; const bindingCatalog: ScopeServiceBindingCatalogSnapshot | undefined = bindingsQuery.data; const bindingList = bindingCatalog?.bindings ?? 
[]; - const hasMultiplePublishedServices = services.length > 1; const revisionList = revisionCatalogQuery.data?.revisions ?? []; const hasEndpointOptions = Boolean(selectedService?.endpoints.length); const endpointUnavailableMessage = @@ -949,12 +952,12 @@ const StudioMemberBindPanel: React.FC = ({ - {bindContract ? 'contract selected' : 'needs endpoint'} + {bindContract ? 'member contract selected' : 'needs endpoint'} {revisionList.length > 0 ? ( revisions · {revisionList.length} @@ -988,46 +991,50 @@ const StudioMemberBindPanel: React.FC = ({
- Published service - {hasMultiplePublishedServices ? ( - setSelectedEndpointId(String(value || ''))} - /> -
{endpointUnavailableMessage ? ( = ({ label: 'Contract details', children: bindContract ? (
+
+ Published service + + {bindContract.serviceId} + + + Platform diagnostic id for this member contract. + +
Workspace ID diff --git a/apps/aevatar-console-web/src/pages/studio/index.test.tsx b/apps/aevatar-console-web/src/pages/studio/index.test.tsx index 16b888129..3e3a3eb98 100644 --- a/apps/aevatar-console-web/src/pages/studio/index.test.tsx +++ b/apps/aevatar-console-web/src/pages/studio/index.test.tsx @@ -3202,6 +3202,21 @@ describe("StudioPage", () => { }); it("strips legacy label params while preserving stable scope and member ids", async () => { + mockStudioMembers = [ + { + memberId: "member-alpha", + scopeId: "scope-a", + displayName: "成员 Alpha", + description: "Legacy service mapped member", + implementationKind: "workflow", + lifecycleStage: "bind_ready", + publishedServiceId: "service-alpha", + lastBoundRevisionId: "rev-alpha", + createdAt: "2026-04-27T08:00:00Z", + updatedAt: "2026-04-27T08:05:00Z", + }, + ]; + renderStudioPage( "/studio?scopeId=scope-a&scopeLabel=%E5%9B%A2%E9%98%9F+A&memberId=service-alpha&memberLabel=%E6%88%90%E5%91%98+Alpha&focus=workflow%3Aworkflow-1&tab=studio" ); @@ -3220,7 +3235,7 @@ describe("StudioPage", () => { const searchParams = new URLSearchParams(window.location.search); expect(searchParams.get("scopeId")).toBe("scope-a"); - expect(searchParams.get("member")).toBe("member:service-alpha"); + expect(searchParams.get("member")).toBe("member:member-alpha"); expect(searchParams.get("memberId")).toBeNull(); expect(searchParams.get("scopeLabel")).toBeNull(); expect(searchParams.get("memberLabel")).toBeNull(); @@ -3228,7 +3243,54 @@ describe("StudioPage", () => { expect(searchParams.get("tab")).toBe("studio"); }); + it("canonicalizes a legacy service member link to the real backend member identity", async () => { + renderStudioPage( + "/studio?scopeId=scope-1&memberId=default&step=invoke&tab=invoke" + ); + + expect(await screen.findByTestId("studio-invoke-surface")).toBeTruthy(); + await waitFor(() => { + expect(screen.getByText("member:workspace-demo")).toBeTruthy(); + expect(screen.getByText("service:default")).toBeTruthy(); + }); 
+ + const searchParams = new URLSearchParams(window.location.search); + expect(searchParams.get("scopeId")).toBe("scope-1"); + expect(searchParams.get("member")).toBe("member:workspace-demo"); + expect(searchParams.get("memberId")).toBeNull(); + expect(searchParams.get("step")).toBe("invoke"); + expect(searchParams.get("tab")).toBe("invoke"); + expect(studioApi.getMember).not.toHaveBeenCalledWith("scope-1", "default"); + }); + it("resyncs the Studio state from stable scope and member ids when the route changes after mount", async () => { + mockStudioMembers = [ + { + memberId: "member-alpha", + scopeId: "scope-a", + displayName: "成员 Alpha", + description: "Legacy service mapped member", + implementationKind: "workflow", + lifecycleStage: "bind_ready", + publishedServiceId: "service-alpha", + lastBoundRevisionId: "rev-alpha", + createdAt: "2026-04-27T08:00:00Z", + updatedAt: "2026-04-27T08:05:00Z", + }, + { + memberId: "member-beta", + scopeId: "scope-b", + displayName: "成员 Beta", + description: "Legacy service mapped member", + implementationKind: "workflow", + lifecycleStage: "bind_ready", + publishedServiceId: "service-beta", + lastBoundRevisionId: "rev-beta", + createdAt: "2026-04-27T08:00:00Z", + updatedAt: "2026-04-27T08:05:00Z", + }, + ]; + renderStudioPage( "/studio?scopeId=scope-a&scopeLabel=%E5%9B%A2%E9%98%9F+A&memberId=service-alpha&memberLabel=%E6%88%90%E5%91%98+Alpha&focus=workflow%3Aworkflow-1&tab=studio" ); @@ -3244,7 +3306,6 @@ describe("StudioPage", () => { expect(screen.getByTestId("studio-context-title")).toHaveTextContent( "workspace-demo" ); - expect(screen.getByTestId("studio-context-meta")).toHaveTextContent("service-beta"); expect(screen.getByTestId("studio-context-meta")).not.toHaveTextContent("团队 B"); expect(screen.getByTestId("studio-context-meta")).not.toHaveTextContent("成员 Beta"); expect(screen.getByTestId("studio-workflow-build-panel")).toBeTruthy(); @@ -3258,9 +3319,15 @@ describe("StudioPage", () => { ); }); + await waitFor(() => { + 
const searchParams = new URLSearchParams(window.location.search); + expect(searchParams.get("member")).toBe("member:member-beta"); + expect(searchParams.get("memberId")).toBeNull(); + }); + const searchParams = new URLSearchParams(window.location.search); expect(searchParams.get("scopeId")).toBe("scope-b"); - expect(searchParams.get("member")).toBe("member:service-beta"); + expect(searchParams.get("member")).toBe("member:member-beta"); expect(searchParams.get("memberId")).toBeNull(); expect(searchParams.get("scopeLabel")).toBeNull(); expect(searchParams.get("memberLabel")).toBeNull(); @@ -3659,7 +3726,7 @@ describe("StudioPage", () => { }); it("returns to canonical Team detail when Studio has Team context", async () => { - renderStudioPage("/studio?scopeId=scope-1&teamId=t-alpha&memberId=workspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); + renderStudioPage("/studio?scopeId=scope-1&teamId=t-alpha&member=member%3Aworkspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); fireEvent.click(await screen.findByRole("button", { name: "返回团队" })); @@ -3765,12 +3832,12 @@ describe("StudioPage", () => { }); expect( screen.getByText( - "Script starts as a named draft. It becomes a callable member only after Save script is catalog-applied and Bind succeeds.", + "Script creates a backend member and opens a stable script draft identity in Build. 
It becomes callable after Save script is catalog-applied and Bind succeeds.", ), ).toBeTruthy(); expect(screen.getByText(/Script id: refund-handler/)).toBeTruthy(); fireEvent.click( - within(createDialog).getByRole("button", { name: "Create Script draft" }), + within(createDialog).getByRole("button", { name: "Create member" }), ); expect(await screen.findByTestId("studio-script-build-panel")).toBeTruthy(); @@ -3805,7 +3872,7 @@ describe("StudioPage", () => { ); expect( - within(createDialog).getByRole("button", { name: "Create Script draft" }) + within(createDialog).getByRole("button", { name: "Create member" }) ).toBeDisabled(); expect(screen.getByRole("dialog", { name: "Create member" })).toBeTruthy(); }); @@ -3832,11 +3899,17 @@ describe("StudioPage", () => { ).toHaveAttribute("aria-pressed", "true"); expect(within(createDialog).getByLabelText("Script name")).toHaveValue("script-1"); expect( - within(createDialog).getByRole("button", { name: "Create Script draft" }), + within(createDialog).getByRole("button", { name: "Create member" }), ).toBeEnabled(); }); - it("shows GAgent as a builder member kind before its create API lands", async () => { + it("creates a named GAgent member authority and opens GAgent Build", async () => { + (studioApi.getAppContext as jest.Mock).mockResolvedValueOnce({ + ...defaultStudioAppContext, + scopeId: "scope-1", + scopeResolved: true, + }); + renderStudioPage("/studio?focus=workflow%3Aworkflow-1&tab=studio"); fireEvent.click(await screen.findByRole("button", { name: "Create member" })); @@ -3848,21 +3921,35 @@ describe("StudioPage", () => { fireEvent.click(gagentChip); expect(gagentChip).toHaveAttribute("aria-pressed", "true"); - expect(within(createDialog).queryByLabelText("Member name")).toBeNull(); + const gAgentNameInput = within(createDialog).getByLabelText("GAgent name"); + expect(gAgentNameInput).toHaveValue("gagent-1"); + fireEvent.change(gAgentNameInput, { + target: { + value: "Orders Worker", + }, + }); expect( 
screen.getByText( - "GAgent member authority exists on backend, but this modal still hands off through Build > GAgent for implementation editing and binding prep.", + "GAgent creates a backend member and opens Build > GAgent for actor type, role, prompt, tools, and persistence authoring.", ), ).toBeTruthy(); fireEvent.click( - within(createDialog).getByRole("button", { name: "Open GAgent builder" }), + within(createDialog).getByRole("button", { name: "Create member" }), ); expect(await screen.findByTestId("studio-gagent-build-panel")).toBeTruthy(); + expect(studioApi.createMember).toHaveBeenCalledWith( + expect.objectContaining({ + scopeId: "scope-1", + displayName: "Orders Worker", + implementationKind: "gagent", + }), + ); await waitFor(() => { const searchParams = new URLSearchParams(window.location.search); expect(searchParams.get("tab")).toBe("gagents"); expect(searchParams.get("step")).toBe("build"); + expect(searchParams.get("member")).toBe("member:orders-worker"); }); }); @@ -4031,7 +4118,7 @@ describe("StudioPage", () => { }); it("carries the selected bind contract into invoke after continuing from build", async () => { - renderStudioPage("/studio?scopeId=scope-1&memberId=workspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); + renderStudioPage("/studio?scopeId=scope-1&member=member%3Aworkspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); expect(await screen.findByTestId("studio-workflow-build-panel")).toBeTruthy(); @@ -4115,6 +4202,21 @@ describe("StudioPage", () => { }); it("shows an invoke empty state when a bound member has no endpoint data", async () => { + mockStudioMembers = [ + ...mockStudioMembers, + { + memberId: "script-alpha", + scopeId: "scope-1", + displayName: "script-alpha", + description: "Script member", + implementationKind: "script", + lifecycleStage: "bind_ready", + publishedServiceId: "script-alpha", + lastBoundRevisionId: "rev-script-1", + createdAt: "2026-04-27T08:00:00Z", + updatedAt: "2026-04-27T08:05:00Z", + }, + ]; 
mockScopeRuntimeApi.listServices.mockResolvedValueOnce([ { serviceId: "script-alpha", @@ -4126,12 +4228,14 @@ describe("StudioPage", () => { ]); renderStudioPage( - "/studio?scopeId=scope-1&memberId=script-alpha&step=invoke&tab=invoke" + "/studio?scopeId=scope-1&member=member%3Ascript-alpha&step=invoke&tab=invoke" ); expect(await screen.findByTestId("studio-invoke-surface")).toBeTruthy(); - expect(screen.getByText("service:script-alpha")).toBeTruthy(); - expect(screen.getByText("member:script-alpha")).toBeTruthy(); + await waitFor(() => { + expect(screen.getByText("service:script-alpha")).toBeTruthy(); + expect(screen.getByText("member:script-alpha")).toBeTruthy(); + }); expect(screen.getByText("services:none")).toBeTruthy(); expect(screen.getByText("endpoint:no-endpoint")).toBeTruthy(); expect(screen.getByText(/empty:script-alpha 还不能直接调用。/)).toBeTruthy(); @@ -4161,7 +4265,7 @@ describe("StudioPage", () => { ]); (studioApi.getScopeBinding as jest.Mock).mockResolvedValueOnce(null); - renderStudioPage("/studio?scopeId=scope-1&memberId=workspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); + renderStudioPage("/studio?scopeId=scope-1&member=member%3Aworkspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); expect(await screen.findByTestId("studio-workflow-build-panel")).toBeTruthy(); @@ -4235,15 +4339,17 @@ describe("StudioPage", () => { updatedAt: "2026-04-27T08:15:01Z", }); - renderStudioPage("/studio?scopeId=scope-1&memberId=workspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); + renderStudioPage("/studio?scopeId=scope-1&member=member%3Aworkspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); expect(await screen.findByTestId("studio-workflow-build-panel")).toBeTruthy(); fireEvent.click(screen.getByRole("button", { name: "Continue to Bind" })); expect(await screen.findByTestId("studio-bind-surface")).toBeTruthy(); await waitFor(() => { + expect(screen.getByText("member:workspace-demo")).toBeTruthy(); expect(screen.getByText("service:no-service")).toBeTruthy(); 
expect(screen.getByText("services:none")).toBeTruthy(); + expect(screen.getByText("candidate:workspace-demo")).toBeTruthy(); }); await act(async () => { @@ -4522,7 +4628,7 @@ describe("StudioPage", () => { ); renderStudioPage( - "/studio?scopeId=scope-1&memberId=draft1&focus=workflow%3Aworkflow-1&step=bind&tab=bindings" + "/studio?scopeId=scope-1&member=member%3Adraft1&focus=workflow%3Aworkflow-1&step=bind&tab=bindings" ); expect(await screen.findByTestId("studio-bind-surface")).toBeTruthy(); @@ -4645,7 +4751,7 @@ describe("StudioPage", () => { ); renderStudioPage( - "/studio?scopeId=scope-1&memberId=joker&focus=workflow%3Aworkflow-1&tab=studio" + "/studio?scopeId=scope-1&member=member%3Ajoker&focus=workflow%3Aworkflow-1&tab=studio" ); expect(await screen.findByTestId("studio-workflow-build-panel")).toBeTruthy(); @@ -4744,7 +4850,7 @@ describe("StudioPage", () => { }) ); - renderStudioPage("/studio?scopeId=scope-1&memberId=joker&step=bind&tab=bindings"); + renderStudioPage("/studio?scopeId=scope-1&member=member%3Ajoker&step=bind&tab=bindings"); expect(await screen.findByTestId("studio-bind-surface")).toBeTruthy(); await waitFor(() => { @@ -4858,6 +4964,21 @@ describe("StudioPage", () => { }); it("keeps the current bind surface active when switching members from the rail", async () => { + mockStudioMembers = [ + ...mockStudioMembers, + { + memberId: "joker", + scopeId: "scope-1", + displayName: "joker", + description: "Joker workflow member", + implementationKind: "workflow", + lifecycleStage: "bind_ready", + publishedServiceId: "joker", + lastBoundRevisionId: "rev-joker", + createdAt: "2026-04-27T08:00:00Z", + updatedAt: "2026-04-27T08:05:00Z", + }, + ]; mockScopeRuntimeApi.listServices.mockResolvedValueOnce([ { serviceId: "default", @@ -4915,7 +5036,7 @@ describe("StudioPage", () => { }) ); - renderStudioPage("/studio?scopeId=scope-1&memberId=default&step=bind&tab=bindings"); + 
renderStudioPage("/studio?scopeId=scope-1&member=member%3Aworkspace-demo&step=bind&tab=bindings"); expect(await screen.findByTestId("studio-bind-surface")).toBeTruthy(); await waitFor(() => { @@ -4949,6 +5070,21 @@ describe("StudioPage", () => { name: "draft1", }, }; + mockStudioMembers = [ + ...mockStudioMembers, + { + memberId: "joker", + scopeId: "scope-1", + displayName: "joker", + description: "Joker workflow member", + implementationKind: "workflow", + lifecycleStage: "bind_ready", + publishedServiceId: "joker", + lastBoundRevisionId: "rev-joker", + createdAt: "2026-04-27T08:00:00Z", + updatedAt: "2026-04-27T08:05:00Z", + }, + ]; (studioApi.listWorkflows as jest.Mock).mockResolvedValueOnce([ { workflowId: "workflow-1", @@ -5004,10 +5140,11 @@ describe("StudioPage", () => { }) ); - renderStudioPage("/studio?scopeId=scope-1&memberId=joker&step=bind&tab=bindings"); + renderStudioPage("/studio?scopeId=scope-1&member=member%3Ajoker&step=bind&tab=bindings"); expect(await screen.findByTestId("studio-bind-surface")).toBeTruthy(); await waitFor(() => { + expect(screen.getByText("member:joker")).toBeTruthy(); expect(screen.getByText("service:joker")).toBeTruthy(); }); @@ -5862,7 +5999,7 @@ describe("StudioPage", () => { ); renderStudioPage( - "/studio?scopeId=scope-1&memberId=script-member&step=bind&tab=bindings" + "/studio?scopeId=scope-1&member=member%3Ascript-member&step=bind&tab=bindings" ); expect(await screen.findByTestId("studio-bind-surface")).toBeTruthy(); @@ -6173,7 +6310,7 @@ describe("StudioPage", () => { }); it("opens the Studio invoke surface from the bind surface endpoint action", async () => { - renderStudioPage("/studio?scopeId=scope-1&teamId=t-alpha&memberId=workspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); + renderStudioPage("/studio?scopeId=scope-1&teamId=t-alpha&member=member%3Aworkspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); fireEvent.click(await screen.findByRole("button", { name: "Bind" })); await waitFor(() => { @@ -6330,7 
+6467,7 @@ describe("StudioPage", () => { }); it("walks the lifecycle flow from build to bind to invoke to observe", async () => { - renderStudioPage("/studio?scopeId=scope-1&memberId=workspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); + renderStudioPage("/studio?scopeId=scope-1&member=member%3Aworkspace-demo&focus=workflow%3Aworkflow-1&tab=studio"); expect(await screen.findByTestId("studio-workflow-build-panel")).toBeTruthy(); diff --git a/apps/aevatar-console-web/src/pages/studio/index.tsx b/apps/aevatar-console-web/src/pages/studio/index.tsx index 27a240867..acb18e0f1 100644 --- a/apps/aevatar-console-web/src/pages/studio/index.tsx +++ b/apps/aevatar-console-web/src/pages/studio/index.tsx @@ -161,6 +161,7 @@ type StudioRouteState = { teamId: string; memberKey: string; memberId: string; + legacyMemberId: string; step: StudioStep; focusKey: string; tab: StudioTab; @@ -853,10 +854,7 @@ function readStudioRouteMemberFromParams( return explicitMember; } - const legacyMemberId = trimOptional(params.get('memberId')); - return legacyMemberId - ? 
parseStudioRouteMember(`member:${legacyMemberId}`) - : { key: '', kind: 'none', value: '', memberId: '', serviceId: '' }; + return { key: '', kind: 'none', value: '', memberId: '', serviceId: '' }; } function buildStudioBuildFocusKey(input: { @@ -1030,6 +1028,25 @@ function buildInventoryScriptName( return `script-${Date.now()}`; } +function buildInventoryGAgentName( + members: ReadonlyArray, +): string { + const usedNames = new Set( + members + .map((member) => normalizeComparableText(member.displayName)) + .filter(Boolean), + ); + + for (let index = 1; index < 1000; index += 1) { + const candidate = `gagent-${index}`; + if (!usedNames.has(candidate)) { + return candidate; + } + } + + return `gagent-${Date.now()}`; +} + function upsertStudioMemberRosterMember( roster: StudioMemberRoster | undefined, scopeId: string, @@ -1192,6 +1209,7 @@ function readStudioRouteState(search?: string): StudioRouteState { teamId: '', memberKey: '', memberId: '', + legacyMemberId: '', step: 'build', focusKey: '', tab: 'workflows', @@ -1216,6 +1234,7 @@ function readStudioRouteState(search?: string): StudioRouteState { teamId: trimOptional(params.get('teamId')), memberKey: routeMember.key, memberId: routeMember.memberId, + legacyMemberId: trimOptional(params.get('memberId')), step: parseStudioStep(params.get('step')), focusKey: buildFocus.key, tab: parseStudioTab(params.get('tab')), @@ -1284,10 +1303,8 @@ function findPublishedStudioMemberByMemberKey( return ( publishedMembers.find( - ({ memberSummary, service }) => - trimOptional(memberSummary?.memberId) === memberToken || - trimOptional(memberSummary?.publishedServiceId) === memberToken || - trimOptional(service.serviceId) === memberToken, + ({ memberSummary }) => + trimOptional(memberSummary?.memberId) === memberToken, ) ?? 
null ); } @@ -2071,21 +2088,10 @@ function resolveStudioMemberSummaryFromMemberKey( return directMemberMatch; } - const legacyPublishedServiceMatch = - studioScopeMembers.find( - (member) => - trimOptional(member.publishedServiceId) === parsedMember.memberId, - ) ?? null; - if (legacyPublishedServiceMatch) { - return legacyPublishedServiceMatch; - } - return ( publishedMembers.find( - ({ service, memberSummary }) => - trimOptional(memberSummary?.memberId) === parsedMember.memberId || - trimOptional(memberSummary?.publishedServiceId) === parsedMember.memberId || - trimOptional(service.serviceId) === parsedMember.memberId, + ({ memberSummary }) => + trimOptional(memberSummary?.memberId) === parsedMember.memberId, )?.memberSummary ?? null ); } @@ -2180,17 +2186,6 @@ function resolvePublishedServiceIdFromMemberKey( return resolvedPublishedServiceId; } - const legacyMemberToken = readMemberIdFromMemberKey(memberKey); - if (legacyMemberToken) { - return ( - trimOptional( - publishedMembers.find( - ({ service }) => trimOptional(service.serviceId) === legacyMemberToken, - )?.service.serviceId, - ) || legacyMemberToken - ); - } - const workflowRouteValue = readWorkflowMemberRouteValueFromMemberKey(memberKey); if (workflowRouteValue) { return trimOptional( @@ -2234,25 +2229,6 @@ function resolveStudioMemberOwnerKey( return `member:${trimOptional(matchedMemberSummary.memberId)}`; } - const matchedPublishedMember = publishedMembers.find( - ({ service }) => - trimOptional(service.serviceId) === parsedMember.memberId || - trimOptional(service.serviceId) === parsedMember.serviceId, - ); - const matchedWorkflowId = trimOptional( - buildWorkflowMemberKeyFromSummary(matchedPublishedMember?.matchedWorkflow), - ); - if (matchedWorkflowId) { - return matchedWorkflowId; - } - - const matchedScriptId = trimOptional( - matchedPublishedMember?.matchedScript?.script?.scriptId, - ); - if (matchedScriptId) { - return `script:${matchedScriptId}`; - } - return parsedMember.key; } @@ -2394,11 
+2370,8 @@ const StudioPage: React.FC = () => { ); const routeSelectedMemberKey = useMemo( () => - trimOptional(routeState.memberKey) || - (trimOptional(routeState.memberId) - ? `member:${trimOptional(routeState.memberId)}` - : ''), - [routeState.memberId, routeState.memberKey], + trimOptional(routeState.memberKey), + [routeState.memberKey], ); const isStudioLocation = typeof window !== 'undefined' && window.location.pathname === '/studio'; @@ -2810,6 +2783,10 @@ const StudioPage: React.FC = () => { () => buildInventoryScriptName(availableScopeScripts, studioScopeMembers), [availableScopeScripts, studioScopeMembers], ); + const suggestedCreateGAgentName = useMemo( + () => buildInventoryGAgentName(studioScopeMembers), + [studioScopeMembers], + ); const publishedScopeServiceRevisionQueries = useQueries({ queries: publishedScopeServices.map((service) => { const serviceId = trimOptional(service.serviceId); @@ -2893,11 +2870,17 @@ const StudioPage: React.FC = () => { visibleWorkflowSummaries, ]); const explicitRouteBackendMemberId = useMemo(() => { - if (routeSelectedMember.kind !== 'member') { + const legacyRouteMemberToken = trimOptional(routeState.legacyMemberId); + if (routeSelectedMember.kind !== 'member' && !legacyRouteMemberToken) { return ''; } - const routeMemberToken = readMemberIdFromMemberKey(routeSelectedMemberKey); + const canonicalRouteMemberToken = readMemberIdFromMemberKey( + routeSelectedMemberKey, + ); + const routeMemberToken = + canonicalRouteMemberToken || + legacyRouteMemberToken; const directRouteMember = studioScopeMembers.find( (member) => trimOptional(member.memberId) === routeMemberToken, ); @@ -2905,11 +2888,14 @@ const StudioPage: React.FC = () => { return trimOptional(directRouteMember.memberId); } - const serviceBackedRouteMember = studioScopeMembers.find( - (member) => trimOptional(member.publishedServiceId) === routeMemberToken, - ); - if (serviceBackedRouteMember) { - return trimOptional(serviceBackedRouteMember.memberId); + if 
(legacyRouteMemberToken && !canonicalRouteMemberToken) { + const serviceBackedRouteMember = studioScopeMembers.find( + (member) => + trimOptional(member.publishedServiceId) === legacyRouteMemberToken, + ); + if (serviceBackedRouteMember) { + return trimOptional(serviceBackedRouteMember.memberId); + } } const routeMemberSummary = resolveStudioMemberSummaryFromMemberKey( @@ -2920,9 +2906,10 @@ const StudioPage: React.FC = () => { return ( trimOptional(routeMemberSummary?.memberId) || - routeMemberToken + canonicalRouteMemberToken ); }, [ + routeState.legacyMemberId, publishedScopeMembers, routeSelectedMember.kind, routeSelectedMemberKey, @@ -3373,7 +3360,7 @@ const StudioPage: React.FC = () => { templateWorkflow || routeBuildFocus.kind === 'workflow' || routeSelectedMember.kind === 'workflow' || - trimOptional(routeState.memberId) + trimOptional(routeState.legacyMemberId) ) { return; } @@ -3389,7 +3376,7 @@ const StudioPage: React.FC = () => { }, [ routeBuildFocus.kind, routeSelectedMember.kind, - routeState.memberId, + routeState.legacyMemberId, selectedWorkflowId, templateWorkflow, visibleWorkflowSummaries, @@ -4328,9 +4315,7 @@ const StudioPage: React.FC = () => { } const currentRouteState = readStudioRouteState(window.location.search); - const currentRouteMemberKey = - trimOptional(currentRouteState.memberKey) || - buildBackendMemberKey(currentRouteState.memberId); + const currentRouteMemberKey = trimOptional(currentRouteState.memberKey); const requestMemberKey = buildBackendMemberKey(resolvedBuildMemberId); if (!requestMemberKey) { return getLocationSnapshot() === requestLocationSnapshot; @@ -4369,9 +4354,6 @@ const StudioPage: React.FC = () => { (resolvedBuildMemberId ? `member:${resolvedBuildMemberId}` : '') || buildCandidateMemberKey || trimOptional(routeState.memberKey) || - (trimOptional(routeState.memberId) - ? 
`member:${trimOptional(routeState.memberId)}` - : '') || activeBuildFocusKey || (() => { const resolvedBoundMemberId = resolvePublishedMemberIdFromServiceId( @@ -4454,7 +4436,6 @@ const StudioPage: React.FC = () => { publishedScopeMembers, resolvedStudioScopeId, routeSelectedBackendMemberKey, - routeState.memberId, routeState.memberKey, routeState.teamId, selectedScriptId, @@ -4755,7 +4736,9 @@ const StudioPage: React.FC = () => { useEffect(() => { if ( !createMemberModalOpen || - (createMemberKind !== 'workflow' && createMemberKind !== 'script') + (createMemberKind !== 'workflow' && + createMemberKind !== 'script' && + createMemberKind !== 'gagent') ) { return; } @@ -4867,19 +4850,80 @@ const StudioPage: React.FC = () => { return; } - setCreateMemberModalOpen(false); - setCreateMemberTeamId(''); - history.push( - buildStudioRoute({ - scopeId: resolvedStudioScopeId || undefined, - teamId: createMemberTeamId || undefined, - step: 'build', - tab: 'gagents', - }), - ); - setBuildSurface('gagent'); - setStudioSurface('build'); - void message.info('Opened GAgent builder.'); + const gAgentDisplayName = trimOptional(createMemberName); + if (!gAgentDisplayName) { + void message.warning('GAgent member name is required.'); + return; + } + + if ( + studioScopeMembers.some( + (member) => + normalizeComparableText(member.displayName) === + normalizeComparableText(gAgentDisplayName) && + normalizeStudioMemberBindingImplementationKind(member.implementationKind) === + 'gagent', + ) + ) { + void message.warning('A GAgent member with the same name already exists.'); + return; + } + + if (!resolvedStudioScopeId) { + void message.warning('Connect a workspace before creating a GAgent member.'); + return; + } + + setInventoryBusyKey('create'); + setInventoryBusyAction('create'); + try { + const createdGAgentMember = await studioApi.createMember({ + scopeId: resolvedStudioScopeId, + displayName: gAgentDisplayName, + implementationKind: 'gagent', + ...(createMemberTeamId ? 
{ teamId: createMemberTeamId } : {}), + }); + queryClient.setQueryData( + ['studio-scope-members', resolvedStudioScopeId], + (current) => + upsertStudioMemberRosterMember( + current, + resolvedStudioScopeId, + createdGAgentMember, + ), + ); + void queryClient.invalidateQueries({ + queryKey: ['studio-scope-members', resolvedStudioScopeId], + }); + setSelectedWorkflowId(''); + setSelectedScriptId(''); + setTemplateWorkflow(''); + setCreateMemberModalOpen(false); + setCreateMemberTeamId(''); + history.push( + buildStudioRoute({ + scopeId: resolvedStudioScopeId, + teamId: createMemberTeamId || undefined, + memberKey: `member:${createdGAgentMember.memberId}`, + step: 'build', + tab: 'gagents', + }), + ); + setBuildSurface('gagent'); + setStudioSurface('build'); + void message.success( + `Created GAgent member ${createdGAgentMember.displayName} and opened Build.`, + ); + } catch (memberError) { + void message.error( + memberError instanceof Error + ? `Studio could not register the GAgent member authority: ${memberError.message}` + : 'Studio could not register the GAgent member authority.', + ); + } finally { + setInventoryBusyKey(''); + setInventoryBusyAction(''); + } return; } @@ -5987,7 +6031,7 @@ const StudioPage: React.FC = () => { (serviceId: string, endpointId: string) => { const routeMemberSummary = resolveStudioMemberSummaryFromMemberKey( trimOptional(routeState.memberKey) || - buildBackendMemberKey(routeState.memberId), + buildBackendMemberKey(routeSelectedBackendMemberId), publishedScopeMembers, studioScopeMembers, ); @@ -6029,7 +6073,7 @@ const StudioPage: React.FC = () => { history, publishedScopeMembers, resolvedStudioScopeId, - routeState.memberId, + routeSelectedBackendMemberId, routeState.memberKey, routeState.teamId, studioScopeMembers, @@ -6062,9 +6106,9 @@ const StudioPage: React.FC = () => { buildStudioFocusKey({ activeBuildFocusKey, routeMemberKey: routeSelectedMemberKey, - routeMemberId: routeState.memberId, + routeMemberId: 
routeSelectedBackendMemberId, }), - [activeBuildFocusKey, routeSelectedMemberKey, routeState.memberId], + [activeBuildFocusKey, routeSelectedBackendMemberId, routeSelectedMemberKey], ); const selectedWorkflowSummary = useMemo( () => @@ -6389,9 +6433,18 @@ const StudioPage: React.FC = () => { const pinnedRouteBackendMemberKey = buildBackendMemberKey( pinnedRouteBackendMemberIdRef.current, ); + if (trimOptional(routeState.legacyMemberId) && !pinnedRouteBackendMemberKey) { + return; + } + const buildBackendMemberKeyFromLegacy = + trimOptional(routeState.legacyMemberId) && pinnedRouteBackendMemberKey + ? pinnedRouteBackendMemberKey + : ''; const persistedMemberKey = studioSurface === 'build' - ? trimOptional(persistableBuildMemberKey) || undefined + ? buildBackendMemberKeyFromLegacy || + trimOptional(persistableBuildMemberKey) || + undefined : pinnedRouteBackendMemberKey || trimOptional(lifecycleSurfaceMemberKey) || undefined; @@ -6433,6 +6486,7 @@ const StudioPage: React.FC = () => { routeBuildFocus.kind, routeBuildFocus.value, routeSelectedMemberKey, + routeState.legacyMemberId, routeState.teamId, runPrompt, selectedWorkflowId, @@ -6474,11 +6528,9 @@ const StudioPage: React.FC = () => { trimOptional(routeSelectedBackendMemberId) || trimOptional(workbenchStudioMemberSummary?.memberId) || readMemberIdFromMemberKey(workbenchMemberKey) || - readMemberIdFromMemberKey(routeState.memberKey) || - trimOptional(routeState.memberId), + readMemberIdFromMemberKey(routeState.memberKey), [ routeSelectedBackendMemberId, - routeState.memberId, routeState.memberKey, workbenchMemberKey, workbenchStudioMemberSummary?.memberId, @@ -7017,7 +7069,7 @@ const StudioPage: React.FC = () => { trimOptional(workbenchPublishedServiceRevision?.staticActorTypeName) || trimOptional(workbenchPublishedService?.displayName) || trimOptional(workbenchPublishedService?.serviceId) || - trimOptional(routeState.memberId) || + trimOptional(routeSelectedBackendMemberId) || 'Current member' : 
trimOptional(activeWorkflowName) || (isBuildScriptsSurface ? trimOptional(selectedScriptId) : '') || @@ -7059,7 +7111,7 @@ const StudioPage: React.FC = () => { trimOptional(workbenchStudioMemberBinding?.publishedServiceId) || trimOptional(workbenchStudioMember?.publishedServiceId) || trimOptional(workbenchPublishedService?.serviceId) || - trimOptional(routeState.memberId) || + trimOptional(routeState.legacyMemberId) || (workbenchStudioMember ? formatStudioMemberLifecycleStage( workbenchStudioMember.lifecycleStage, @@ -7073,7 +7125,7 @@ const StudioPage: React.FC = () => { : formatStudioAssetMeta({ primary: currentMemberImplementationLabel, secondary: - trimOptional(routeState.memberId) || + trimOptional(routeState.legacyMemberId) || activeBuildFocusKey || 'Current member focus', }) || 'Studio is tracking the current member focus.'; @@ -7095,7 +7147,7 @@ const StudioPage: React.FC = () => { trimOptional(workbenchStudioMember?.lastBoundRevisionId) || trimOptional(workbenchPublishedServiceRevision?.revisionId) || trimOptional(workbenchPublishedService?.serviceId) || - trimOptional(routeState.memberId) || + trimOptional(routeState.legacyMemberId) || activeBuildFocusKey : '', }); @@ -7226,10 +7278,11 @@ const StudioPage: React.FC = () => { const hasInvokeTargetMemberSelection = Boolean(workbenchStudioMemberId); const invokeTargetServiceId = - currentInvokeSelectionServiceId || - currentBindingSelectionServiceId || - currentSelectedMemberServiceId || - trimOptional(routeState.memberId); + hasInvokeTargetMemberSelection + ? currentSelectedMemberServiceId + : currentInvokeSelectionServiceId || + currentBindingSelectionServiceId || + trimOptional(routeState.legacyMemberId); const invokeTargetService = useMemo( () => { if (!invokeTargetServiceId) { @@ -7278,7 +7331,11 @@ const StudioPage: React.FC = () => { ? 
currentBindingSelectionEndpointId : invokeTargetDefaultEndpointId; const invokeEmptyState = useMemo(() => { - if (hasInvokeTargetMemberSelection && invokeTargetService) { + if ( + hasInvokeTargetMemberSelection && + invokeTargetService && + invokeTargetService.endpoints.length > 0 + ) { return null; } @@ -8326,7 +8383,7 @@ const StudioPage: React.FC = () => { !templateWorkflow && !workflowsQuery.isLoading && (visibleWorkflowSummaries.length === 0 || - Boolean(trimOptional(routeState.memberId))) && + Boolean(trimOptional(routeState.legacyMemberId))) && (!appContextQuery.data?.features.scripts || !scopeScriptsQuery.isLoading); const studioContextPrimaryTitle = showWorkflowEntryEmptyState @@ -8374,7 +8431,7 @@ const StudioPage: React.FC = () => { : '成员工作台'; const studioBoundServiceLabel = hasSelectedMemberFocus - ? trimOptional(routeState.memberId) || + ? trimOptional(routeState.legacyMemberId) || trimOptional(workbenchPublishedService?.serviceId) || 'No bound service' : ''; @@ -8391,7 +8448,7 @@ const StudioPage: React.FC = () => { teamId: routeState.teamId, tab: 'overview', memberId: - trimOptional(routeState.memberId) || + trimOptional(routeSelectedBackendMemberId) || readMemberIdFromMemberKey(routeState.memberKey) || undefined, serviceId: trimOptional(workbenchPublishedService?.serviceId) || undefined, @@ -8400,7 +8457,7 @@ const StudioPage: React.FC = () => { scopeId: resolvedStudioScopeId, tab: 'overview', serviceId: - trimOptional(routeState.memberId) || + trimOptional(routeState.legacyMemberId) || trimOptional(workbenchPublishedService?.serviceId) || undefined, }) @@ -8894,13 +8951,7 @@ const StudioPage: React.FC = () => { title="Create member" onCancel={closeCreateMemberFlow} onOk={() => void handleCreateMember(createMemberKind)} - okText={ - createMemberKind === 'workflow' - ? 'Create member' - : createMemberKind === 'script' - ? 
'Create Script draft' - : 'Open GAgent builder' - } + okText="Create member" okButtonProps={{ disabled: inventoryBusyAction === 'create' || @@ -8912,7 +8963,9 @@ const StudioPage: React.FC = () => { (createMemberKind === 'script' && (!appContextQuery.data?.features.scripts || !createScriptId || - createScriptIdAlreadyExists)), + createScriptIdAlreadyExists)) || + (createMemberKind === 'gagent' && + (!resolvedStudioScopeId || !trimOptional(createMemberName))), loading: inventoryBusyAction === 'create', }} cancelButtonProps={{ @@ -8953,6 +9006,8 @@ const StudioPage: React.FC = () => { setCreateMemberName(suggestedCreateWorkflowName); } else if (kind === 'script') { setCreateMemberName(suggestedCreateScriptName); + } else { + setCreateMemberName(suggestedCreateGAgentName); } }} > @@ -8961,24 +9016,37 @@ const StudioPage: React.FC = () => { ))}
- Choose the implementation kind first. Workflow entry now - registers a backend member authority; Script creates a named - draft identity before Build; GAgent opens its Build workspace - for implementation editing and binding prep. + Choose the implementation kind first. Studio creates the + backend member authority, then opens the matching Build + surface for Workflow, Script, or GAgent authoring.
- {createMemberKind === 'workflow' || createMemberKind === 'script' ? ( + {createMemberKind === 'workflow' || + createMemberKind === 'script' || + createMemberKind === 'gagent' ? (