From 24a3a7bd6caa8e87b3a91e93c95959e3cfdb906d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 15 Jun 2026 13:03:05 +0000 Subject: [PATCH] Sync plugin files from GitHub-Copilot-for-Azure --- .../azure-skills/.claude-plugin/plugin.json | 2 +- .../azure-skills/.cursor-plugin/plugin.json | 2 +- .../plugins/azure-skills/.plugin/plugin.json | 2 +- .github/plugins/azure-skills/CHANGELOG.md | 8 + .../skills/azure-prepare/SKILL.md | 5 +- .../references/specialized-routing.md | 13 +- .../skills/microsoft-foundry/SKILL.md | 472 +++++++-------- .../agent-optimizer/references/eval-yaml.md | 2 +- .../foundry-agent/create/create-hosted.md | 486 +++++++-------- .../create/quick-start-hosted.md | 320 ++++++++++ .../create/references/azd-ai-cli.md | 149 +++++ .../create/references/local-run.md | 146 +++++ .../foundry-agent/create/references/tools.md | 209 +++++++ .../create/scripts/resolve-project-id.ps1 | 197 ++++++ .../create/scripts/resolve-project-id.sh | 296 +++++++++ .../create/scripts/verify-environment.ps1 | 127 ++++ .../create/scripts/verify-environment.sh | 109 ++++ .../foundry-agent/deploy/deploy.md | 569 ++++++------------ .../references/direct-code-deployment.md | 339 ----------- .../foundry-agent/invoke/invoke.md | 28 +- .../models/deploy-model/SKILL.md | 2 + .../project/create/create-foundry-project.md | 50 +- .../references/standard-agent-setup.md | 2 + .../skills/python-appservice-deploy/SKILL.md | 36 ++ .../references/create-app.md | 133 ++++ .../references/deploy-azcli.md | 112 ++++ .../references/deploy-azd.md | 83 +++ .../references/detect.md | 79 +++ .../references/errors.md | 67 +++ .../references/post-deploy-message.md | 96 +++ .../references/startup-commands.md | 94 +++ .../references/transient-retry.md | 44 ++ .../scripts/generate-app-name.ps1 | 57 ++ .../scripts/generate-app-name.sh | 60 ++ .../scripts/retry-az-create.ps1 | 59 ++ .../scripts/retry-az-create.sh | 52 ++ 36 files changed, 3253 insertions(+), 1254 deletions(-) create mode 
100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/quick-start-hosted.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/azd-ai-cli.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/local-run.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/tools.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.ps1 create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.sh create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.ps1 create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.sh delete mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/references/direct-code-deployment.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/SKILL.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/create-app.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azcli.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azd.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/detect.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/errors.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/post-deploy-message.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/references/startup-commands.md create mode 100644 
.github/plugins/azure-skills/skills/python-appservice-deploy/references/transient-retry.md create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/scripts/generate-app-name.ps1 create mode 100755 .github/plugins/azure-skills/skills/python-appservice-deploy/scripts/generate-app-name.sh create mode 100644 .github/plugins/azure-skills/skills/python-appservice-deploy/scripts/retry-az-create.ps1 create mode 100755 .github/plugins/azure-skills/skills/python-appservice-deploy/scripts/retry-az-create.sh diff --git a/.github/plugins/azure-skills/.claude-plugin/plugin.json b/.github/plugins/azure-skills/.claude-plugin/plugin.json index 20262485..213b8fc7 100644 --- a/.github/plugins/azure-skills/.claude-plugin/plugin.json +++ b/.github/plugins/azure-skills/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "azure", "description": "Microsoft Azure MCP and Skills integration for cloud resource management, deployments, and Azure services. Manage your Azure infrastructure, monitor applications, and deploy resources directly from Claude Code.", - "version": "1.1.68", + "version": "1.1.70", "author": { "name": "Microsoft", "url": "https://www.microsoft.com" diff --git a/.github/plugins/azure-skills/.cursor-plugin/plugin.json b/.github/plugins/azure-skills/.cursor-plugin/plugin.json index 2f4ddcf9..db7c2709 100644 --- a/.github/plugins/azure-skills/.cursor-plugin/plugin.json +++ b/.github/plugins/azure-skills/.cursor-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "azure", "description": "Microsoft Azure MCP and Skills integration for cloud resource management, deployments, and Azure services. 
Manage your Azure infrastructure, monitor applications, and deploy resources directly from Cursor.", - "version": "1.1.68", + "version": "1.1.70", "author": { "name": "Microsoft", "url": "https://www.microsoft.com" diff --git a/.github/plugins/azure-skills/.plugin/plugin.json b/.github/plugins/azure-skills/.plugin/plugin.json index 9108cab4..b2575beb 100644 --- a/.github/plugins/azure-skills/.plugin/plugin.json +++ b/.github/plugins/azure-skills/.plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "azure", "description": "Microsoft Azure MCP and Skills integration for cloud resource management, deployments, and Azure services. Manage your Azure infrastructure, monitor applications, and deploy resources directly from your development environment.", - "version": "1.1.68", + "version": "1.1.70", "author": { "name": "Microsoft", "url": "https://www.microsoft.com" diff --git a/.github/plugins/azure-skills/CHANGELOG.md b/.github/plugins/azure-skills/CHANGELOG.md index f5ad6512..fc51433e 100644 --- a/.github/plugins/azure-skills/CHANGELOG.md +++ b/.github/plugins/azure-skills/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 1.1.70 + +- feat: improve the hosted agent getting started with coding ([#2610](https://github.com/microsoft/GitHub-Copilot-for-Azure/pull/2610)) + +## 1.1.69 + +- feat: add python-appservice-deploy skill for Python/Flask/Django/FastAPI on App Service ([#2487](https://github.com/microsoft/GitHub-Copilot-for-Azure/pull/2487)) + ## 1.1.68 - feat(azure-compute): add VM creation workflow with approval gate before deploy ([#2297](https://github.com/microsoft/GitHub-Copilot-for-Azure/pull/2297)) diff --git a/.github/plugins/azure-skills/skills/azure-prepare/SKILL.md b/.github/plugins/azure-skills/skills/azure-prepare/SKILL.md index 40803029..113c40ed 100644 --- a/.github/plugins/azure-skills/skills/azure-prepare/SKILL.md +++ b/.github/plugins/azure-skills/skills/azure-prepare/SKILL.md @@ -1,10 +1,10 @@ --- name: azure-prepare -description: "Prepare Azure apps for 
deployment (infra Bicep/Terraform, azure.yaml, Dockerfiles). Use for create/modernize or create+deploy; not cross-cloud migration (use azure-cloud-migrate). DO NOT USE FOR: copilot-sdk apps (use azure-hosted-copilot-sdk). WHEN: \"create app\", \"build web app\", \"create API\", \"create serverless HTTP API\", \"create frontend\", \"create back end\", \"build a service\", \"modernize application\", \"update application\", \"add authentication\", \"add caching\", \"host on Azure\", \"create and deploy\", \"deploy to Azure\", \"deploy to Azure using Terraform\", \"deploy to Azure App Service\", \"deploy to Azure App Service using Terraform\", \"deploy to Azure Container Apps\", \"deploy to Azure Container Apps using Terraform\", \"generate Terraform\", \"generate Bicep\", \"function app\", \"timer trigger\", \"service bus trigger\", \"event-driven function\", \"containerized Node.js app\", \"social media app\", \"static portfolio website\", \"todo list with frontend and API\", \"prepare my Azure application to use Key Vault\", \"managed identity\"." +description: "Prepare Azure apps for deployment (infra Bicep/Terraform, azure.yaml, Dockerfiles). Use for create/modernize or create+deploy; not cross-cloud migration (use azure-cloud-migrate). DO NOT USE FOR: copilot-sdk apps (use azure-hosted-copilot-sdk), or Python code-only App Service deploys (use python-appservice-deploy). WHEN: \"create app\", \"build web app\", \"create API\", \"modernize application\", \"host on Azure\", \"deploy to Azure\", \"deploy to Azure using Terraform\", \"deploy to Azure App Service\", \"deploy to Azure App Service using Terraform\", \"deploy to Azure Container Apps\", \"generate Terraform\", \"generate Bicep\", \"function app\", \"timer trigger\", \"service bus trigger\", \"event-driven function\", \"managed identity\"." 
license: MIT metadata: author: Microsoft - version: "1.2.13" + version: "1.2.14" --- # Azure Prepare @@ -68,6 +68,7 @@ Activate this skill when user wants to: | Prompt keywords | Invoke FIRST | |----------------|-------------| +| Python + App Service (e.g., "deploy Python to App Service", "Flask on Azure App Service", "publish Python web app to App Service") | **python-appservice-deploy** | | Lambda, AWS Lambda, migrate AWS, migrate GCP, Lambda to Functions, migrate from AWS, migrate from GCP | **azure-cloud-migrate** | | copilot SDK, copilot app, copilot-powered, @github/copilot-sdk, CopilotClient | **azure-hosted-copilot-sdk** | | Azure Functions, function app, serverless function, timer trigger, HTTP trigger, func new | Stay in **azure-prepare** — prefer Azure Functions templates in Step 4 | diff --git a/.github/plugins/azure-skills/skills/azure-prepare/references/specialized-routing.md b/.github/plugins/azure-skills/skills/azure-prepare/references/specialized-routing.md index 10c6d311..b53900e4 100644 --- a/.github/plugins/azure-skills/skills/azure-prepare/references/specialized-routing.md +++ b/.github/plugins/azure-skills/skills/azure-prepare/references/specialized-routing.md @@ -4,14 +4,15 @@ ## Prompt-Based Routing Table -> **⚠️ PRIORITY RULE**: Check rows **top to bottom**. The first match wins. If the prompt mentions **AWS Lambda migration or AWS Lambda**, invoke **azure-cloud-migrate** even if Azure Functions are also mentioned. +> **⚠️ PRIORITY RULE**: Check rows **top to bottom**. The first match wins. **Python + App Service (code-only) is the highest priority** — route to `python-appservice-deploy` ONLY when the prompt is a code-deploy request **without** IaC or infra keywords (see Priority 1 qualifier below). If the prompt mentions **AWS Lambda migration or AWS Lambda**, invoke **azure-cloud-migrate** even if Azure Functions are also mentioned. 
| Priority | User prompt mentions | Invoke skill FIRST | Then resume azure-prepare at | |----------|---------------------|--------------------|-----------------------------| -| **1 (highest)** | Lambda, AWS Lambda, migrate AWS, migrate GCP, Lambda to Functions, migrate from AWS, migrate from GCP | **azure-cloud-migrate** | Phase 1 Step 4 (Select Recipe) — azure-cloud-migrate does assessment + code conversion, then azure-prepare takes over for infrastructure, local testing, or deployment | -| 2 | copilot SDK, copilot app, copilot-powered, @github/copilot-sdk, CopilotClient, sendAndWait, copilot-sdk-service | **azure-hosted-copilot-sdk** | Phase 1 Step 4 (Select Recipe) | -| 3 | Azure Functions, function app, serverless function, timer trigger, HTTP trigger, queue trigger, func new, func start | Stay in **azure-prepare** | Phase 1 Step 4 (Select Recipe) — prefer Azure Functions templates | -| 4 (lowest) | workflow, orchestration, multi-step, pipeline, fan-out/fan-in, saga, long-running process, durable, order processing | Stay in **azure-prepare** | Phase 1 Step 4 — select **durable** recipe. **MUST** load [durable.md](services/functions/durable.md), [DTS reference](services/durable-task-scheduler/README.md), and [DTS Bicep patterns](services/durable-task-scheduler/bicep.md). | +| **1 (highest)** | Python + Azure App Service **AND NOT** any of: `Terraform`, `Bicep`, `IaC`, `VNet`, `private endpoint`, `Key Vault`, `Cosmos`, `Postgres`, `MySQL`, `SQL`, `Front Door`, `multi-environment`, `Lambda`, `migrate from AWS`, `migrate from GCP`, `Fargate`, `Cloud Run`, `ECS`, `EKS`, `GKE` (e.g., "deploy Python to App Service", "Flask on App Service", "Python web app on App Service") | **python-appservice-deploy** | This is a code-only deploy skill. Do not resume `azure-prepare`. 
If the prompt contains any IaC, infra, or cross-cloud migration keyword above, **skip this row** and continue to row 2+ (i.e., let `azure-cloud-migrate` handle Lambda/Fargate/Cloud Run migrations, or stay in `azure-prepare` for the full infrastructure path). | +| 2 | Lambda, AWS Lambda, migrate AWS, migrate GCP, Lambda to Functions, migrate from AWS, migrate from GCP | **azure-cloud-migrate** | Phase 1 Step 4 (Select Recipe) — azure-cloud-migrate does assessment + code conversion, then azure-prepare takes over for infrastructure, local testing, or deployment | +| 3 | copilot SDK, copilot app, copilot-powered, @github/copilot-sdk, CopilotClient, sendAndWait, copilot-sdk-service | **azure-hosted-copilot-sdk** | Phase 1 Step 4 (Select Recipe) | +| 4 | Azure Functions, function app, serverless function, timer trigger, HTTP trigger, queue trigger, func new, func start | Stay in **azure-prepare** | Phase 1 Step 4 (Select Recipe) — prefer Azure Functions templates | +| 5 (lowest) | workflow, orchestration, multi-step, pipeline, fan-out/fan-in, saga, long-running process, durable, order processing | Stay in **azure-prepare** | Phase 1 Step 4 — select **durable** recipe. **MUST** load [durable.md](services/functions/durable.md), [DTS reference](services/durable-task-scheduler/README.md), and [DTS Bicep patterns](services/durable-task-scheduler/bicep.md). | > ⚠️ This checks the user's **prompt text**, not just existing code. Essential for greenfield projects where there is no codebase to scan. @@ -24,7 +25,7 @@ azure-prepare is the default entry point for all Azure app work. Some technologi Without this check, azure-prepare generates generic infrastructure that misses these optimizations. -> ⚠️ **Re-entry guard**: When azure-prepare is invoked as a **resume** from a specialized skill (e.g., azure-hosted-copilot-sdk Step 4), **skip this routing check** and proceed directly to Step 4. The specialized skill has already completed its work. 
+> ⚠️ **Re-entry guard**: When azure-prepare is invoked as a **resume** from a specialized skill (e.g., `azure-hosted-copilot-sdk` Step 4, or `python-appservice-deploy` handing back for full-infra needs like VNet / Key Vault / DB provisioning), **skip this routing check** and proceed directly to Step 4. The specialized skill has already completed its work. ## Flow diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md b/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md index 20904300..d6299c15 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md @@ -1,238 +1,240 @@ ---- -name: microsoft-foundry -description: "Deploy, evaluate, fine-tune, and manage Foundry agents end-to-end: Docker build, ACR push, hosted/prompt agent create, batch eval, continuous eval, prompt optimizer, Agent Optimizer scaffold, agent.yaml, dataset curation from traces, model fine-tuning (SFT/DPO/RFT). USE FOR: deploy agent, hosted agent, create agent, add tool to agent, invoke agent, evaluate agent, continuous eval, continuous monitoring, optimize prompt, improve prompt, optimize agent instructions, agent optimizer, deploy model, Foundry project, RBAC, role assignment, permissions, quota, capacity, region, troubleshoot agent, deployment failure, AI Services, create Foundry resource, provision, knowledge index, customize deployment, onboard, availability, fine-tune, SFT, DPO, RFT, training-data, grader, distillation, fine-tuned model, large file upload. DO NOT USE FOR: Azure Functions, App Service, general Azure deploy (use azure-deploy), general Azure prep (use azure-prepare)." -license: MIT -metadata: - author: Microsoft - version: "1.1.24" ---- - -# Microsoft Foundry Skill - -This skill helps developers work with Microsoft Foundry resources, covering model discovery and deployment, complete dev lifecycle of AI agent, evaluation workflows, and troubleshooting. 
- -## Pre-Execution Requirements - -> **MANDATORY: Before executing ANY workflow, you MUST first call the Azure MCP `foundry` tool and inspect the available Foundry MCP tools and related parameters.** Treat this initial `foundry` call as a discovery/help step. For this skill, Azure MCP `foundry` is the required entry point for Foundry-related MCP operations. - -## Sub-Skills - -> **MANDATORY: Before executing ANY workflow-specific steps, you MUST read the corresponding sub-skill document.** Do not call workflow-specific MCP tools for a workflow without reading its skill document. This applies even if you already know the MCP tool parameters — the skill document contains required workflow steps, pre-checks, and validation logic that must be followed. This rule applies on every new user message that triggers a different workflow, even if the skill is already loaded. - -This skill includes specialized sub-skills for specific workflows. **Use these instead of the main skill when they match your task:** - -| Sub-Skill | When to Use | Reference | -|-----------|-------------|-----------| -| **deploy** | Containerize, build, push to ACR, create/update/clone agent deployments | [deploy](foundry-agent/deploy/deploy.md) | -| **invoke** | Send messages to an agent, single or multi-turn conversations | [invoke](foundry-agent/invoke/invoke.md) | -| **invocations-ws** | Build, deploy, and connect to hosted agents that speak the `invocations_ws` duplex WebSocket protocol — voice agents, real-time streams, and signaling for out-of-band media transports. 
| [invocations-ws](foundry-agent/invocations-ws/invocations-ws.md) | -| **observe** | Evaluate agent quality, run batch evals, analyze failures, optimize prompts, improve agent instructions, compare versions, set up CI/CD monitoring, and enable continuous production evaluation | [observe](foundry-agent/observe/observe.md) | -| **trace** | Query traces, analyze latency/failures, correlate eval results to specific responses via App Insights `customEvents` | [trace](foundry-agent/trace/trace.md) | -| **troubleshoot** | View hosted agent logs, query telemetry, diagnose failures | [troubleshoot](foundry-agent/troubleshoot/troubleshoot.md) | -| **create** | Create new hosted agent applications. Supports Microsoft Agent Framework, LangGraph, or custom frameworks in Python or C#, across `responses`, `invocations`, or `invocations_ws` protocols. | [create](foundry-agent/create/create-hosted.md) | -| **agent-optimizer** | Make existing Python hosted-agent code optimization-ready, configure eval.yaml, run Agent Optimizer jobs, apply candidates locally, and deploy through azd after review. | [agent-optimizer](foundry-agent/agent-optimizer/agent-optimizer.md) | -| **eval-datasets** | Harvest production traces into evaluation datasets, manage dataset versions and splits, track evaluation metrics over time, detect regressions, and maintain full lineage from trace to deployment. Use for: create dataset from traces, dataset versioning, evaluation trending, regression detection, dataset comparison, eval lineage. | [eval-datasets](foundry-agent/eval-datasets/eval-datasets.md) | +--- +name: microsoft-foundry +description: "Deploy, evaluate, fine-tune, and manage Foundry agents end-to-end with azd: hosted agent scaffold/run/deploy, prompt agent create, batch eval, continuous eval, prompt optimizer, Agent Optimizer scaffold, agent.yaml, dataset curation from traces, model fine-tuning (SFT/DPO/RFT). 
USE FOR: azd ai agent, azd provision/deploy, deploy agent, hosted agent, create agent, add tool to agent, invoke agent, evaluate agent, continuous eval, continuous monitoring, optimize prompt, improve prompt, optimize agent instructions, agent optimizer, deploy model, Foundry project, RBAC, role assignment, permissions, quota, capacity, region, troubleshoot agent, deployment failure, AI Services, create Foundry resource, provision, knowledge index, customize deployment, onboard, availability, fine-tune, SFT, DPO, RFT, training-data, grader, distillation, fine-tuned model, large file upload. DO NOT USE FOR: Azure Functions, App Service, general Azure deploy (use azure-deploy), general Azure prep (use azure-prepare)." +license: MIT +metadata: + author: Microsoft + version: "1.1.25" +--- + +# Microsoft Foundry Skill + +This skill helps developers work with Microsoft Foundry resources, covering model discovery and deployment, complete dev lifecycle of AI agent, evaluation workflows, and troubleshooting. + +## Pre-Execution Requirements + +Before using Foundry MCP operations, call the Azure MCP `foundry` tool and inspect the available Foundry MCP tools and related parameters. Treat this as the discovery/help step for MCP-based workflows. + +## Sub-Skills + +> **MANDATORY: Before executing ANY workflow-specific steps, you MUST read the corresponding sub-skill document.** Do not call workflow-specific MCP tools for a workflow without reading its skill document. This applies even if you already know the MCP tool parameters — the skill document contains required workflow steps, pre-checks, and validation logic that must be followed. This rule applies on every new user message that triggers a different workflow, even if the skill is already loaded. + +This skill includes specialized sub-skills for specific workflows. 
**Use these instead of the main skill when they match your task:** + +| Sub-Skill | When to Use | Reference | +|-----------|-------------|-----------| +| **deploy** | Deploy hosted agents to Foundry, smoke-test a deployment, create or update prompt agents, and manage agent versions and multi-environment deploys. | [deploy](foundry-agent/deploy/deploy.md) | +| **invoke** | Send messages to an agent, single or multi-turn conversations | [invoke](foundry-agent/invoke/invoke.md) | +| **invocations-ws** | Build, deploy, and connect to hosted agents that speak the `invocations_ws` duplex WebSocket protocol — voice agents, real-time streams, and signaling for out-of-band media transports. | [invocations-ws](foundry-agent/invocations-ws/invocations-ws.md) | +| **observe** | Evaluate agent quality, run batch evals, analyze failures, optimize prompts, improve agent instructions, compare versions, set up CI/CD monitoring, and enable continuous production evaluation | [observe](foundry-agent/observe/observe.md) | +| **trace** | Query traces, analyze latency/failures, correlate eval results to specific responses via App Insights `customEvents` | [trace](foundry-agent/trace/trace.md) | +| **troubleshoot** | View hosted agent logs, query telemetry, diagnose failures | [troubleshoot](foundry-agent/troubleshoot/troubleshoot.md) | +| **create (quick start)** | Create a new hosted Foundry agent from scratch end-to-end — scaffold, provision a new Foundry project, deploy, and smoke-test. Opinionated happy-path that accepts common overrides (language, region, sample, topic, existing project, existing model). For anything not covered by the quickstart, use **create**. 
| [create/quick-start-hosted.md](foundry-agent/create/quick-start-hosted.md) | +| **create** | Use when the standard end-to-end happy path doesn't fit — lifting existing agent code into the project, deploying outside the default code path, wiring connections at scaffold time, advanced setup, or recovering from a failed quickstart run. | [create](foundry-agent/create/create-hosted.md) | +| **agent-optimizer** | Make existing Python hosted-agent code optimization-ready, configure eval.yaml, run Agent Optimizer jobs, apply candidates locally, and deploy through azd after review. | [agent-optimizer](foundry-agent/agent-optimizer/agent-optimizer.md) | +| **eval-datasets** | Harvest production traces into evaluation datasets, manage dataset versions and splits, track evaluation metrics over time, detect regressions, and maintain full lineage from trace to deployment. Use for: create dataset from traces, dataset versioning, evaluation trending, regression detection, dataset comparison, eval lineage. | [eval-datasets](foundry-agent/eval-datasets/eval-datasets.md) | | **project/create** | Creating a new Azure AI Foundry project for hosting agents and models. Use when onboarding to Foundry or setting up new infrastructure. | [project/create/create-foundry-project.md](project/create/create-foundry-project.md) | -| **resource/create** | Creating Azure AI Services multi-service resource (Foundry resource) using Azure CLI. Use when manually provisioning AI Services resources with granular control. | [resource/create/create-foundry-resource.md](resource/create/create-foundry-resource.md) | +| **resource/create** | Creating Azure AI Services multi-service resource (Foundry resource) using Azure CLI. Use when manually provisioning AI Services resources with granular control. 
| [resource/create/create-foundry-resource.md](resource/create/create-foundry-resource.md) | | **private-network** | Answer questions about Foundry network isolation **and** deploy Foundry with VNet isolation (BYO VNet, Managed VNet, hybrid). Covers architecture concepts, template selection, deployment, and post-deployment validation. | [resource/private-network/private-network.md](resource/private-network/private-network.md) | -| **models/deploy-model** | Unified model deployment with intelligent routing. Handles quick preset deployments, fully customized deployments (version/SKU/capacity/RAI), and capacity discovery across regions. Routes to sub-skills: `preset` (quick deploy), `customize` (full control), `capacity` (find availability). | [models/deploy-model/SKILL.md](models/deploy-model/SKILL.md) | -| **quota** | Managing quotas and capacity for Microsoft Foundry resources. Use when checking quota usage, troubleshooting deployment failures due to insufficient quota, requesting quota increases, or planning capacity. | [quota/quota.md](quota/quota.md) | -| **rbac** | Managing RBAC permissions, role assignments, managed identities, and service principals for Microsoft Foundry resources. Use for access control, auditing permissions, and CI/CD setup. | [rbac/rbac.md](rbac/rbac.md) | -| **finetuning** | Fine-tune models on Azure AI Foundry — SFT distillation, DPO preference optimization, RFT with graders and tool calling. Dataset preparation, grader calibration, training, checkpoint selection, deployment, evaluation. Use for: fine-tune, SFT, DPO, RFT, training data, grader, distillation, fine-tuned model, large file upload. | [finetuning/SKILL.md](finetuning/SKILL.md) | - -> 💡 **Tip:** For a complete onboarding flow: `project/create` (public) or `private-network` (VNet isolation) → `models/deploy-model` → agent workflows (`create` → `deploy` → `invoke`). 
- -> 💡 **Fine-Tuning:** Use `finetuning` for all model customization — SFT distillation, DPO preference optimization, and RFT with graders. Includes quickstart, grader calibration, and training curve analysis. - -> 💡 **Model Deployment:** Use `models/deploy-model` for all deployment scenarios — it intelligently routes between quick preset deployment, customized deployment with full control, and capacity discovery across regions. - -> 💡 **Prompt Optimization:** For requests like "optimize my prompt" or "improve my agent instructions," load [observe](foundry-agent/observe/observe.md) and use the `prompt_optimize` MCP tool through that eval-driven workflow. - -## Infrastructure Lifecycle - -Match user intent to the correct infrastructure workflow. - -| User Intent | Workflow | -|-------------|---------| -| "Create Foundry" / "Set up Foundry" (ambiguous) | Use `AskUserQuestion`: (a) just an AI Services resource, (b) a project with public access, or (c) a project with network isolation? Route: (a) → [resource/create](resource/create/create-foundry-resource.md), (b) → [project/create](project/create/create-foundry-project.md), (c) → [private-network](resource/private-network/private-network.md) | -| Set up Foundry with VNet isolation | [private-network](resource/private-network/private-network.md) | -| Create a Foundry project (public) | [project/create](project/create/create-foundry-project.md) | -| Create a bare Foundry resource | [resource/create](resource/create/create-foundry-resource.md) | - -## Agent Development Lifecycle - -Match user intent to the correct agent workflow. Read each sub-skill in order before executing. 
- -| User Intent | Workflow (read in order) | -|-------------|------------------------| -| Create a new agent from scratch | [create](foundry-agent/create/create-hosted.md) → [deploy](foundry-agent/deploy/deploy.md) → [invoke](foundry-agent/invoke/invoke.md) | -| Optimize existing Python hosted agent | [agent-optimizer](foundry-agent/agent-optimizer/agent-optimizer.md) → scaffold/review → eval.yaml → optimize → apply candidate → deploy → invoke | -| Deploy an agent (code already exists) | deploy (includes eval-suite setup) → invoke → observe (evaluate/optimize) | -| Update/redeploy an agent after code changes | deploy (includes eval-suite setup) → invoke → observe (evaluate/optimize) | -| Invoke/test/chat with an agent | invoke | -| Optimize / improve agent prompt or instructions | observe (Step 4: Optimize) | -| Evaluate and optimize agent (full loop) | observe | -| Enable continuous evaluation monitoring | observe (Step 6: CI/CD & Monitoring) | -| Troubleshoot an agent issue | invoke → troubleshoot | -| Fix a broken agent (troubleshoot + redeploy) | invoke → troubleshoot → apply fixes → deploy → invoke | - -## Agent: .foundry Workspace Standard - -Every agent source folder can keep Foundry-specific cache and overlay state under `.foundry/`: - -```text -/ - .foundry/ - agent-metadata.yaml - agent-metadata.prod.yaml - suites/ - datasets/ - evaluators/ - results/ -``` - -- In azd projects, derive deployment context (project endpoint, agent name/version, ACR, App Insights) from `azure.yaml` plus `azd env get-values`; do not duplicate those values in metadata when azd already provides them. -- `agent-metadata.yaml` is the preferred local/dev overlay for non-azd values, remote Foundry suite references, local cache paths, result summaries, and explicit overrides. Optional sidecar files such as `agent-metadata.prod.yaml` can hold a single prod or CI-targeted overlay without mixing multiple environments in one file. 
-- `suites/`, `datasets/`, and `evaluators/` are local cache folders. Reuse them when they are current, and ask before refreshing or overwriting them. -- See [Agent Metadata Contract](references/agent-metadata-contract.md) for the canonical schema and workflow rules. - -## Agent: Setup References - -- [Standard Agent Setup](references/standard-agent-setup.md) - Standard capability-host setup with customer-managed data, search, and AI Services resources. - -## Agent: Common Project Context Resolution - -Agent skills should run this step **only when they need configuration values they don't already have**. If a value (for example, agent root, environment, project endpoint, or agent name) is already known from the user's message or a previous skill in the same session, skip resolution for that value. - -### Step 1: Discover Agent Roots and azd Context - -First check whether the workspace has `azure.yaml` with services using `host: azure.ai.agent`. - -- **One azd agent service** -> use that service's `project` folder as the agent root. -- **Multiple azd agent services** -> require the user to choose the target service/folder. -- **No azd agent service** -> search the workspace for `.foundry/` folders that contain `agent-metadata.yaml` or `agent-metadata.<env>.yaml`. - - **One match** -> use that agent root. - - **Multiple matches** -> require the user to choose the target agent folder. - - **No matches** -> for create/deploy workflows, seed a new `.foundry/` folder during setup; for all other workflows, stop and ask the user which agent source folder to initialize. - -After selecting an agent root, keep all local `.foundry` cache inspection, source inspection, evaluator suggestions, dataset suggestions, and prompt-optimization context inside that folder only. Do **not** scan sibling agent folders unless the user explicitly switches roots. - -### Step 2: Resolve Environment and Deployment Context - -If `azure.yaml` is present, resolve the azd environment first: - -1. 
Environment explicitly named by the user -2. `AZURE_ENV_NAME` from `azd env get-values` -3. azd default environment from `.azure/config.json` -4. Environment already selected earlier in the session - -Run `azd env get-values` for the selected environment when project/deployment values are not already known. Prefer azd values for deployment context: - -| azd Variable | Resolves To | -|-------------|-------------| -| `AZURE_AI_PROJECT_ENDPOINT` or `AZURE_AIPROJECT_ENDPOINT` | Project endpoint | -| `AGENT__NAME` | Agent name for the selected azd service | -| `AGENT__VERSION` | Agent version for the selected azd service | -| `AZURE_CONTAINER_REGISTRY_NAME` or `AZURE_CONTAINER_REGISTRY_ENDPOINT` | ACR registry name / image URL prefix | -| `APPLICATIONINSIGHTS_CONNECTION_STRING` | App Insights connection string for trace workflows | -| `AZURE_SUBSCRIPTION_ID`, `AZURE_RESOURCE_GROUP`, `AZURE_AI_ACCOUNT_NAME`, `AZURE_AI_PROJECT_NAME` | Azure resource lookup and Playground links | - -When azd supplies these values, use them as the source of truth and do not copy them into `.foundry/agent-metadata*.yaml` on metadata writes. - -### Step 3: Select Metadata Overlay and Resolve Environment - -Inside the selected agent root, choose the metadata file in this order: -1. Metadata filename or path explicitly provided by the user or workflow -2. If an explicit environment is already known and `.foundry/agent-metadata..yaml` exists, use that file -3. `.foundry/agent-metadata.yaml` -4. If multiple metadata files remain and no rule above selects one, prompt the user to choose - -Read the selected metadata file and resolve any remaining environment choice in this order: -1. Environment explicitly named by the user -2. If the selected metadata file defines exactly one environment, use it -3. Environment already selected earlier in the session -4. 
`defaultEnvironment` from metadata - -If the selected metadata file still contains multiple environments and none of the rules above selects one, prompt the user to choose. Keep the selected agent root, metadata file, environment, and whether context came from azd or metadata visible in every workflow summary. - -If the selected environment exposes older `testSuites[]` metadata but not `evaluationSuites[]`, treat `testSuites[]` as the source for this session and normalize each entry in memory to the `evaluationSuites[]` shape before continuing. If the metadata is older still and only exposes legacy `testCases[]`, normalize that list the same way. Preserve dataset and evaluator fields, keep any existing `tags`, and map legacy `priority` to `tags.tier` only when `tags.tier` is missing: `P0` -> `smoke`, `P1` -> `regression`, `P2` -> `coverage`. - -### Step 4: Resolve eval.yaml Local Evaluation Intent - -If `eval.yaml` exists in the selected agent root, parse it before generating new suites: - -- `agent.name` -> target agent candidate; verify it matches the selected azd/metadata agent before using it. -- `dataset_file` -> local seed dataset candidate. -- `evaluators[]` -> candidate Foundry evaluator names; verify with `evaluator_catalog_get` before treating them as remote evaluators. -- `name` -> local eval/suite candidate; verify remotely before persisting as `suiteName`. -- `options.eval_model`, `options.pass_threshold`, `max_samples`, `trace_days`, and `generation_instruction` -> setup defaults. - -Treat `eval.yaml` as local evaluation intent, not proof that a Foundry suite exists. Persist synced suite/dataset/evaluator references to `.foundry` only after remote lookup or registration succeeds. - -### Step 5: Resolve Common Configuration - -Layer sources in this order: - -1. Explicit user input and values already selected in the session -2. azd environment values for deployment context -3. 
`.foundry/agent-metadata*.yaml` overlay values and remote suite/cache references -4. `agent.yaml` and `eval.yaml` local source configuration -5. User prompts for anything still missing - -If azd and metadata both provide the same value and they differ, stop and ask which source is authoritative. If they match, use the azd value and avoid rewriting the duplicate on future metadata writes. - -| Effective Value | Preferred Source | Used By | -|-----------------|------------------|---------| -| Project endpoint | azd env | deploy, invoke, observe, trace, troubleshoot | -| Agent name/version | azd agent variables, then `agent.yaml` | invoke, observe, trace, troubleshoot | -| ACR | azd env | deploy | -| Evaluation suites and cache paths | `.foundry/agent-metadata*.yaml` | observe, eval-datasets | -| Local seed dataset/evaluator intent | `eval.yaml` | observe, eval-datasets | - -### Step 6: Write Metadata Overlay (Create/Deploy/Observe Only) - -On any metadata write (deploy, auto-setup, dataset refresh, or trace-to-dataset update), persist only non-derivable overlay/cache state in the selected metadata file: - -- azd binding (`azd.environmentName`, `azd.service`) when useful for future resolution -- `evaluationSuites[]` with remote suite/dataset/evaluator references and local cache paths -- `lastEval`, result files, comparison summaries, or explicit non-azd overrides - -Do not copy azd-owned deployment values into metadata when azd already provides them. If the selected file is a preferred single-environment file, rewrite only that one environment block. If the selected file is a legacy multi-environment file, rewrite only the selected environment block. Never copy or merge environments across sibling metadata files automatically. If the selected environment still uses older `testSuites[]` or legacy `testCases[]`, rewrite it to `evaluationSuites[]` and remove migrated `priority` fields from the rewritten entries. 
- -### Step 7: Collect Missing Values - -Use the `ask_user` or `askQuestions` tool **only for values not resolved** from the user's message, session context, metadata, or azd bootstrap. Common values skills may need: -- **Agent root** — Target azd service project folder or folder containing `.foundry/agent-metadata*.yaml` -- **Metadata file** — `agent-metadata.yaml` for local/dev, or an explicit sidecar such as `agent-metadata.prod.yaml` -- **Environment** — azd environment, `dev`, `prod`, or another environment key from metadata -- **Project endpoint** — AI Foundry project endpoint URL -- **Agent name** — Name of the target agent - -> 💡 **Tip:** If the user already provides the agent path, environment, project endpoint, or agent name, extract it directly — do not ask again. - -## Agent: Agent Types - -All agent skills support two agent types: - -| Type | Kind | Description | -|------|------|-------------| -| **Prompt** | `"prompt"` | LLM-based agents backed by a model deployment | -| **Hosted** | `"hosted"` | Container-based agents running custom code | - -Use `agent_get` MCP tool to determine an agent's type when needed. - -## Tool Usage Conventions - -- Use the `ask_user` or `askQuestions` tool whenever collecting information from the user -- Use the `task` or `runSubagent` tool to delegate long-running or independent sub-tasks (e.g., env var scanning, status polling, Dockerfile generation) -- Prefer Azure MCP tools over direct CLI commands when available -- Reference official Microsoft documentation URLs instead of embedding CLI command syntax - -## Additional Resources - -- [Foundry Hosted Agents](https://learn.microsoft.com/azure/ai-foundry/agents/concepts/hosted-agents?view=foundry) -- [Foundry Agent Runtime Components](https://learn.microsoft.com/azure/ai-foundry/agents/concepts/runtime-components?view=foundry) - -## SDK Quick Reference - +| **models/deploy-model** | Unified model deployment with intelligent routing. 
Handles quick preset deployments, fully customized deployments (version/SKU/capacity/RAI), and capacity discovery across regions. Routes to sub-skills: `preset` (quick deploy), `customize` (full control), `capacity` (find availability). | [models/deploy-model/SKILL.md](models/deploy-model/SKILL.md) | +| **quota** | Managing quotas and capacity for Microsoft Foundry resources. Use when checking quota usage, troubleshooting deployment failures due to insufficient quota, requesting quota increases, or planning capacity. | [quota/quota.md](quota/quota.md) | +| **rbac** | Managing RBAC permissions, role assignments, managed identities, and service principals for Microsoft Foundry resources. Use for access control, auditing permissions, and CI/CD setup. | [rbac/rbac.md](rbac/rbac.md) | +| **finetuning** | Fine-tune models on Azure AI Foundry — SFT distillation, DPO preference optimization, RFT with graders and tool calling. Dataset preparation, grader calibration, training, checkpoint selection, deployment, evaluation. Use for: fine-tune, SFT, DPO, RFT, training data, grader, distillation, fine-tuned model, large file upload. | [finetuning/SKILL.md](finetuning/SKILL.md) | + +> 💡 **Tip:** For a complete onboarding flow: `project/create` (public) or `private-network` (VNet isolation) → `models/deploy-model` → agent workflows (`create` → `deploy` → `invoke`). + +> 💡 **Fine-Tuning:** Use `finetuning` for all model customization — SFT distillation, DPO preference optimization, and RFT with graders. Includes quickstart, grader calibration, and training curve analysis. + +> 💡 **Model Deployment:** Use `models/deploy-model` for all deployment scenarios — it intelligently routes between quick preset deployment, customized deployment with full control, and capacity discovery across regions. 
+ +> 💡 **Prompt Optimization:** For requests like "optimize my prompt" or "improve my agent instructions," load [observe](foundry-agent/observe/observe.md) and use the `prompt_optimize` MCP tool through that eval-driven workflow. + +## Infrastructure Lifecycle + +Match user intent to the correct infrastructure workflow. + +| User Intent | Workflow | +|-------------|---------| +| "Create Foundry" / "Set up Foundry" (ambiguous) | Use `AskUserQuestion`: (a) just an AI Services resource, (b) a project with public access, or (c) a project with network isolation? Route: (a) → [resource/create](resource/create/create-foundry-resource.md), (b) → [project/create](project/create/create-foundry-project.md), (c) → [private-network](resource/private-network/private-network.md) | +| Set up Foundry with VNet isolation | [private-network](resource/private-network/private-network.md) | +| Create a Foundry project (public) | [project/create](project/create/create-foundry-project.md) | +| Create a bare Foundry resource | [resource/create](resource/create/create-foundry-resource.md) | + +## Agent Development Lifecycle + +Match user intent to the correct agent workflow. Read each sub-skill in order before executing. 
+ +| User Intent | Workflow (read in order) | +|-------------|------------------------| +| Create a new hosted agent end-to-end (scaffold + deploy + test) | [quick-start-hosted](foundry-agent/create/quick-start-hosted.md) (self-contained end-to-end) | +| Anything beyond the standard quickstart (existing code, deployment customization, scaffold-time connections, recovery) | [create](foundry-agent/create/create-hosted.md) → [deploy](foundry-agent/deploy/deploy.md) → [invoke](foundry-agent/invoke/invoke.md) | +| Optimize existing Python hosted agent | [agent-optimizer](foundry-agent/agent-optimizer/agent-optimizer.md) → scaffold/review → eval.yaml → optimize → apply candidate → deploy → invoke | +| Deploy an agent (code already exists) | deploy (includes eval-suite setup) → invoke → observe (evaluate/optimize) | +| Update/redeploy an agent after code changes | deploy (includes eval-suite setup) → invoke → observe (evaluate/optimize) | +| Invoke/test/chat with an agent | invoke | +| Optimize / improve agent prompt or instructions | observe (Step 4: Optimize) | +| Evaluate and optimize agent (full loop) | observe | +| Enable continuous evaluation monitoring | observe (Step 6: CI/CD & Monitoring) | +| Troubleshoot an agent issue | invoke → troubleshoot | +| Fix a broken agent (troubleshoot + redeploy) | invoke → troubleshoot → apply fixes → deploy → invoke | + +## Agent: .foundry Workspace Standard + +Every agent source folder can keep Foundry-specific cache and overlay state under `.foundry/`: + +```text +/ + .foundry/ + agent-metadata.yaml + agent-metadata.prod.yaml + suites/ + datasets/ + evaluators/ + results/ +``` + +- In azd projects, derive deployment context (project endpoint, agent name/version, ACR, App Insights) from `azure.yaml` plus `azd env get-values`; do not duplicate those values in metadata when azd already provides them. 
+- `agent-metadata.yaml` is the preferred local/dev overlay for non-azd values, remote Foundry suite references, local cache paths, result summaries, and explicit overrides. Optional sidecar files such as `agent-metadata.prod.yaml` can hold a single prod or CI-targeted overlay without mixing multiple environments in one file. +- `suites/`, `datasets/`, and `evaluators/` are local cache folders. Reuse them when they are current, and ask before refreshing or overwriting them. +- See [Agent Metadata Contract](references/agent-metadata-contract.md) for the canonical schema and workflow rules. + +## Agent: Setup References + +- [Standard Agent Setup](references/standard-agent-setup.md) — advanced setup for production workloads that need data-residency control (bring-your-own Cosmos DB / Storage / AI Search via a Foundry capability host). The default `azd ai agent` flow uses **Basic Agent Setup** and does **not** provision `capabilityHosts/agents` — do not flag its absence as a bug. For default post-provision state, see the "Expected env-var fingerprint" section in [foundry-agent/create/create-hosted.md](foundry-agent/create/create-hosted.md). + +## Agent: Common Project Context Resolution + +Agent skills should run this step **only when they need configuration values they don't already have**. If a value (for example, agent root, environment, project endpoint, or agent name) is already known from the user's message or a previous skill in the same session, skip resolution for that value. + +### Step 1: Discover Agent Roots and azd Context + +First check whether the workspace has `azure.yaml` with services using `host: azure.ai.agent`. + +- **One azd agent service** -> use that service's `project` folder as the agent root. +- **Multiple azd agent services** -> require the user to choose the target service/folder. +- **No azd agent service** -> search the workspace for `.foundry/` folders that contain `agent-metadata.yaml` or `agent-metadata.<env>.yaml`.
+ - **One match** -> use that agent root. + - **Multiple matches** -> require the user to choose the target agent folder. + - **No matches** -> for create/deploy workflows, seed a new `.foundry/` folder during setup; for all other workflows, stop and ask the user which agent source folder to initialize. + +After selecting an agent root, keep all local `.foundry` cache inspection, source inspection, evaluator suggestions, dataset suggestions, and prompt-optimization context inside that folder only. Do **not** scan sibling agent folders unless the user explicitly switches roots. + +### Step 2: Resolve Environment and Deployment Context + +If `azure.yaml` is present, resolve the azd environment first: + +1. Environment explicitly named by the user +2. `AZURE_ENV_NAME` from `azd env get-values` +3. azd default environment from `.azure/config.json` +4. Environment already selected earlier in the session + +Run `azd env get-values` for the selected environment when project/deployment values are not already known. Prefer azd values for deployment context: + +| azd Variable | Resolves To | +|-------------|-------------| +| `AZURE_AI_PROJECT_ENDPOINT` or `AZURE_AIPROJECT_ENDPOINT` | Project endpoint | +| `AGENT_<SERVICE>_NAME` | Agent name for the selected azd service | +| `AGENT_<SERVICE>_VERSION` | Agent version for the selected azd service | +| `AZURE_CONTAINER_REGISTRY_NAME` or `AZURE_CONTAINER_REGISTRY_ENDPOINT` | ACR registry name / image URL prefix | +| `APPLICATIONINSIGHTS_CONNECTION_STRING` | App Insights connection string for trace workflows | +| `AZURE_SUBSCRIPTION_ID`, `AZURE_RESOURCE_GROUP`, `AZURE_AI_ACCOUNT_NAME`, `AZURE_AI_PROJECT_NAME` | Azure resource lookup and Playground links | + +When azd supplies these values, use them as the source of truth and do not copy them into `.foundry/agent-metadata*.yaml` on metadata writes. + +### Step 3: Select Metadata Overlay and Resolve Environment + +Inside the selected agent root, choose the metadata file in this order: +1.
Metadata filename or path explicitly provided by the user or workflow +2. If an explicit environment is already known and `.foundry/agent-metadata.<env>.yaml` exists, use that file +3. `.foundry/agent-metadata.yaml` +4. If multiple metadata files remain and no rule above selects one, prompt the user to choose + +Read the selected metadata file and resolve any remaining environment choice in this order: +1. Environment explicitly named by the user +2. If the selected metadata file defines exactly one environment, use it +3. Environment already selected earlier in the session +4. `defaultEnvironment` from metadata + +If the selected metadata file still contains multiple environments and none of the rules above selects one, prompt the user to choose. Keep the selected agent root, metadata file, environment, and whether context came from azd or metadata visible in every workflow summary. + +If the selected environment exposes older `testSuites[]` metadata but not `evaluationSuites[]`, treat `testSuites[]` as the source for this session and normalize each entry in memory to the `evaluationSuites[]` shape before continuing. If the metadata is older still and only exposes legacy `testCases[]`, normalize that list the same way. Preserve dataset and evaluator fields, keep any existing `tags`, and map legacy `priority` to `tags.tier` only when `tags.tier` is missing: `P0` -> `smoke`, `P1` -> `regression`, `P2` -> `coverage`. + +### Step 4: Resolve eval.yaml Local Evaluation Intent + +If `eval.yaml` exists in the selected agent root, parse it before generating new suites: + +- `agent.name` -> target agent candidate; verify it matches the selected azd/metadata agent before using it. +- `dataset_file` -> local seed dataset candidate. +- `evaluators[]` -> candidate Foundry evaluator names; verify with `evaluator_catalog_get` before treating them as remote evaluators. +- `name` -> local eval/suite candidate; verify remotely before persisting as `suiteName`.
+- `options.eval_model`, `options.pass_threshold`, `max_samples`, `trace_days`, and `generation_instruction` -> setup defaults. + +Treat `eval.yaml` as local evaluation intent, not proof that a Foundry suite exists. Persist synced suite/dataset/evaluator references to `.foundry` only after remote lookup or registration succeeds. + +### Step 5: Resolve Common Configuration + +Layer sources in this order: + +1. Explicit user input and values already selected in the session +2. azd environment values for deployment context +3. `.foundry/agent-metadata*.yaml` overlay values and remote suite/cache references +4. `agent.yaml` and `eval.yaml` local source configuration +5. User prompts for anything still missing + +If azd and metadata both provide the same value and they differ, stop and ask which source is authoritative. If they match, use the azd value and avoid rewriting the duplicate on future metadata writes. + +| Effective Value | Preferred Source | Used By | +|-----------------|------------------|---------| +| Project endpoint | azd env | deploy, invoke, observe, trace, troubleshoot | +| Agent name/version | azd agent variables, then `agent.yaml` | invoke, observe, trace, troubleshoot | +| ACR | azd env | deploy | +| Evaluation suites and cache paths | `.foundry/agent-metadata*.yaml` | observe, eval-datasets | +| Local seed dataset/evaluator intent | `eval.yaml` | observe, eval-datasets | + +### Step 6: Write Metadata Overlay (Create/Deploy/Observe Only) + +On any metadata write (deploy, auto-setup, dataset refresh, or trace-to-dataset update), persist only non-derivable overlay/cache state in the selected metadata file: + +- azd binding (`azd.environmentName`, `azd.service`) when useful for future resolution +- `evaluationSuites[]` with remote suite/dataset/evaluator references and local cache paths +- `lastEval`, result files, comparison summaries, or explicit non-azd overrides + +Do not copy azd-owned deployment values into metadata when azd already provides 
them. If the selected file is a preferred single-environment file, rewrite only that one environment block. If the selected file is a legacy multi-environment file, rewrite only the selected environment block. Never copy or merge environments across sibling metadata files automatically. If the selected environment still uses older `testSuites[]` or legacy `testCases[]`, rewrite it to `evaluationSuites[]` and remove migrated `priority` fields from the rewritten entries. + +### Step 7: Collect Missing Values + +Use the `ask_user` or `askQuestions` tool **only for values not resolved** from the user's message, session context, metadata, or azd bootstrap. Common values skills may need: +- **Agent root** — Target azd service project folder or folder containing `.foundry/agent-metadata*.yaml` +- **Metadata file** — `agent-metadata.yaml` for local/dev, or an explicit sidecar such as `agent-metadata.prod.yaml` +- **Environment** — azd environment, `dev`, `prod`, or another environment key from metadata +- **Project endpoint** — AI Foundry project endpoint URL +- **Agent name** — Name of the target agent + +> 💡 **Tip:** If the user already provides the agent path, environment, project endpoint, or agent name, extract it directly — do not ask again. + +## Agent: Agent Types + +All agent skills support two agent types: + +| Type | Kind | Description | +|------|------|-------------| +| **Prompt** | `"prompt"` | LLM-based agents backed by a model deployment | +| **Hosted** | `"hosted"` | Container-based agents running custom code | + +Use `agent_get` MCP tool to determine an agent's type when needed. 
+ +## Tool Usage Conventions + +- Use the `ask_user` or `askQuestions` tool whenever collecting information from the user +- Use the `task` or `runSubagent` tool to delegate long-running or independent sub-tasks (e.g., env var scanning, status polling, Dockerfile generation) +- Prefer Azure MCP tools over direct CLI commands when available +- Reference official Microsoft documentation URLs instead of embedding CLI command syntax + +## Additional Resources + +- [Foundry Hosted Agents](https://learn.microsoft.com/azure/ai-foundry/agents/concepts/hosted-agents?view=foundry) +- [Foundry Agent Runtime Components](https://learn.microsoft.com/azure/ai-foundry/agents/concepts/runtime-components?view=foundry) + +## SDK Quick Reference + - [Python](references/sdk/foundry-sdk-py.md) diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/agent-optimizer/references/eval-yaml.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/agent-optimizer/references/eval-yaml.md index 992b3fc0..6016d809 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/agent-optimizer/references/eval-yaml.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/agent-optimizer/references/eval-yaml.md @@ -1,6 +1,6 @@ # eval.yaml Guidance -Create `eval.yaml` directly when the conversation or `.foundry/agent-metadata*.yaml` already selected the dataset/evaluators. Otherwise ask whether to run `azd ai agent eval init` or let optimize use built-in defaults. +Create `eval.yaml` directly when the conversation or `.foundry/agent-metadata*.yaml` already selected the dataset/evaluators. Otherwise ask whether to run `azd ai agent eval generate` or let optimize use built-in defaults. 
## Include diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/create-hosted.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/create-hosted.md index 48a9d1f5..226899c0 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/create-hosted.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/create-hosted.md @@ -1,346 +1,278 @@ -# Create Hosted Agent Application +# Create Hosted Agent (azd ai) -Create new hosted agent applications for Microsoft Foundry, or convert existing agent projects to be Foundry-compatible using the hosting adapter. +Scaffold a hosted Foundry agent project with the Azure Developer CLI (`azd`) and the `azure.ai.agents` extension. The same flow covers greenfield (from a curated sample) and brownfield (lift existing code), then drops you into a local inner-loop so you can iterate before deploying. + +> **Creating a new agent end-to-end from scratch?** Use [quick-start-hosted.md](quick-start-hosted.md) instead -- an opinionated happy-path with safe defaults. Stay here for anything not covered by the quickstart. + +> **Scope:** `azd ai` is the preferred *code-first* path -- use it when the intent is agent code on disk, in a repo, with infrastructure-as-code and a local inner-loop. If the intent is only to create a remote agent resource (no code on disk), other approaches may apply -- for prompt agents see [create-prompt.md](create-prompt.md), or use the Foundry MCP tools / portal. 
## Quick Reference | Property | Value | |----------|-------| -| **Samples Repo** | `microsoft-foundry/foundry-samples` | -| **Python Samples** | `samples/python/hosted-agents/` | -| **C# Samples** | `samples/csharp/hosted-agents/` | -| **Hosted Agents Docs** | https://learn.microsoft.com/azure/ai-foundry/agents/concepts/hosted-agents | -| **Default Selection** | `Python` + `responses` + `Microsoft Agent Framework` | -| **Best For** | Creating new or converting existing agent projects for Foundry | +| Agent type | Hosted (container or code) | +| Primary CLI | `azd ai agent` (from extension `azure.ai.agents`) | +| Scaffold command | `azd ai agent init -m <manifest> --deploy-mode code --runtime python_3_13 --entry-point main.py`; pass `--runtime dotnet_10 --entry-point MyAgent.dll` for a .NET project (or `--from-code` for brownfield) | +| Local run | `azd ai agent run` + `azd ai agent invoke --local "..."` | +| Deploy handoff | [deploy/deploy.md](../deploy/deploy.md) | +| Sample catalog | `azd ai agent sample list --featured-only --output json` | +| Reference docs | [azd-ai-cli](references/azd-ai-cli.md), [local-run](references/local-run.md), [tools](references/tools.md) | ## When to Use This Skill -- Create a new hosted agent application from scratch (greenfield) -- Start from an official sample and customize it -- Convert an existing agent project to be Foundry-compatible (brownfield) -- Help user choose a language, protocol, framework, or sample for their agent - -## Workflow - -> Relative reference paths in this file are resolved from the directory containing `create.md`. For example, `./references/agentframework.md` means the file next to this document under `create/references/`, not a path relative to the runtime working directory. - -### Step 1: Determine Scenario - -Check the user's workspace for existing agent project indicators: - -- **No agent-related code found** → **Greenfield**. Proceed to Greenfield Workflow (Step 2).
-- **Existing agent code present** → **Brownfield**. Proceed to Brownfield Workflow. +- Create a new hosted agent from a curated Foundry sample. +- Lift an existing agent project (Python, .NET, Node.js) into a hosted Foundry agent. +- Add tools (web search, AI Search, MCP, A2A) to a hosted agent. +- Run and iterate on a hosted agent locally before deploying. -### Step 2: Gather Requirements (Greenfield) +For prompt agents (LLM + instructions, no container), use [create-prompt.md](create-prompt.md). For deploy, use [deploy.md](../deploy/deploy.md). -If the user hasn't already specified, use `ask_user` to collect in this order: +## Hosted vs Prompt -**Language:** Python (default) or C#. +| | Hosted | Prompt | +|--|--------|--------| +| Custom Python / .NET / Node code? | Yes -> this skill | No -> [create-prompt.md](create-prompt.md) | +| Tools / RAG / MCP / A2A | Toolbox + connections | Built-in tool configs | +| Local debugging | `azd ai agent run` | Limited | +| Output | New immutable agent version per `azd deploy` | `agent_update` via MCP / SDK | -**Protocol:** +## Workflow -| Protocol | Best For | -|----------|----------| -| `responses` (default) | Conversational agents using the OpenAI-compatible `/responses` contract | -| `invocations` | Arbitrary payloads, custom SSE behavior, protocol bridges, webhook-style callers, or client-managed sessions | -| `invocations_ws` | Real-time duplex workloads — voice agents, live streams, signaling for out-of-band media transports. The verify and adapter sections below assume HTTP — for WS specifics (URL with `agent_session_id`, browser-proxy requirement, framing), follow the dedicated [invocations-ws skill](../invocations-ws/invocations-ws.md). 
| +### Step 1 -- Verify the environment + +> **Preflight: get `AZURE_SUBSCRIPTION_ID` + `AZURE_LOCATION` into the azd env *before* the first `azd ai agent init`.** Without both, init defers model resolution → `azure.yaml services.<service>.config.deployments[]` ends up empty → `AI_PROJECT_DEPLOYMENTS=[]` → `azd provision` creates zero model deployments → `agent.yaml` keeps the literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` placeholder. `azd ai agent init` itself has **no** `--subscription` / `--location` flags (those live on core `azd init`). Pick the **first** option that fits, ranked best-first: +> +> 1. **Pre-bootstrap with core `azd init`** — per-project, no global state. **Recommended default for scripted / MCP / agent-driven flows.** Run in the target empty directory: +> ```bash +> azd init -t Azure-Samples/azd-ai-starter-basic . -e <env-name> --subscription <subscription-id> -l <location> +> azd ai agent init -m <manifest> --no-prompt --deploy-mode code --runtime python_3_13 --entry-point main.py +> ``` +> Core `azd init` creates `azure.yaml` + the azd env with `AZURE_SUBSCRIPTION_ID` / `AZURE_LOCATION` already populated; the extension's `ensureProject` sees the existing project and the model resolver reads the values core just wrote. (Use this even though `azd ai agent init` can scaffold from scratch — it's the only headless path that avoids deferral without mutating global config.) +> 2. **`azd ai agent init --project-id <project-arm-resource-id>`** — only when the Foundry project already exists in Azure. Init extracts the subscription from the ARM ID and uses the project's own location. Skip Option 1. +> 3. **Interactive mode** — omit `--no-prompt`. Init prompts for subscription + location. Only when a human is at a terminal. +> 4. **Global config (last resort, mutates `~/.azure/config.json` for every azd project on the machine):** +> ```bash +> azd config set defaults.subscription <subscription-id> +> azd config set defaults.location <location> +> ``` +> Avoid in per-project / scripted flows. Use only when no per-project option fits and the machine is single-tenant.
+> +> **If you only discover the need to set subscription + location *after* init has already scaffolded `src/<service-name>/`, do *not* naively re-run `azd ai agent init`.** It is not idempotent: under `--no-prompt` it silently creates `<service-name>-2`; in interactive mode the collision prompt's **default selection is "Use a different service name"** (you must actively arrow-up to "Overwrite existing"). See the [recovery paths](#step-4a----greenfield-scaffold-from-a-sample) in Step 4a. +> +> Never run `azd env set AI_PROJECT_DEPLOYMENTS '[...]'` and never run `az cognitiveservices account deployment create ...` for the azd Golden Path — both break the lifecycle. + +Run the bundled verification script to check that the local environment is set up correctly: -> 💡 **Tip:** A single hosted agent can expose **multiple protocols simultaneously**. Declare each in `agent.yaml` under `protocols:` and register the matching handlers on the same `InvocationAgentServerHost` (e.g., `invocations` + `invocations_ws` to pair a control/batch HTTP path with a WebSocket path). +```bash +./scripts/verify-environment.sh # macOS / Linux +./scripts/verify-environment.ps1 # Windows (pwsh) +``` -**Framework:** +Act on the summary prefixes: `[OK]` nothing to do; `[WARN]` non-blocking (continue); `[ACTION]` resolve first (missing extension -> `azd extension install azure.ai.agents`; failed auth -> ask the user to run `azd auth login`, never run it yourself). -The paths below refer to the framework-level directories in the Foundry sample repo. Choose the protocol-specific subpath in Step 3. +Branch on the reported agent status: -| Framework | Python Path | C# Path | -|-----------|-------------|---------| -| Microsoft Agent Framework (default) | `agent-framework` | `agent-framework` | -| LangGraph | `bring-your-own` | ❌ Python only | -| Custom | `bring-your-own` | `bring-your-own` | +- `not_deployed` -> Step 2. +- `active` / `deployed` -> already deployed.
Skip to [deploy/deploy.md](../deploy/deploy.md) for redeploy or [tools](references/tools.md) to add a tool. -> ⚠️ **Warning:** LangGraph is Python-only. For C# + LangGraph, suggest Microsoft Agent Framework or Custom instead. +### Step 2 -- New or existing Foundry project? -> 💡 **Tip:** In the sample repo, **Custom** corresponds to the **Bring Your Own** lanes. +Ask: "Do you want to create a new Foundry project, or use an existing one?" Skip the question when the prompt already says to use an existing project or supplies a Foundry project endpoint / project ARM resource ID. -> 💡 **Tip:** LangGraph samples are under **Bring Your Own**, not under a separate top-level `langgraph` directory. +- **New project** -- do NOT pass `--project-id`. `azd provision` (in deploy) will create it. +- **Existing project with ARM resource ID** -- pass that exact ID to `azd ai agent init --project-id`. +- **Existing project with Foundry project endpoint only** -- resolve the project ARM resource ID with the bundled script, then pass the returned `id` to `azd ai agent init --project-id`: + ```bash + ./scripts/resolve-project-id.sh --endpoint "" # macOS / Linux + ./scripts/resolve-project-id.ps1 -Endpoint "" # Windows (pwsh) + ``` +- **Existing project with neither endpoint nor ARM ID** -- ask for the ARM resource ID. -If user has no specific preference, suggest Python + `responses` + Microsoft Agent Framework as defaults. +Do not guess, derive, or construct the project ID from the endpoint. For `--project-id`, pass either the user-supplied project ARM resource ID or the `id` returned by Azure lookup / the bundled resolve script. -In non-interactive or YOLO mode, default to Python + `responses` + Microsoft Agent Framework unless the user's request clearly requires another supported combination. +### Step 3 -- Pick the scaffolding source -### Step 3: Browse and Select Sample +| User has ... 
| Use | +|--------------|-----| +| Empty workspace, or wants a starter | **Greenfield** -- Step 4a | +| Hand-written agent code already in cwd | **Brownfield** -- Step 4b | -List available samples using the GitHub API. First resolve the `sample_browse_path` (the browse root) from the selected language, protocol, and framework: +If unsure, default to greenfield. Never guess a manifest URL by hand. -| Selection | Sample Browse Path | -|-----------|--------------------| -| Python + Microsoft Agent Framework + `responses` | `samples/python/hosted-agents/agent-framework/responses/` | -| Python + Microsoft Agent Framework + `invocations` | `samples/python/hosted-agents/agent-framework/invocations/` | -| Python + LangGraph | `samples/python/hosted-agents/bring-your-own/{protocol}/langgraph-chat/` | -| Python + Custom | `samples/python/hosted-agents/bring-your-own/{protocol}/` | -| Python + Custom + `invocations_ws` | `samples/python/hosted-agents/bring-your-own/invocations_ws/` | -| C# + Microsoft Agent Framework + `responses` | `samples/csharp/hosted-agents/agent-framework/` | -| C# + Microsoft Agent Framework + `invocations` | `samples/csharp/hosted-agents/agent-framework/invocations-echo-agent/` | -| C# + Custom | `samples/csharp/hosted-agents/bring-your-own/{protocol}/` | +### Step 4a -- Greenfield: scaffold from a sample -Use the chosen lane to browse the repo under `sample_browse_path`: +List the curated catalog (filter by language if known): +```bash +azd ai agent sample list --featured-only --language python --output json ``` -GET https://api.github.com/repos/microsoft-foundry/foundry-samples/contents/{sample_browse_path} -``` - -If the user has specified what they want the agent to do, choose the most relevant or most simple sample under that lane and record its exact `selected_sample_path`. 
Only if the user has not given any preferences, present the sample directories under `sample_browse_path` to the user and help them choose based on their requirements (e.g., RAG, tools, multi-agent workflows, HITL). - -If the requested combination does not have a real sample, say so clearly and suggest the nearest supported lane. -> ⚠️ **Tools:** Hosted agents access tools through a **Foundry Toolbox MCP endpoint** — they do NOT wire tools directly. If the user wants an agent with tools (web search, AI search, code interpreter, MCP servers, etc.), select the `toolbox` samples (see [references/use-toolbox-in-hosted-agent.md#code-integration-patterns](references/use-toolbox-in-hosted-agent.md#code-integration-patterns)). These samples include Foundry Toolbox integration in the sample code out of the box, but the user still needs an actual toolbox resource — you'll resolve its endpoint in Step 6 (Verify Startup). +Each entry has a `manifestUrl` and an `initCommand`. Prefer direct code deploy at init time. `--no-prompt` defaults to container deploy unless you pass `--deploy-mode code`, so include the code flags up front. -### Step 4: Download Sample Files +For a generic new hosted agent request, start from the basic sample. Use tool/function-calling samples only when the user explicitly asks for external actions, APIs, tools, connectors, or data lookup. -Download only the selected sample directory — do NOT clone the entire repo. Preserve the directory structure by creating subdirectories as needed. +> **Before running init**, make sure subscription + location are resolvable via one of the four options in [Step 1 preflight](#step-1----verify-the-environment). For headless / scripted flows the recommended path is to **pre-bootstrap with core `azd init`**: +> +> ```bash +> azd init -t Azure-Samples/azd-ai-starter-basic . -e --subscription -l +> ``` +> +> Then run `azd ai agent init` inside the bootstrapped directory. 
`azd ai agent init` itself has **no** `--subscription` / `--location` flags (passing them fails with `unknown flag`); core `azd init` does. If init still defers resolution (empty `config.deployments[]` / `{{...}}` placeholder), see the recovery paths after the init example below — do **not** blindly re-run init. -Use the exact `selected_sample_path` selected in Step 3. +Python Example (add `--project-id ""` for an existing Foundry project; add `--agent-name ` if the user wants a custom name -- omit otherwise to keep the sample default): -**Using `gh` CLI (preferred if available):** ```bash -gh api repos/microsoft-foundry/foundry-samples/contents/{selected_sample_path} \ - --jq '.[] | select(.type=="file") | .download_url' | while read url; do - filepath="${url##*/{selected_sample_path}/}" - mkdir -p "$(dirname "$filepath")" - curl -sL "$url" -o "$filepath" -done +azd ai agent init --no-prompt \ + -m "" \ + --deploy-mode code \ + --runtime python_3_13 \ + --entry-point main.py ``` -**Using curl (fallback):** -```bash -curl -s "https://api.github.com/repos/microsoft-foundry/foundry-samples/contents/{selected_sample_path}" | \ - jq -r '.[] | select(.type=="file") | .path + "\t" + .download_url' | while IFS=$'\t' read path url; do - relpath="${path#{selected_sample_path}/}" - mkdir -p "$(dirname "$relpath")" - curl -sL "$url" -o "$relpath" - done -``` - -For nested directories, recursively fetch the GitHub contents API for entries where `type == "dir"` and repeat the download for each. - -### Step 5: Customize and Implement - -1. Read the sample's `README.md` and `agent.yaml` or `agent.manifest.yaml` to understand its structure -2. Read the sample code to understand patterns, protocol handling, and dependencies used -3. If using Agent Framework, follow the best practices in [references/agentframework.md](references/agentframework.md) -4. Implement the user's specific requirements on top of the sample -5. 
Update configuration (`.env`, dependency files, `agent.yaml`, `agent.manifest.yaml`) as needed, and keep the selected protocol consistent across code and config -6. Ensure the project is in a runnable state - -### Step 6: Verify Startup - -1. Install dependencies (use virtual environment for Python) -2. Ask user to provide values for `.env` variables if placeholders were used using `ask_user` tool. - - **If the agent uses tools / toolboxes**: resolve the toolbox endpoint per [references/use-toolbox-in-hosted-agent.md#resolve-toolbox-endpoint](references/use-toolbox-in-hosted-agent.md#resolve-toolbox-endpoint). -3. Run the main entrypoint -4. Fix startup errors and retry if needed -5. Send a protocol-appropriate test request to the correct endpoint: - - `responses` → `POST http://localhost:8088/responses` - - `invocations` → `POST http://localhost:8088/invocations` - - `invocations_ws` → open a WebSocket to `ws://localhost:8088/invocations_ws` (not HTTP POST). The wire format is developer-defined per the sample; see the [invocations-ws skill](../invocations-ws/invocations-ws.md) for the framing model and discovery guidance. -6. Fix any errors from the test request and retry until it succeeds -7. Once startup and test request succeed, stop the server to prevent resource usage - -**Guardrails:** -- ✅ Perform real run to catch startup errors -- ✅ Cleanup after verification (stop server) -- ✅ Ignore auth/connection/timeout errors (expected without Azure config) -- ❌ Don't wait for user input or create test scripts - -## Brownfield Workflow: Convert Existing Agent to Hosted Agent - -Use this workflow when the user has an existing agent project that needs to be made compatible with Foundry hosted agent deployment. The key requirement is wrapping the existing agent with the appropriate hosting adapter. - -### Step B1: Analyze Existing Project - -Scan the project to determine: - -1. 
**Language** — Python (look for `requirements.txt`, `pyproject.toml`, `*.py`) or C# (look for `*.csproj`, `*.cs`) -2. **Framework** — Identify which agent framework is in use: - -| Indicator | Framework | -|-----------|-----------| -| Imports from `agent_framework` or `Microsoft.Agents.AI` | Microsoft Agent Framework | -| Imports from `langgraph`, `langchain` | LangGraph | -| No recognized framework imports, or other frameworks (e.g., Semantic Kernel, AutoGen, custom code) | Custom | - -3. **Target protocol** — If the user has not specified one, infer whether the project should target `responses`, `invocations`, or `invocations_ws` based on the existing caller contract (HTTP request/response → `responses` or `invocations`; long-lived duplex stream / real-time media → `invocations_ws`) -4. **Entry point** — Identify the main script/entrypoint that creates and runs the agent -5. **Agent object** — Identify the agent instance that needs to be wrapped (e.g., a `BaseAgent` subclass, a compiled `StateGraph`, or an existing server/app) +> `--agent-name` at init names both `agent.yaml name:` and `azure.yaml services::` in one shot; renaming after init requires editing both files. -### Step B2: Add Hosting Adapter Dependency +Do not run `azd env new`, `azd env select`, or `azd env set` before `azd ai agent init` in a new temp/workspace; there is no azd project yet, so those commands fail and waste time. For an existing project, `--project-id` is enough during init. Set endpoint/model values immediately after init, once `azure.yaml` and the azd env exist. -Add the correct adapter package based on framework, language, and protocol. Get the latest version from the package registry — do not hardcode versions. +> Tip: if the manifest declares a `parameters:` block (check by `curl `), collect required values before init when an azd project already exists. 
In a new empty workspace, prefer a sample without required secrets; there is no azd env to set until init creates the project files. -**Python adapter packages:** +`init` writes `azure.yaml` (or appends to it), `/agent.yaml`, and `/.agentignore` (code-deploy only). A successful direct-code init produces `/agent.yaml` with `code_configuration:`. For file shapes, see [azd-ai-cli](references/azd-ai-cli.md). -| Framework | Package(s) | -|-----------|------------| -| Microsoft Agent Framework | `responses`: `agent-framework-foundry-hosting`; `invocations`: `agent-framework-foundry-hosting` | -| LangGraph | `responses`: `azure-ai-agentserver-responses` + `azure-ai-agentserver-core`; `invocations`: `azure-ai-agentserver-invocations` + `azure-ai-agentserver-core` | -| Custom | `responses`: `azure-ai-agentserver-responses`; `invocations`: `azure-ai-agentserver-invocations` | +#### Model deployments (azd Golden Path) -**.NET adapter packages:** +`azure.yaml services..config.deployments[]` is the **single source of truth** for model deployments in azd-managed Foundry projects. The flow is: -| Framework | Package(s) | -|-----------|------------| -| Microsoft Agent Framework | `responses`: `Microsoft.Agents.AI.Foundry.Hosting`; `invocations`: `Microsoft.Agents.AI.Foundry.Hosting` + `Azure.AI.AgentServer.Invocations` | -| Custom | `responses`: `Azure.AI.AgentServer.Responses`; `invocations`: `Azure.AI.AgentServer.Invocations` | - -Add the package to the project's dependency file (`requirements.txt`, `pyproject.toml`, or `.csproj`). For Python, also add `python-dotenv` if not present. - -### Step B3: Wrap Agent with Hosting Adapter - -Modify the project's main entrypoint to wrap the existing agent with the adapter. 
The approach differs by framework and protocol: - -**Microsoft Agent Framework + `responses` (Python):** -- Import `ResponsesHostServer` from the adapter package -- Pass the agent instance (from `agent_framework` package) to the adapter -- Call `.run()` on the adapter as the default entrypoint - -**Microsoft Agent Framework + `invocations` (Python):** -- Use `InvocationAgentServerHost()` -- Implement an `@app.invoke_handler` -- Manage session state if the agent needs multi-turn memory - -**Microsoft Agent Framework + `responses` (C#):** -- Register Foundry responses hosting and map the `responses` protocol - -**Microsoft Agent Framework + `invocations` (C#):** -- Register invocations services and an invocation handler -- Map the `invocations` protocol - -**LangGraph:** -- Python only -- Follow the `bring-your-own/{protocol}/langgraph-chat` sample for the selected protocol lane +``` +manifest → azd ai agent init → azure.yaml config.deployments[] → AI_PROJECT_DEPLOYMENTS env (internal) → Bicep → Microsoft.CognitiveServices/accounts/deployments +``` -**Custom:** -- Follow the corresponding `bring-your-own/{protocol}` sample for the selected language -- Prefer the protocol SDK sample for the selected lane instead of inventing a custom contract when a sample already exists +Rules: + +- **`azd ai agent init` writes `config.deployments[]` from the sample's manifest** and also sets `AZURE_AI_MODEL_DEPLOYMENT_NAME` to the first deployment's `name`. `azd provision` then creates the deployment through Bicep. No `az` calls are needed in the Golden Path. +- **`deployments[].name` is the literal Azure deployment resource name** — not a label, not a placeholder. Use a human-readable model name (e.g. `gpt-4o-mini`, `gpt-4.1-mini`). **Never** use the literal string `AZURE_AI_MODEL_DEPLOYMENT_NAME` as the `name` value; doing so creates a deployment literally named `AZURE_AI_MODEL_DEPLOYMENT_NAME` and the agent will 404 on its first invoke. 
+- **Adding a *second* model (or any change to `config.deployments[]`) to an existing project:** edit `azure.yaml services..config.deployments[]` directly (and update `agent.yaml model_deployment_name:` / `${AZURE_AI_MODEL_DEPLOYMENT_NAME}` if the new entry should become the default), then run `azd provision`. The extension's `preprovision` hook calls `envUpdate` automatically, which re-marshals `azure.yaml deployments[]` and re-writes `AI_PROJECT_DEPLOYMENTS` with the correct double-escaping before Bicep runs. **Do not re-run `azd ai agent init`** for this case — it triggers the non-idempotent collision flow (see anti-patterns) and at best (with explicit "Overwrite existing") re-resolves models from the original manifest rather than merging your edit. +- **`agent.yaml`: prefer `${AZURE_AI_MODEL_DEPLOYMENT_NAME}` over a hardcoded model name.** The `${VAR}` form is resolved from the active azd env at run / deploy time, so a single `azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME ` (or env switch dev → prod) updates `agent.yaml` without touching the file. Init writes this form by default (`init_from_code.go`); only the literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` (double braces) is a failure marker that means model resolution deferred. +- **Recovery: `config.deployments[]` is empty or `agent.yaml` has the literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` placeholder.** First get sub + location into the env (see [Step 1 preflight](#step-1----verify-the-environment) options). Then pick **one** of these three paths — init is **not** idempotent: + 1. **Clean re-init (preferred when no user code has been added to `src//` yet):** delete `src//`, remove the `services.:` block from `azure.yaml`, then re-run `azd ai agent init`. No collision, scaffolds cleanly with the resolved model. + 2. **Interactive overwrite:** re-run `azd ai agent init` **without `--no-prompt`**. 
When the collision prompt appears, **actively arrow-up and select "Overwrite existing"** — the default selection is *not* overwrite (it's "Use a different service name", which produces `-2`). + 3. **Hand-fix in place (preserves any user code in `src//`):** edit `azure.yaml services..config.deployments[]` to add the model block (`name`, `model.{name, format, version}`, `sku.{name, capacity}`), replace the literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` in `agent.yaml` with `${AZURE_AI_MODEL_DEPLOYMENT_NAME}`, then `azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME `. Run `azd provision`; the `preprovision` hook auto-syncs `AI_PROJECT_DEPLOYMENTS`. +- **Anti-patterns — do not do these:** + - **Blindly re-running `azd ai agent init` against an existing project.** Under `--no-prompt` init silently auto-suffixes (`-2`, then `-3`, ...) via `nextAvailableName`; in interactive mode the collision prompt's default is "Use a different service name". There is **no flag** (`--force` does not apply here) to make `--no-prompt` overwrite. Use one of the three recovery paths above. + - **Reaching for `azd config set defaults.subscription` / `defaults.location` as the *first* fix for the deferral.** This mutates `~/.azure/config.json` for every azd project on the machine. Prefer pre-bootstrap with `azd init -t ... --subscription -l` (per-project) or `--project-id` (existing project) first — see the [Step 1 preflight options](#step-1----verify-the-environment). + - `azd env set AI_PROJECT_DEPLOYMENTS '[...]'` — `AI_PROJECT_DEPLOYMENTS` is internal extension state. The extension writes it with double-escaped JSON (`\\` and `\"`) required by Bicep parameter substitution; `azd env set` only single-escapes and breaks the parse with `invalid character 'n' after object key:value pair`. + - `az cognitiveservices account deployment create ...` against the azd-managed Foundry account — creates the deployment outside the azd lifecycle, so `azd provision` won't manage it and `azd down` won't clean it up. 
Use `az cognitiveservices` (or [models/deploy-model](../../models/deploy-model/SKILL.md)) **only** for shared/pre-existing Foundry projects that are not managed by this azd project. + - Hand-patching the `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` placeholder in `agent.yaml` *without also* adding the matching entry to `azure.yaml services..config.deployments[]` — the agent will reference a deployment name that Bicep never created. Use the [hand-fix recovery path](#step-4a----greenfield-scaffold-from-a-sample) above (path #3) which fixes both files together. + +Check the scaffold before local run: + +1. **Verify `azure.yaml services..config.deployments[]` is non-empty** and that `/agent.yaml` has either a literal `model_deployment_name:` value or the `${AZURE_AI_MODEL_DEPLOYMENT_NAME}` substitution form — **not** the double-brace literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` (that placeholder is the marker that init deferred model resolution). Also confirm `azure.yaml` has only **one** service entry for your agent — a duplicate `-2` means a previous init re-ran against the existing project (collision prompt default + `--no-prompt` silent auto-suffix; see anti-patterns above). If any of these checks fails, use one of the three [recovery paths](#model-deployments-azd-golden-path) listed under the **Recovery** rule above (clean re-init / interactive overwrite / hand-fix). Do **not** `azd env set AI_PROJECT_DEPLOYMENTS`. +2. If the user supplied an existing project endpoint, project ARM ID, or model deployment name, set them in the active azd env and verify the values. `azd ai agent run` injects azd env values before `.env`, so a stale `AZURE_AI_MODEL_DEPLOYMENT_NAME` can override a correct `.env` file. + ```bash + azd env set AZURE_AI_PROJECT_ENDPOINT "" + azd env set AZURE_AI_PROJECT_ID "" + azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME "" + azd env get-values + ``` +3.
Create the agent source `.env` with the same endpoint and model deployment values: + ```env + FOUNDRY_PROJECT_ENDPOINT=https://.services.ai.azure.com/api/projects/ + AZURE_AI_MODEL_DEPLOYMENT_NAME= + ``` +4. Prefer direct code deployment. Inspect `/agent.yaml`; if `code_configuration:` is missing and the agent does not need a custom Dockerfile or system packages, add it before deployment. +5. Prefer `--agent-name` at init time (above). Fallback only: if init already ran without it, rename the agent in `/agent.yaml` AND the matching key under `azure.yaml services:` to the same value, preserving its `project:` path. +6. If you change CPU or memory, keep `/agent.yaml` and `azure.yaml services..config.container.resources` aligned because the `azure.yaml` service config can override the agent file. + +### Step 4b -- Brownfield: lift existing code + +Use ONLY when the workspace already contains hand-written agent source. -**`invocations_ws`:** -- Use the `azure-ai-agentserver-invocations` SDK and register a WebSocket handler with `@app.ws_handler` on the same `InvocationAgentServerHost` -- Follow the [invocations-ws skill](../invocations-ws/invocations-ws.md) for the wire-level contract and `agent_session_id` semantics -- Reference samples live under `samples/python/hosted-agents/bring-your-own/invocations_ws/` +```bash +azd ai agent init --no-prompt \ + --src ./src/my-agent \ + --agent-name my-agent \ + --deploy-mode code \ + --runtime python_3_13 \ + --entry-point app.py +``` -> ⚠️ **Warning:** The adapter MUST be the default entrypoint (no flags required to start). This is required for both local debugging and containerized deployment. +`--runtime` and `--entry-point` are required with `--deploy-mode code --no-prompt`. Runtimes: `python_3_13`, `python_3_14`, `dotnet_10`, `node_22`. `--deploy-mode container` builds from `Dockerfile`. For an existing Foundry project, add `--project-id ""`. -### Step B4: Configure Environment +### Step 5 -- Run locally and iterate -1. 
Create or update a `.env` file with required environment variables (project endpoint, model deployment name, etc.) - - **If the agent uses tools / toolboxes**: resolve the toolbox endpoint per [references/use-toolbox-in-hosted-agent.md#resolve-toolbox-endpoint](references/use-toolbox-in-hosted-agent.md#resolve-toolbox-endpoint). -2. For Python: ensure the code uses `load_dotenv(override=False)` so Foundry-injected environment variables are available at runtime. -3. If the project uses Azure credentials: ensure Python uses `azure.identity.DefaultAzureCredential` for **local development**. In production, use `ManagedIdentityCredential`. See [auth-best-practices.md](../../references/auth-best-practices.md) +Read and follow [local-run](references/local-run.md). Complete one representative local invocation before deploying. -### Step B5: Create agent.yaml +### Step 6 -- Add tools (optional) -Create an `agent.yaml` file in the project root. This file defines the agent's metadata and deployment configuration for Foundry. Required fields: +Tools attach through **toolboxes** -- bundled MCP-compatible endpoints. Flow: -- `name` — Unique identifier (alphanumeric + hyphens, max 63 chars) -- `description` — What the agent does -- `template.kind` — Must be `hosted` -- `template.protocols` — Must include the selected protocol and matching version from the chosen sample -- `template.environment_variables` — List all environment variables the agent needs at runtime +1. Create the **connection** (`azd ai agent connection create ...`). +2. Create or update the **toolbox** (`azd ai toolbox create` / `connection add`). +3. Set the agent env var (`azd env set TOOLBOX__MCP_ENDPOINT ...`). +4. Reference it in `agent.yaml` `environment_variables[]`. +5. `azd deploy`. -Refer to the chosen sample's `agent.yaml` or `agent.manifest.yaml` in the [foundry-samples repo](https://github.com/microsoft-foundry/foundry-samples/tree/main/samples/python/hosted-agents) for the exact schema. 
+Full recipes (GitHub MCP, Azure AI Search, A2A, Bing Custom) in [tools](references/tools.md). -### Step B6: Create Dockerfile +### Step 7 -- Hand off to deploy -Create a `Dockerfile` if one doesn't exist. Requirements: +Once local invocation succeeds, tell the user the agent is ready and ask if they want to deploy. Read [deploy/deploy.md](../deploy/deploy.md). -- Base image appropriate for the language (e.g., `python:3.12-slim` for Python, `mcr.microsoft.com/dotnet/sdk` for C#) -- Copy source code into the container -- Install dependencies -- Expose port **8088** (the adapter's default port) -- Set the main entrypoint as the CMD +## Expected env-var fingerprint (post-provision) -> ⚠️ **Warning:** When building, MUST use `--platform linux/amd64`. Hosted agents run on Linux AMD64 infrastructure. Images built for other architectures (e.g., ARM64 on Apple Silicon) will fail. +After `azd provision` completes for an `azd ai agent`-scaffolded project (default Basic Agent Setup), `azd env get-values` should show this canonical state. Verify before debugging deployment or runtime issues. -Refer to the chosen sample's `Dockerfile` in the [foundry-samples repo](https://github.com/microsoft-foundry/foundry-samples/tree/main/samples/python/hosted-agents) for the exact pattern. +| Variable | Expected value | Notes | +|----------|----------------|-------| +| `ENABLE_HOSTED_AGENTS` | `true` | Set automatically by `azd ai agent init`. | +| `ENABLE_CAPABILITY_HOST` | `false` | Set automatically by `azd ai agent init`. Leave as-is unless you are intentionally targeting Standard Agent Setup. | +| `FOUNDRY_PROJECT_ENDPOINT` | `https://.services.ai.azure.com/api/projects/` | Populated by provision (or pre-set if reusing an existing project). | +| `AZURE_AI_PROJECT_ID` | Full ARM resource ID of the Foundry project | Populated by provision; required for deploy. | +| `AZURE_AI_MODEL_DEPLOYMENT_NAME` | Model deployment name (e.g. 
`gpt-4o`) | Set automatically by `azd ai agent init` from the first entry in `azure.yaml services..config.deployments[]`. Required for local run and deploy. | +| `AI_PROJECT_DEPLOYMENTS` | escaped JSON array, e.g. `[{\"name\":\"gpt-4o\",...}]` | **Internal extension state.** Managed by `azd ai agent init` from `azure.yaml services..config.deployments[]`. Carries deployments into the Bicep parameter `aiProjectDeploymentsJson`. **Never** set with `azd env set` — manual edits single-escape the JSON and break Bicep `json()` parsing. | +| `AI_AGENT_PENDING_PROVISION` | *(empty / unset)* | Non-empty means provision is still mid-flight; do not deploy. | -### Step B7: Test Locally +`Microsoft.CognitiveServices/accounts/capabilityHosts/agents` is **not** provisioned by `azd ai agent init` (Basic Agent Setup). Its absence is expected. The resource only appears under Standard Agent Setup, which is documented separately in [references/standard-agent-setup.md](../../references/standard-agent-setup.md). -1. Install dependencies (use virtual environment for Python) -2. Run the main entrypoint — the adapter should start an HTTP server on `localhost:8088` -3. Send a protocol-appropriate test request: - - `responses` → `POST /responses` - - `invocations` → `POST /invocations` - - `invocations_ws` → open a WebSocket to `ws://localhost:8088/invocations_ws` (see the [invocations-ws skill](../invocations-ws/invocations-ws.md) for framing) -4. Verify the response follows the expected protocol shape for the selected lane -5. Fix any errors and retry until the test request succeeds -6. Stop the server +Both `ENABLE_HOSTED_AGENTS` and `ENABLE_CAPABILITY_HOST` are set automatically by `azd ai agent init` — you do not need to manage them. If you ever set them manually outside this flow, see [project/create/create-foundry-project.md](../../project/create/create-foundry-project.md#step-3-create-directory-and-initialize) for the manual-flag procedure. 
-> 💡 **Tip:** If auth/connection errors occur for Azure services, that's expected without real Azure credentials configured. The key validation is that the HTTP server starts and accepts requests. +See the canonical env-var registry: [azure-dev/cli/azd/docs/environment-variables.md](https://github.com/Azure/azure-dev/blob/main/cli/azd/docs/environment-variables.md). ## Common Guidelines -IMPORTANT: YOU MUST FOLLOW THESE. - -Apply these to both greenfield and brownfield projects: - -1. **Sample-first** — Start from a real sample in the current `foundry-samples` repo. Do not invent unsupported combinations, paths, or protocol behavior. - -2. **Protocol consistency** — Keep the selected protocol consistent across sample choice, code, config, and verification steps. - -3. **Logging** — Implement proper logging using the language's standard logging framework (Python `logging` module, .NET `ILogger`). Hosted agents stream container stdout/stderr logs to Foundry, so all log output is visible via the troubleshoot workflow. Use structured log levels (INFO, WARNING, ERROR) and include context like request IDs and agent names. - -4. **Framework-specific best practices** — When using Microsoft Agent Framework, read the [Agent Framework best practices](references/agentframework.md) for hosting adapter setup, credential patterns, and debugging guidance. - -5. **Deploy handoff** — After the agent has been created and local verification succeeds, explicitly tell the user that they can deploy the agent if they want, and ask them to say `deploy agent to foundry` to continue with the deploy sub-skill. - -6. **Tool integration** — Hosted agents access tools through [Foundry Toolbox](references/use-toolbox-in-hosted-agent.md), NOT by wiring tools directly. If the user needs tools (web search, AI search, code execution, file search, MCP servers, etc.), follow the toolbox integration guide. 
The toolbox provides a single MCP-compatible endpoint that handles credential injection and tool discovery. - -7. **Reserved environment variables** — The Foundry platform injects environment variables into every hosted agent container at startup. You MUST NOT generate, suggest, or configure any of these in `.env` files, `agent.yaml` `environment_variables`, or application code: - - **Blocked prefixes** (any variable starting with these is reserved): - - `FOUNDRY_*` — platform-injected identity, session, project, and toolset variables - - `AGENT_*` — reserved for platform use - - **Exact reserved names** (platform-managed, overwritten at runtime): - - `PORT` — HTTP listen port (default `8088`) - - `HOME` — session filesystem path (`/home/session`) - - `SSE_KEEPALIVE_INTERVAL` — SSE keep-alive config - - `APPLICATIONINSIGHTS_CONNECTION_STRING` — observability - - `OTEL_EXPORTER_OTLP_ENDPOINT` — OTLP collector endpoint - - **Key `FOUNDRY_*` variables available at runtime** (read-only, do not set): - - `FOUNDRY_PROJECT_ENDPOINT` — project endpoint URL for calling Azure services - - `FOUNDRY_AGENT_NAME` — the deployed agent's name - - `FOUNDRY_AGENT_VERSION` — the deployed agent's version - - `FOUNDRY_TOOLBOX_ENDPOINT` — MCP-compatible toolbox endpoint (if toolbox is configured) - - If user code needs to read these values at runtime (e.g., `FOUNDRY_PROJECT_ENDPOINT` to call Azure services), read them from the environment — do not set or override them. - -## Coding Tips - -Use these when generating or modifying project code: - -1. **Create a `.gitignore` file** — After generating code, create a `.gitignore` file if one does not already exist. If one already exists, update it as needed. - - Choose the ignore entries based on the language, framework, and files generated. - - Do not leave the project with no ignored files. - - For Python projects, `.venv/` MUST be ignored at a minimum. +1. **Sample-first** -- always get `manifestUrl` from `azd ai agent sample list`. +2. 
**Prefer azd over az** -- fall back to `az` only as a last resort, with explicit consent. +3. **Don't auto-login** -- `azd auth login` opens a browser; ask the user. +4. **JSON output** -- add `--output json` only to read-only `azd ai agent` commands such as `show`. Do not add it to `azd ai agent invoke`; invoke supports `default` and `raw`, not `json`. +5. **Two files** -- `agent.yaml` is the agent; `azure.yaml services..config` is service config. See [azd-ai-cli](references/azd-ai-cli.md). +6. **Reserved env vars** -- `FOUNDRY_*` and `AGENT_*` are platform-injected at runtime; `AI_PROJECT_DEPLOYMENTS`, `AI_PROJECT_RESOURCES`, and `AI_PROJECT_TOOL_CONNECTIONS` are extension-managed transport for Bicep. Never set any of these with `azd env set` — edit `azure.yaml services..config` and re-run `azd ai agent init`. ## Non-Interactive / YOLO Mode -When running in non-interactive mode (e.g., YOLO mode), skip selection prompts and use these defaults unless the user has already specified otherwise: - -- **Language** — `Python` -- **Protocol** — `responses` -- **Framework** — `Microsoft Agent Framework` - -If the user's request clearly requires another supported lane, use that lane instead of forcing the defaults. +Defaults when unspecified: greenfield + Python + `azd ai agent sample list --featured-only --language python`, choose the simplest recommended sample that matches the request, plus `--no-prompt` on every write. If creating a new project and the user did not provide a project name, auto-generate one using the pattern `ai-project-` (6-8 lowercase alphanumeric characters). Show the generated name to the user but do not block on confirmation. If using an existing project, ensure `azd ai agent init` receives `--project-id`: use the supplied ARM ID, or run the Step 2 resolve script for the supplied Foundry project endpoint and pass the returned `id`. Stop and ask only when neither an ARM ID nor a resolvable endpoint is available. 
If the manifest declares secret parameters, collect them with `ask_user` and set them via `azd env set PARAM_...` before init -- keep `--no-prompt` (do not fall into azd's interactive prompts). ## Error Handling -| Error | Cause | Resolution | -|-------|-------|------------| -| GitHub API rate limit | Too many requests | Authenticate with `gh auth login` | -| `gh` not available | CLI not installed | Use curl REST API fallback | -| Sample not found | Path changed in repo or selected lane has no matching sample | List the selected parent directory again and choose a current sample | -| Requested combination not supported | Example: C# + LangGraph | Explain the gap and switch to the nearest supported lane | -| Protocol mismatch | Code, `agent.yaml`, and test request are not aligned | Make all three match the selected protocol | -| Dependency install fails | Version conflicts | Use versions from the selected sample's own dependency file | +| Error | Fix | +|-------|-----| +| `extension not installed` | `azd extension install azure.ai.agents` | +| `not_logged_in` / `login_expired` | Ask user to run `azd auth login` | +| `unknown flag: --subscription` / `--location` on `azd ai agent init` | Wrong command — those flags live on **core** `azd init`. See [Step 1 preflight](#step-1----verify-the-environment) for the four options. | +| `no project exists; to create a new project, run azd init` on `azd env set` | The azd env does not exist yet — `azd env set` cannot create it. See [Step 1 preflight](#step-1----verify-the-environment). | +| `agent.yaml` contains literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` placeholder after init | Init deferred model resolution. **Do not blindly re-run init** (default prompt = `-2`; `--no-prompt` silently auto-suffixes). 
Pick one of the three [recovery paths](#model-deployments-azd-golden-path): clean re-init after deleting `src//`, interactive overwrite, or hand-fix `azure.yaml` + replace `{{...}}` with `${AZURE_AI_MODEL_DEPLOYMENT_NAME}` and `azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME `, then `azd provision`. | +| `azure.yaml` has duplicate `-2` entry after re-running init | Init is not idempotent: interactive default is "Use a different service name" and `--no-prompt` silently appends `-2`. To recover, merge the resolved `deployments:` block from `-2` into the original service, delete the `-2` entry from `azure.yaml`, remove `src/-2/`, then `azd provision`. | +| `invalid character 'n' after object key:value pair` during `azd provision` | You used `azd env set AI_PROJECT_DEPLOYMENTS '[...]'` (single-escaped JSON breaks Bicep `json()`). Clear it (`azd env set AI_PROJECT_DEPLOYMENTS ""`), declare the deployment in `azure.yaml services..config.deployments[]` instead, then re-run `azd provision` (its `preprovision` hook re-syncs `AI_PROJECT_DEPLOYMENTS` with the correct double-escaping). | +| `missing_project_endpoint` | Run `azd provision`, or `azd env set AZURE_AI_PROJECT_ENDPOINT ` | +| `project_not_found` | cwd has no `azure.yaml`; move to project root or run init | +| Secret parameter prompt under `--no-prompt` | In an empty workspace, choose a simpler sample without secret parameters. In an existing azd project, set `PARAM__` with `azd env set` before init; keep `--no-prompt`. | +| `cannot use --version with --local` | Drop `--version`, or drop `--local` to hit the deployed agent | +| `could not detect project type` | Set `startupCommand` in `azure.yaml` or pass `--start-command` | +| Local run issue | Follow [local-run](references/local-run.md) common failures | + +Run `azd ai agent doctor --output json` to surface failing checks with `suggestion` fields. 
+ +## Next Steps + +- Deploy to Foundry -> [deploy/deploy.md](../deploy/deploy.md) +- Add tools -> [tools](references/tools.md) +- Invoke the deployed agent -> [invoke/invoke.md](../invoke/invoke.md) +- Evaluate / optimize -> [observe/observe.md](../observe/observe.md) +- Diagnose failures -> [troubleshoot/troubleshoot.md](../troubleshoot/troubleshoot.md) diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/quick-start-hosted.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/quick-start-hosted.md new file mode 100644 index 00000000..3ca7c40b --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/quick-start-hosted.md @@ -0,0 +1,320 @@ +# Quick Start: Hosted Foundry Agent + +Opinionated happy-path for first-time users creating their first hosted Foundry agent. Safe defaults, minimal decisions. + +> **Scope:** Defaults below are applied automatically when the user is silent. The user may override the language or sample explicitly; new-vs-existing Foundry project is handled inline. For anything not covered here, stop and read [create-hosted.md](create-hosted.md). + +## When to Use This Skill + +Use this when the request is to create a new hosted Foundry agent end-to-end — scaffold, provision, deploy, and smoke-test. Common overrides (language, region, sample, topic, existing project, existing model) are fine; bounce to [create-hosted.md](create-hosted.md) for anything else. 
+ +## Quick Reference + +| Property | Default (when user is silent) | Override | +|----------|-------------------------------|----------| +| Language / runtime | Python 3.13 (`python_3_13`) | Any of `python_3_13`, `python_3_14`, `dotnet_10`, `node_22` | +| Sample | Featured basic starter for the chosen language (`azd ai agent sample list --featured-only --language --output json`) | User may name a different featured sample | +| Subscription | `az account show` | User may supply | +| Region | `northcentralus` | Ask user to confirm or pick another | +| Foundry project | New (no `--project-id`) | If user supplies ARM ID or endpoint, pass `--project-id` | +| Model deployment | Whatever the sample's manifest declares | If user supplies a deployment name, `azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME` after init | +| Deploy mode | `code` (no Docker, no ACR build) | — | +| Stops at | Deployed agent + remote smoke invoke + eval generation submitted | — | + +## Workflow + +Walk through every step in order. **Before Step 2**, scan the user's original prompt for any of these values: project name, language, subscription, region, existing Foundry project endpoint or ARM ID, existing model deployment name, agent topic/purpose. **Do not ask** for anything already supplied. + +### Step 1 — Verify the environment + +Run the bundled script: + +```bash +./scripts/verify-environment.sh # macOS / Linux +./scripts/verify-environment.ps1 # Windows (pwsh) +``` + +Act on prefixes: `[OK]` continue; `[WARN]` continue; `[ACTION]` resolve first. + +- Missing `azure.ai.agents` extension → `azd extension install azure.ai.agents`. +- Not logged in → STOP. Ask the user to run `azd auth login`. Never run `azd auth login` yourself. 
+ +### Step 2 — Collect remaining inputs (one batch) + +For any values **not** already in the prompt, ask the rest in a single `AskUserQuestion` round: + +| Value | Default | Notes | +|-------|---------|-------| +| Project / agent name | `ai-agent-` (6 lowercase alphanumeric chars) | Used as agent name, service key, and project directory. | +| Language | `python_3_13` | One of `python_3_13`, `python_3_14`, `dotnet_10`, `node_22`. | +| Subscription | `az account show --query id -o tsv` | Must be a GUID. | +| Region | `northcentralus` | Confirm or override. | +| Existing Foundry project? | No (new) | If Yes: collect ARM resource ID *or* Foundry project endpoint URL. | +| Existing model deployment? | No (use sample manifest's model) | If Yes: collect the deployment name. | + +If the user supplied only a **Foundry project endpoint** (not an ARM ID), resolve the ARM ID before Step 6: + +```bash +./scripts/resolve-project-id.sh --endpoint "" # macOS / Linux +./scripts/resolve-project-id.ps1 -Endpoint "" # Windows (pwsh) +``` + +Use the returned `id` value. Never guess or construct the ARM ID from the endpoint. + +### Step 3 — Pick the sample + +```bash +azd ai agent sample list --featured-only --language --output json +``` + +> `--language` here takes the short form (`python`, `dotnetCsharp`) — not the runtime token (`python_3_13` fails with `unknown language`). The runtime tokens are only used in Step 6's `azd ai agent init --runtime ...`. + +Pick the basic starter (e.g. `azd-ai-starter-basic` for Python — avoid samples with `parameters:` blocks requiring secrets). Capture the `manifestUrl`. + +Step 6 needs `--runtime` and `--entry-point` values. 
These are CLI args, **not** fields in the manifest — use these standard defaults for the chosen language: + +| Language | `--runtime` | `--entry-point` | +|----------|-------------|-----------------| +| Python | `python_3_13` | `main.py` | +| .NET | `dotnet_10` | `MyAgent.dll` | +| Node | `node_22` | `index.js` | + +### Step 4 — Create the project directory + +```bash +mkdir +cd +``` + +### Step 5 — Pre-bootstrap with core `azd init` + +This step writes `AZURE_SUBSCRIPTION_ID` + `AZURE_LOCATION` into the azd env *before* `azd ai agent init` runs, which prevents init from deferring model resolution and leaving the `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` placeholder in `agent.yaml`. + +```bash +azd init -t Azure-Samples/azd-ai-starter-basic . \ + -e - \ + --subscription \ + -l \ + --no-prompt +``` + +Use env name `-` as the **default** to avoid collisions with stuck "Deleting"-state resource groups from prior runs. Use bare `` only when you're confident the name has never been used in this subscription. + +### Step 6 — Scaffold the agent + +```bash +azd ai agent init --no-prompt \ + -m "" \ + --deploy-mode code \ + --runtime python_3_13 \ + --entry-point main.py \ + --agent-name +``` + +Values you **must** substitute from Step 3 — do not pass placeholders or guesses: + +- `--runtime`: exactly one of `python_3_13`, `python_3_14`, `dotnet_10` (the bare value `python` fails with `--runtime must be one of: python_3_13, python_3_14, dotnet_10`). +- `--entry-point`: the entry-point file name from the Step 3 table for the chosen language — it is a CLI arg, not a manifest field (e.g. `main.py`, not `app.py` — a wrong value scaffolds correctly but breaks local run and deploy). + +If using an existing Foundry project, add `--project-id ""`. + +⏳ May take time — init resolves the model catalog server-side. Wait for the prompt to return; do not interrupt. + +`init` writes `azure.yaml` (appending the service), `src//agent.yaml`, `src//.agentignore`, and the sample source files under `src//`.
+ +### Step 7 — Customize the scaffolded sample (per user's original intent) + +The scaffold is a generic working sample. Edit only what the user's original prompt asked for — touch tools, dependencies, or model config only when the user explicitly asked for external actions, APIs, tools, connectors, data lookup, or a specific model. + +Typical changes: + +- `src//agent.yaml` — update `description:` to match the user's intent (this also feeds Step 14 eval generation). +- `src//` — update the system prompt / instructions to match the user's intent. + +Only when the user explicitly asked for it: + +- Add or modify tool / function-calling code in ``. +- Add dependencies to `pyproject.toml` / `requirements.txt` (Python), `*.csproj` (.NET), or `package.json` (Node). +- Change the model in `azure.yaml services..config.deployments[]` before Step 9 provision. + +If the user's original prompt was generic (no specific agent purpose described), skip customization and ship the sample as-is. + +### Step 8 — Sanity-check the scaffold + +Verify all four before continuing. If any check fails, pick **one** recovery path, then re-verify: + +| Check | Expected | If failed | +|-------|----------|-----------| +| `azure.yaml services..config.deployments[]` | Non-empty array with `name`, `model.{name,format,version}`, `sku.{name,capacity}` | Model resolution deferred — use recovery | +| `src//agent.yaml` `model_deployment_name:` | Literal name **or** `${AZURE_AI_MODEL_DEPLOYMENT_NAME}` substitution | If literal `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` (double braces): use recovery | +| `src//agent.yaml` `code_configuration.entry_point:` | Matches a real file in `src//` (e.g. `main.py` and `main.py` exists) | If mismatch (e.g. `entry_point: app.py` but only `main.py` exists): edit `agent.yaml` to the real filename, then re-verify. Most often caused by passing a wrong `--entry-point` in Step 6. 
| +| `azure.yaml services:` keys | Only one `` entry | If `-2` exists: init was re-run; use recovery | + +**Recovery paths** (pick based on whether Step 7 has already customized `src//`): + +1. **Hand-fix in place** *(use when Step 7 customization is already done — preserves user code)* — edit `azure.yaml services..config.deployments[]` to add the model block, replace `{{AZURE_AI_MODEL_DEPLOYMENT_NAME}}` in `agent.yaml` with `${AZURE_AI_MODEL_DEPLOYMENT_NAME}`, then `azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME `. +2. **Clean re-init** *(use only when Step 7 has not run yet — destructive: deletes `src//`)* — delete `src//`, remove the `services.:` block from `azure.yaml`, re-run Step 6. +3. **Interactive overwrite** *(loses Step 7 edits — re-resolves the model from the original manifest)* — re-run Step 6 *without* `--no-prompt`. When the collision prompt appears, **arrow-up to "Overwrite existing"** (default is *not* overwrite). + +Never `azd env set AI_PROJECT_DEPLOYMENTS '[...]'` (single-escaped JSON breaks Bicep parse). Never `az cognitiveservices account deployment create` against this account (creates the deployment outside the azd lifecycle). + +If recovery still fails → escape to [create-hosted.md](create-hosted.md). + +### Step 9 — Provision Azure resources + +```bash +azd provision --no-prompt +``` + +⏳ May take time — creates the resource group, Foundry account + project, model deployment, App Insights, Log Analytics. Wait for the prompt to return; do not interrupt. + +### Step 10 — Wire local env vars + +```bash +azd env get-values +``` + +Capture `FOUNDRY_PROJECT_ENDPOINT` and `AZURE_AI_MODEL_DEPLOYMENT_NAME`. 
Write `src//.env`: + +```env +FOUNDRY_PROJECT_ENDPOINT=https://.services.ai.azure.com/api/projects/ +AZURE_AI_MODEL_DEPLOYMENT_NAME= +``` + +Also mirror them into the azd env (so `azd ai agent run` injects the right values — it reads azd env *before* `.env`): + +```bash +azd env set AZURE_AI_PROJECT_ENDPOINT "" +azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME "" +``` + +### Step 11 — Local smoke test + +Set up a venv with `uv` installed first. `azd ai agent run` installs Python dependencies on first start; with an activated venv that has `uv` available, it uses `uv` (seconds) instead of plain `pip` (minutes). + +> **Important:** the venv must live in `src//` (next to `requirements.txt`). `azd ai agent run` resolves the venv relative to the service source directory; a venv at the project root is ignored and azd silently creates a second one without `uv`, wasting the speedup. + +**Python:** +```bash +cd src/ +python -m venv .venv +# Activate the venv — pick the line for your shell: +.\.venv\Scripts\Activate.ps1 # Windows pwsh +source .venv/bin/activate # macOS / Linux +python -m pip install uv +cd - # back to project root for the azd commands below +``` + +**.NET / Node:** no pre-install step — `azd ai agent run` runs `dotnet restore` / `npm install` itself on first start. + +Run the agent locally. For Python, do this **with the service-dir venv still activated** — activation is what lets `azd ai agent run` find `uv` for the fast dependency install. `azd ai agent run` **is** the local server — a foreground process holding port 8088 that must stay alive from start, through every `invoke --local`, until you explicitly stop it. + +Start it in a **managed** background session your shell tool can poll and stop (most tools detect a long-running foreground process and return a session/shell id — use that id). 
Do **not** use job operators (`bash &`, `nohup`, `start /B`, popped windows): on Linux/macOS the child gets `SIGHUP` and **dies when its parent bash exits**, so the next command sees `could not connect` even though `ss` from inside the *same* bash just showed `:8088` bound. + +> ⚠️ **Readiness gate — do not skip.** After starting `azd ai agent run`, **watch the server log for the ready line, something like `Running` (e.g. `Running on http://0.0.0.0:8088`) — not just `Starting …`**, which azd prints as a banner before the Python process has bound the socket. Invoking before the socket is bound fails with `could not connect`. +> - **Poll the log every ~15 seconds**, fire the invoke as soon as the ready line appears. Do **not** wait long blocks (60s+). +> - **Don't substitute log polling** with `sleep N && curl`, `netstat` / `ss` / `lsof`, or `ps aux` probes — only the log tells you readiness. +> - **If `invoke --local` fails,** re-read the server log. Error before the ready line (missing env var, auth, port in use) → fix the cause and restart `azd ai agent run` in the managed session. Ready line present but request still fails → the issue is in the request, not the server. Either way, do **not** bypass with `python main.py` or raw `curl POST /responses` — those skip the wiring the deployed agent uses. +> - **If `invoke --local` returns `could not connect` after you saw the ready line in a previous shell,** the server died when that shell exited (classic `&` symptom). Restart in the managed session — do not retry with another `&`. + +```bash +azd ai agent run --no-inspector +``` + +Smoke-invoke (local): + +```bash +azd ai agent invoke --local "" +``` + +Stop the local server via the managed session's stop primitive before continuing — a lingering process holds files in the project and breaks later cleanup. 
+ +### Step 12 — Deploy + +```bash +azd deploy --no-prompt +``` + +⏳ May take time — zips `src//` (respecting `.agentignore`), uploads to Foundry, builds runtime remotely, registers agent version. Wait for the prompt to return; do not interrupt. + +### Step 13 — Verify + remote smoke + +```bash +azd ai agent show --output json +``` + +Expect `"status": "active"` (or `"deployed"`) and an `agent_endpoints` map. + +Remote invoke (billed): + +```bash +azd ai agent invoke "" +``` + +`azd ai agent invoke` has **no `--force` flag**. If the command succeeds, read the response. If it surfaces a confirmation prompt or message, summarize the cost implication for the user (*"this will call the deployed agent and incur model usage charges"*), get explicit consent, and re-run — do **not** invent flags. + +### Step 14 — Submit eval suite generation (async, fire-and-forget) + +> ⚠️ **Pre-summary gate.** Do not write the Step 15 final summary until this step has been submitted. The eval suite is part of the deployment artifact; skipping it ships an incomplete result. + +Read the `description:` from `src//agent.yaml` (the value you set in Step 7) and pass it as `--gen-instruction`: + +```bash +azd ai agent eval generate --gen-instruction "" --no-wait --no-prompt +``` + +Expected output: + +``` +Eval generate submitted (async) + dataset generation: datagen- (queued) + evaluator generation: evaluatorgen- (in_progress) + Config written to: src//eval.yaml + When ready, run: + azd ai agent eval run +``` + +Generation runs server-side and takes several minutes. Tell the user: + +> *"Eval suite generation submitted. Run `azd ai agent eval run` whenever you're ready — it'll wait for generation to finish and execute the eval in one step."* + +### Step 15 — Final summary + +Produce a concise summary covering: agent name/version/status/endpoints, a Playground link, the resources created, and the three follow-up commands below. 
Construct the Playground URL from `azd env get-values` (or read `playground_url` directly from `azd ai agent show --output json` if present): + +``` +https://ai.azure.com/nextgen/r/{encodedSubId},{resourceGroup},,{accountName},{projectName}/build/agents/{agentName}/build?version={agentVersion} +``` + +`encodedSubId` = URL-safe base64 of the subscription GUID, padding stripped: + +```bash +python -c "import base64,uuid;print(base64.urlsafe_b64encode(uuid.UUID('').bytes).rstrip(b'=').decode())" +``` + +Three follow-up commands to include: + +```bash +azd ai agent invoke "" # chat with the deployed agent (billed) +azd ai agent eval run # finalize + run the eval suite (Step 14) +azd down # tear down all resources when done +``` + +## Error Handling + +| Symptom | Fix | +|---------|-----| +| `azd ai agent init` fails with `--runtime must be one of: python_3_13, python_3_14, dotnet_10` | You passed a bare value like `python`. Use the full runtime token (e.g. `python_3_13`). | +| `azd ai agent init` fails with `--entry-point is required when using --deploy-mode code with --no-prompt` | Pass `--entry-point ` using the Step 3 table's entry-point default for the chosen language (it is a CLI arg, not a manifest field). | +| `agent.yaml` `entry_point` doesn't match any file in `src//` | You guessed the entry-point in Step 6. Edit `agent.yaml` to the real filename (verify with `ls src//`). No re-init needed. | +| `azd deploy` postdeploy hook fails with missing `AZURE_TENANT_ID` | Run `az account show --query tenantId -o tsv` and `azd env set AZURE_TENANT_ID `, then re-run `azd deploy --no-prompt`. The deployed agent version from the first deploy is still valid; the postdeploy hook just registers env vars. | +| Scaffold sanity check fails (Step 8) | Pick a recovery path from Step 8. If still failing → [create-hosted.md](create-hosted.md). | +| Local invoke returns model `404` / wrong deployment | Stale `AZURE_AI_MODEL_DEPLOYMENT_NAME` in azd env overrides `.env`. Re-run Step 10 to sync both.
| +| `azd ai agent invoke ... --force` returns `unknown flag: --force` | `--force` is not a valid flag for invoke. Re-run without it. | +| Anything else | Escape to [create-hosted.md](create-hosted.md). | + +## Escape Hatch + +If any step fails in a way not covered above, the output looks unexpected, or the user's request drifts outside what this quickstart covers → **stop improvising**. Read [create-hosted.md](create-hosted.md) and follow its full workflow. + + diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/azd-ai-cli.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/azd-ai-cli.md new file mode 100644 index 00000000..a7283fac --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/azd-ai-cli.md @@ -0,0 +1,149 @@ +# azd ai CLI Reference + +Core mental model for the `azd ai agent` extension. Use this when you need to understand command surface, file layout, or where a given setting lives. + +## CLI surface + +```bash +azd ai project show # which Foundry project endpoint is active +azd ai agent show # is the agent deployed? what version? 
+azd ai agent doctor # full health check, suggests fixes + +azd ai agent sample list # curated catalog -- pick a manifestUrl +azd ai agent init -m # scaffold from a sample +azd ai agent init --from-code # scaffold from existing source + +azd ai agent run # start the agent on localhost:8088 +azd ai agent invoke "" # remote invoke (billed; gated) +azd ai agent invoke --local "" # local invoke (no billing) + +azd provision # core azd; creates Foundry project + infra +azd deploy # core azd; packages + registers new agent version +azd ai agent endpoint update # patch agentEndpoint / agentCard in place + +azd ai agent connection list / show / create / update / delete +azd ai toolbox list / show / create / update / delete +azd ai toolbox connection add / remove / list +azd ai toolbox version list + +azd ai agent files list / show / upload / download / delete / stat / mkdir +azd ai agent sessions list / show / create / update / delete +azd ai agent monitor # per-session log stream (SSE) + +azd ai agent eval generate / run / show / update / list +azd ai agent optimize / optimize status / optimize apply / optimize deploy / optimize cancel +``` + +Read-only commands accept `--output json` and never require `--force`. Write commands are gated by a confirmation envelope. + +## Two files, two schemas + +After `azd ai agent init`, every hosted agent is defined by **two** files plus the active azd env. Putting a field in the wrong file is the most common scaffolding failure. + +| File | What it holds | +|------|---------------| +| `/agent.yaml` | The flat `ContainerAgent`: `kind`, `name`, `protocols`, `environment_variables`, `agentEndpoint`, `agentCard`, `code_configuration` / `image`, container `resources` (cpu, memory). | +| `azure.yaml services..config` | Model deployments, project connections, toolboxes, tool resources, container settings, `startupCommand`.
| +| `.azure//.env` (`azd env set`) | Secrets and `PARAM__` credential values referenced from `azure.yaml`. | + +`azd deploy` reads `agent.yaml` and creates a new immutable agent version. `azd provision` reads `config.deployments[]` and `config.connections[]` and applies them via Bicep. + +`agent.manifest.yaml` (the file passed to `-m`) is the seed format -- it is NOT on disk after init. Init splits its `parameters:` / `resources:` blocks across the three files above. Don't reintroduce the `template:` wrapper into `agent.yaml`. + +### Minimal `agent.yaml` (hosted) + +```yaml +# yaml-language-server: $schema=https://raw.githubusercontent.com/microsoft/AgentSchema/refs/heads/main/schemas/v1.0/ContainerAgent.yaml +kind: hosted +name: my-agent +protocols: + - protocol: responses + version: "1.0.0" +resources: + cpu: "0.25" + memory: "0.5Gi" +environment_variables: + - name: AZURE_AI_MODEL_DEPLOYMENT_NAME + value: ${AZURE_AI_MODEL_DEPLOYMENT_NAME} +code_configuration: + runtime: python_3_13 + entry_point: main.py + dependency_resolution: remote_build # or "bundled" +``` + +- `protocols` -- `responses` (OpenAI), `invocations` (A2A). Editing requires `azd deploy`. +- `resources` -- valid tiers: `0.25/0.5Gi`, `1/2Gi`, `2/4Gi`. +- `environment_variables` -- `${VAR}` resolves from the active azd env. Not for secrets. +- `code_configuration` present -> direct code deploy (ZIP, Foundry builds). Absent -> container/ACR deploy (Dockerfile + `docker:` in `azure.yaml`). `image:` skips the Dockerfile build. +- In non-interactive mode, `azd ai agent init` defaults to container deploy. Pass `--deploy-mode code --runtime --entry-point ` during init to get `code_configuration`. +- `agentEndpoint` / `agentCard` -- patch in place with `azd ai agent endpoint update` (no new version).
+ +### Minimal `azure.yaml` service config + +```yaml +services: + my-agent: + project: ./src/my-agent + host: azure.ai.agent + language: python + config: + startupCommand: "python -m main" + container: + resources: + cpu: "0.25" + memory: "0.5Gi" + deployments: + - name: AZURE_AI_MODEL_DEPLOYMENT_NAME + model: + name: gpt-4.1-mini + format: OpenAI + version: "2024-04-09" + sku: + name: GlobalStandard + capacity: 50 + connections: [...] # see tools.md + toolboxes: [...] # see tools.md +``` + +- `startupCommand` -- what `azd ai agent run` executes locally. Auto-detected at init. +- `config.container.resources` -- deployment-time CPU/memory. Keep this aligned with `agent.yaml resources`; this value can override the agent file. +- `deployments[]` -- model deployments provisioned via Bicep. `name` is the env var the agent reads. +- `connections[]` -- project connections provisioned via Bicep. Use `PARAM__` env-var references for secrets. +- `toolboxes[]` -- declarative record of intent; today you still drive the toolbox CLI to materialize them on Foundry. See [tools](tools.md). + +## State (azd env vars) + +| Variable | Read by | Where to set | +|----------|---------|--------------| +| `AZURE_AI_PROJECT_ENDPOINT` | Every `azd ai agent` command | `azd env set` or `azd ai project show` | +| `AZURE_AI_PROJECT_ID` | `azd ai agent show` (playground URL) | `azd env set` | +| `AZURE_SUBSCRIPTION_ID`, `AZURE_LOCATION` | `azd provision` | `azd config get defaults` | +| `AGENT__NAME` / `_VERSION` / `__ENDPOINT` | Auto-written by deploy | Auto | +| `PARAM__` | Connection credentials in `azure.yaml` | `azd env set` | + +Manage with `azd env get-values`, `azd env set`, `azd env list`, `azd env new`, `azd env select`. + +The platform also injects `FOUNDRY_*` and `AGENT_*` into the running container at runtime. **Never** put these in the agent.yaml environment_variables section. + +## Resolving subscription / location + +`azd ai project show` returns only the Foundry project endpoint.
For subscription / location, try in order: + +1. `azd config get defaults` +2. `azd env get-values` +3. Ask the user. +4. Last resort, with explicit consent: `az account list --output json`. + +For the Foundry project ARM ID (`--project-id`), ask the user: "New project, or use an existing one?" If existing, ask for the ID and hint where to find it (https://ai.azure.com -> Operate -> Admin). Do NOT shell out to `az cognitiveservices` -- it returns the wrong resource shape. + +## Common error codes + +- `not_logged_in` / `login_expired` -- ask the user to run `azd auth login`. +- `missing_project_endpoint` -- run `azd provision`, or `azd env set AZURE_AI_PROJECT_ENDPOINT `. +- `project_not_found` -- cwd has no `azure.yaml`. Move to project root or run init. +- `invalid_agent_manifest` -- `agent.yaml` is malformed. Run `azd ai agent doctor` and read the named field. +- `invalid_connection` -- inspect with `azd ai agent connection show `. +- `eval_config_invalid` -- `eval.yaml` failed validation. Run `azd ai agent doctor`. +- `agent_definition_not_found` -- deployed name doesn't match `azure.yaml`. Re-deploy from project root. + +Any unfamiliar `code` value is safe to surface verbatim to the user. diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/local-run.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/local-run.md new file mode 100644 index 00000000..872e581a --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/local-run.md @@ -0,0 +1,146 @@ +# Local Run Reference + +Use this when iterating on a hosted agent before deploying. + +> **Prerequisite:** Local run does NOT require `azd provision` or any deployed Azure infrastructure. The agent runs on your machine and calls the Foundry model endpoint directly using your local credentials (`DefaultAzureCredential` — falls back to `az login` / VS Code identity). 
You only need a `.env` file in the agent directory with: +> ```env +> FOUNDRY_PROJECT_ENDPOINT=https://<account>.services.ai.azure.com/api/projects/<project> +> AZURE_AI_MODEL_DEPLOYMENT_NAME=<deployment-name> +> ``` +> If you already ran `azd provision`, extract these from `azd env get-values`. +> +> **If no project endpoint is available yet**, follow [deploy.md Step 2](../../deploy/deploy.md#step-2----provision-azure-resources-one-time-per-env) to provision or resolve the project, then return here for local iteration before deploying the agent. +> +> **Critical: keep `.env` and `azd env` in sync.** `azd ai agent run` injects the active `azd env` values into the agent process before Python loads `.env`. Many samples use `load_dotenv(override=False)`, so an existing process environment value wins over `.env`. If you change the project endpoint or model deployment, update both `.env` and `azd env`: +> ```bash +> azd env set FOUNDRY_PROJECT_ENDPOINT "https://<account>.services.ai.azure.com/api/projects/<project>" +> azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME "<deployment-name>" +> azd env get-values +> ``` +> A stale `AZURE_AI_MODEL_DEPLOYMENT_NAME` in `azd env` can make local run call the wrong deployment even when `.env` is correct, commonly surfacing as a Foundry responses API `404 Not Found`. + +## Prepare the local environment + +For Python agents, prepare the environment from the **agent's service source directory** -- the folder that contains `requirements.txt` and `agent.yaml` (typically `<project-root>/src/<agent-name>/`, not the azd project root). `azd ai agent run` resolves the venv relative to this folder; a `.venv` created in the project root is ignored and azd silently creates a second one without `uv`. + +1. `cd` into the service source directory. +2. Create a venv, for example `python -m venv .venv`. +3. Activate the venv. +4. Install `uv` inside the active venv: `python -m pip install uv`. +5.
In the same shell with the service-dir `.venv` activated, run `azd ai agent run` (from any cwd in the project); it installs `requirements.txt` itself and uses `uv` from the active venv for faster Python dependency installation. + +> **Important:** The venv must live next to `requirements.txt`, not in the azd project root. Install `uv` before running `azd ai agent run`, and keep that venv activated when running the command; otherwise the local run falls back to slower dependency installation. Do NOT manually run `pip install -r requirements.txt` / `uv pip install -r requirements.txt --prerelease=allow`; let `azd ai agent run` install dependencies. + +## Start the agent locally + +Activate the service-dir `.venv`, then in that venv run: + +```bash +azd ai agent run +``` + +What this does: + +1. Resolves the agent service from `azure.yaml` (auto-picks when only one exists). +2. Detects the project type (Python, .NET, Node.js) from files in the service source dir. +3. Installs dependencies if needed. For Python, `azd ai agent run` installs `requirements.txt` itself and uses `uv` from the active local environment when available. +4. Starts the agent in the foreground on `localhost:8088` (default). +5. Opens **Agent Inspector** in your browser (unless `--no-inspector`). + +> First startup takes 30-60 seconds. Wait before sending the first invocation. + +`Ctrl+C` stops the agent and clears the saved local session id in an interactive terminal. + +For headless or CI runs, pass `--no-inspector` and start the local server in a managed background session that later steps can monitor and stop. Wait for the "Agent ready" message, invoke it from a second command, then stop the same background session before deploying or leaving a temporary workspace. + +Do **not** start `azd ai agent run` as a detached process that you cannot monitor or stop (for example, a bare `azd ai agent run ... &`, or a popped PowerShell window on Windows). 
Keep logs, readiness polling, and the PID/process handle for cleanup. + +## Useful flags + +| Flag | Purpose | +|------|---------| +| `--port ` / `-p ` | Override the listen port. Useful when 8088 is taken. | +| `--start-command ""` / `-c ""` | Override `azure.yaml` and auto-detect. Example: `--start-command "python app.py"`. | +| `--no-inspector` | Skip opening Agent Inspector. Use in CI / SSH. | + +Pass the service name when there are multiple `ai.agent` services: + +```bash +azd ai agent run my-agent +``` + +## Where the start command comes from + +Resolution order (first non-empty wins): + +1. `--start-command` flag. +2. `azure.yaml services..config.startupCommand`. +3. Auto-detected from project type. + +Example: + +```yaml +# azure.yaml +services: + my-agent: + project: src/my-agent + language: python + host: azure.ai.agent + config: + startupCommand: "uvicorn app:app --host 0.0.0.0 --port 4001" +``` + +If detection fails and no override is set, `run` errors with the project dir and asks for `--start-command` or `startupCommand`. + +## Invoke the local agent + +```bash +azd ai agent invoke --local "hello, are you up?" +``` + +Do not use `--output json` with invoke. The invoke command supports `default` and `raw` output only. + +If the user did not explicitly specify a prompt, use `"hello, are you up"` for the local smoke test; only verify that the agent can return a response. + +Run one representative local invocation before deploying. If the local invocation returns a model `404` or wrong deployment error, check `azd env get-values` before changing code; stale azd env values are the most common cause. + +`--local` differs from a remote invoke in: + +- Targets `http://localhost:` instead of the Foundry endpoint. +- Skips the confirmation envelope (no billing, no remote mutation). +- `--version` is rejected (versions are a remote concept). +- Named-agent invocation is rejected (only one agent runs locally at a time). 
+ +Other useful flags: + +| Flag | Purpose | +|------|---------| +| `--protocol responses` (default) / `--protocol invocations` | Wire format your agent speaks. | +| `--input-file request.json` / `-f request.json` | Send a file body instead of a string message. | +| `--new-session` | Drop the saved local session and start fresh. | +| `--port ` | Match the port you started `run` with. | + +After the local invocation completes, stop the `azd ai agent run` process you started before moving on. + +## When to graduate to remote + +Local dev validates code shape; remote validates infra + identity + Foundry binding. Move to deploy when: + +- You changed `agent.yaml` `model:`, `tools:`, `connections:`, or `protocols:`. Those only take effect on the deployed agent. +- You need to test against real Foundry connections (search indexes, Bing, MCP, A2A) that have no local mock. +- You are ready to publish a new immutable agent version. + +Before proceeding to deploy, clean up the local agent process. + +Next step -> [deploy/deploy.md](../../deploy/deploy.md). + +## Common failures + +| Symptom | Likely cause | Fix | +|---------|--------------|-----| +| `could not connect to localhost:` | `run` not started, or wrong port | Start `azd ai agent run`; pass `--port` to `invoke --local` if non-default. | +| `could not detect project type in ` | Missing project marker file | Set `startupCommand` in `azure.yaml` or pass `--start-command`. | +| `cannot use --local with a named agent` | Named-agent invoke against localhost | Drop the name; only one local agent at a time. | +| `cannot use --version with --local` | `--version` is remote-only | Drop `--version`, or remove `--local` to hit the deployed agent. | +| Inspector never opens | Headless env, or extension install failed | Pass `--no-inspector`, or run `azd extension install azure.ai.inspector`. 
| +| Auth / connection errors against Azure services | Local credentials not wired | Expected -- `DefaultAzureCredential` falls back to your `az login` / VS Code identity. Use `azd auth login` if needed. | diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/tools.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/tools.md new file mode 100644 index 00000000..4d68131f --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/references/tools.md @@ -0,0 +1,209 @@ +# Tools and Toolboxes (azd ai) + +How to attach tools (web search, Azure AI Search, MCP, A2A) to a hosted agent using `azd ai toolbox` and `azd ai agent connection`. + +A **toolbox** is a curated bundle of connection-backed tools that Foundry exposes as a single MCP-compatible endpoint. The agent connects to one URL and discovers every tool inside. `azd deploy` does NOT auto-create toolboxes -- you drive the lifecycle explicitly. + +## Install the extension once + +```bash +azd extension install azure.ai.toolboxes +``` + +## The flow (every recipe) + +1. Create the **connection** (`azd ai agent connection create ...`). +2. Create or update the **toolbox** (`azd ai toolbox create` / `connection add`). +3. Read the endpoint (`azd ai toolbox show --output json`). +4. `azd env set TOOLBOX__MCP_ENDPOINT ""`. +5. Reference it in `/agent.yaml` `environment_variables[]`. +6. `azd deploy`. + +## Env var naming convention + +Uppercase the toolbox name, collapse non-alphanumeric to `_`, prefix `TOOLBOX_`, suffix `_MCP_ENDPOINT`. Examples: `agent-tools` -> `TOOLBOX_AGENT_TOOLS_MCP_ENDPOINT`, `agent.tools.v2` -> `TOOLBOX_AGENT_TOOLS_V2_MCP_ENDPOINT`. + +## Endpoint URL shapes + +- `{project}/toolboxes/{name}/versions/{version}/mcp?api-version=v1` -- version-pinned. What `azd ai toolbox show` returns. +- `{project}/toolboxes/{name}/mcp?api-version=v1` -- default version (consumer). 
Always serves `default_version`. + +To auto-pick up new default versions without redeploying, drop the `/versions/` segment and store the consumer URL. + +## CLI surface + +| Command | What it does | +|---------|--------------| +| `azd ai toolbox create <toolbox> --from-file <file>` | Create toolbox + publish v1. File must list at least one connection. | +| `azd ai toolbox connection add <toolbox> <connection> [--index ...] [--instance-name ...]` | Attach one; new default version. | +| `azd ai toolbox connection add <toolbox> --from-file <file>` | Attach many in one call; ONE new version. | +| `azd ai toolbox connection remove <toolbox> <connection>` | Detach; new default version. Refuses to leave zero tools. | +| `azd ai toolbox show <toolbox> [--version <version>]` | Show toolbox + MCP endpoint URL. | +| `azd ai toolbox list` | List toolboxes. | +| `azd ai toolbox version list <toolbox>` | List versions. | +| `azd ai toolbox update <toolbox> --default-version <version>` | Re-point default (rollback). | +| `azd ai toolbox delete <toolbox> [--version <version>] [--force]` | Delete toolbox or one version. | + +Every mutation publishes a new immutable version and promotes it to default. + +## `--from-file` shape + +```yaml +description: research toolbox # only on `create` +connections: + - name: my-mcp # RemoteTool + - name: my-search # CognitiveSearch -- needs index + index: products + - name: my-bing # GroundingWithCustomSearch -- needs instance_name + instance_name: docs-config + - name: my-a2a # RemoteA2A +``` + +## Recipe: GitHub MCP + +```bash +# 1. Connection +azd ai agent connection create github-mcp-conn \ + --kind remote-tool \ + --target https://api.githubcopilot.com/mcp \ + --auth-type custom-keys \ + --custom-key Authorization="Bearer ghp_xxx..." + +# 2. Toolbox (initial create needs a file; otherwise use `connection add`) +cat > tools.json <<'EOF' +{ "description": "agent tools", "connections": [ { "name": "github-mcp-conn" } ] } +EOF +azd ai toolbox create agent-tools --from-file tools.json + +# 3. Read the MCP endpoint +azd ai toolbox show agent-tools --output json + +# 4. Store it in the azd env +azd env set TOOLBOX_AGENT_TOOLS_MCP_ENDPOINT "<endpoint>" +``` + +In `<service-dir>/agent.yaml`: + +```yaml +environment_variables: + - name: TOOLBOX_AGENT_TOOLS_MCP_ENDPOINT + value: ${TOOLBOX_AGENT_TOOLS_MCP_ENDPOINT} +``` + +Then `azd deploy`.
+ +## Recipe: Azure AI Search RAG + +```bash +azd ai agent connection create my-search-conn \ + --kind cognitive-search \ + --target https://my-search.search.windows.net/ \ + --auth-type api-key --key "" + +azd ai toolbox connection add agent-tools my-search-conn --index contoso-outdoors +``` + +For multiple indexes, add multiple entries with different `index` values. + +## Recipe: A2A peer agent + +```bash +azd ai agent connection create peer-agent-conn \ + --kind remote-a2a \ + --target https://other-agent.foundry-account.westus2.azure.com/ \ + --auth-type none + +azd ai toolbox connection add agent-tools peer-agent-conn +``` + +For authenticated peers, use `--auth-type project-managed-identity --audience https://ai.azure.com/.default`. + +## Recipe: multi-tool toolbox in one call + +```yaml +# tools.yaml +description: "GitHub MCP + AI Search + A2A peer." +connections: + - name: github-mcp-conn + - name: my-search-conn + index: contoso-outdoors + - name: peer-agent-conn +``` + +```bash +azd ai toolbox create agent-tools --from-file tools.yaml +# OR (existing toolbox): azd ai toolbox connection add agent-tools --from-file tools.yaml +``` + +One new default version regardless of how many connections you attach in one call. + +## Tools the CLI does NOT manage today + +`azd ai toolbox` only handles connection-backed tools (`RemoteTool`, `CognitiveSearch`, `RemoteA2A`, `GroundingWithCustomSearch`). These built-ins have no connection and are NOT addable via this CLI: `web_search`, `code_interpreter`, `file_search`, `function`, `toolbox_search_preview`. + +To include any built-in in a toolbox today, use the Python / .NET / JS SDK or call the REST API directly. + +## Required header (agent code) + +Every MCP request to the toolbox endpoint must include: + +```http +Foundry-Features: Toolboxes=V1Preview +``` + +Token scope: `https://ai.azure.com/.default`. RBAC: the calling identity (developer + agent identity at runtime) needs **Foundry User** on the Foundry project. 
+ +## Agent code (Python, Microsoft Agent Framework) + +```python +import os, httpx +from azure.identity import DefaultAzureCredential +from agent_framework.tools.mcp import MCPStreamableHTTPTool + +_credential = DefaultAzureCredential() + +def _inject_auth(request: httpx.Request) -> None: + # Per-request token refresh -- static tokens expire in ~1 hour. + token = _credential.get_token("https://ai.azure.com/.default").token + request.headers["Authorization"] = f"Bearer {token}" + request.headers["Foundry-Features"] = "Toolboxes=V1Preview" + +tool = MCPStreamableHTTPTool( + name="github", # becomes server_label prefix + url=os.environ["TOOLBOX_AGENT_TOOLS_MCP_ENDPOINT"], + httpx_client=httpx.AsyncClient(event_hooks={"request": [_inject_auth]}), + load_prompts=False, # Foundry doesn't implement prompts/list + approval_mode="never_require", # for require_approval:always tools +) +``` + +Install: `pip install httpx azure-identity agent-framework`. + +## MCP client gotchas + +- **Always stream.** Non-streaming is not supported. +- **Don't call `prompts/list`.** Returns `500`. Pass `load_prompts=False`. +- **Don't `send_ping()`** with generic clients (returns `500`). Agent Framework handles this. +- **Tool names are prefixed with `server_label`.** `name="myserver"` -> tools appear as `myserver.`. +- **`require_approval`** is the client's responsibility -- the toolbox proxy does NOT enforce it. Pass `approval_mode="never_require"` or wire an approval handler. + +## Verify the wire end-to-end + +```bash +azd ai toolbox list --output json +azd ai toolbox show agent-tools --output json +azd deploy +azd ai agent invoke "list the tools you have access to" +``` + +## Troubleshooting + +| Symptom | Likely cause | +|---------|--------------| +| `TOOLBOX__MCP_ENDPOINT` not set | Run `azd ai toolbox show` + `azd env set`. | +| Env var missing in deployed agent | Add to `agent.yaml` `environment_variables[]`, `azd deploy`. 
| +| `400` mentioning `Toolboxes` | Missing `Foundry-Features: Toolboxes=V1Preview` header. | +| `401` on MCP calls | Expired / wrong-scope token. Use `https://ai.azure.com/.default`; refresh per request. | +| `403 Forbidden` | Caller missing `Foundry User` role. | +| `500` on `prompts/list` / ping | Disable in MCP client (`load_prompts=False`). | +| Empty response, tool never called | `require_approval: always` with no handler. Pass `approval_mode="never_require"`. | +| `tools/list` returns zero | Bad credentials, or toolbox version still provisioning. | +| Tool names don't match | Use `{server_label}.{tool_name}`. | diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.ps1 b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.ps1 new file mode 100644 index 00000000..233bdc66 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.ps1 @@ -0,0 +1,197 @@ +<# +.SYNOPSIS + Resolves a Foundry project ARM resource ID from a Foundry project endpoint. +.DESCRIPTION + Uses the endpoint only to obtain lookup keys for Azure CLI queries. The + resource ID printed by this script is always the `id` returned by Azure, + never a locally constructed ARM resource ID. 
+.EXAMPLE + ./resolve-project-id.ps1 -Endpoint "https://my-account.services.ai.azure.com/api/projects/my-project" +.EXAMPLE + ./resolve-project-id.ps1 -Endpoint "https://my-account.services.ai.azure.com/api/projects/my-project" -Output json +#> + +[CmdletBinding()] +param( + [Parameter(Mandatory = $true)] + [string]$Endpoint, + + [string]$Subscription, + + [string]$ResourceGroup, + + [string]$AccountName, + + [string]$ProjectName, + + [ValidateSet("id", "json")] + [string]$Output = "id" +) + +$ErrorActionPreference = "Stop" + +function Stop-Fatal { + param([string]$Message) + [Console]::Error.WriteLine("[ERROR] $Message") + exit 1 +} + +function Normalize-Endpoint { + param([string]$Value) + if (-not $Value) { return "" } + return $Value.Trim().TrimEnd("/") +} + +function Add-SubscriptionArg { + param([string[]]$CommandArgs) + if ($Subscription) { + return $CommandArgs + @("--subscription", $Subscription) + } + return $CommandArgs +} + +function Invoke-AzJson { # Runs `az` with the given arguments and returns its stdout parsed as JSON; throws when az exits non-zero + param([string[]]$CommandArgs) # az arguments; callers pass "-o json" so stdout is JSON + $raw = @(& { $ErrorActionPreference = "Continue"; & az @CommandArgs 2>&1 }) # local "Continue" keeps az stderr text from becoming a terminating error under the script-wide "Stop" + if ($LASTEXITCODE -ne 0) { + throw "az $($CommandArgs -join ' ') failed: $($raw -join "`n")" + } + $json = (@($raw | Where-Object { $_ -isnot [System.Management.Automation.ErrorRecord] }) -join "`n").Trim() # parse stdout only; redirected stderr lines (az warnings) arrive as ErrorRecord objects and are not JSON + if ($json) { return ($json | ConvertFrom-Json -ErrorAction Stop) } # no stdout -> nothing is emitted, so the caller sees $null +} + +function Get-ProjectEndpoints { + param($Project) + $values = @() + if ($Project -and $Project.properties -and $Project.properties.endpoints) { + foreach ($property in $Project.properties.endpoints.PSObject.Properties) { + if ($property.Value -is [string] -and $property.Value) { + $values += (Normalize-Endpoint $property.Value) + } + } + } + return $values +} + +function Endpoint-MatchesProject { + param($Project, [string]$ExpectedEndpoint) + foreach ($candidate in (Get-ProjectEndpoints $Project)) { + if ($candidate -eq $ExpectedEndpoint) { + return $true + } + } + return $false +} + +if (-not (Get-Command az -ErrorAction SilentlyContinue)) { + Stop-Fatal "Azure CLI 'az' was not found on PATH."
+} + +$normalizedEndpoint = Normalize-Endpoint $Endpoint +try { + $endpointUri = [System.Uri]$normalizedEndpoint +} catch { + Stop-Fatal "Endpoint is not a valid URI: $Endpoint" +} + +if (-not $endpointUri.IsAbsoluteUri -or $endpointUri.Scheme -notin @("http", "https")) { # exact scheme match; a relative URI has no scheme to inspect + Stop-Fatal "Endpoint must be an http or https URI." +} + +if (-not $ProjectName) { + $segments = @($endpointUri.AbsolutePath.Trim("/").Split("/", [System.StringSplitOptions]::RemoveEmptyEntries)) + for ($i = 0; $i -lt $segments.Count; $i++) { + if ($segments[$i] -ieq "projects" -and ($i + 1) -lt $segments.Count) { + $ProjectName = [System.Uri]::UnescapeDataString($segments[$i + 1]) + break + } + } +} + +if (-not $ProjectName) { + Stop-Fatal "Could not read the project name from the endpoint path. Re-run with -ProjectName." +} + +if (-not $AccountName) { + $hostParts = @($endpointUri.Host.Split(".")) + if ($hostParts.Count -gt 0 -and $endpointUri.Host.EndsWith(".services.ai.azure.com", [System.StringComparison]::OrdinalIgnoreCase)) { + $AccountName = $hostParts[0] + } +} + +if (-not $AccountName) { + Stop-Fatal "Could not read the account name from the endpoint host. Re-run with -AccountName." +} + +if (-not $ResourceGroup) { + try { + $accounts = Invoke-AzJson (Add-SubscriptionArg @("cognitiveservices", "account", "list", "-o", "json")) + } catch { + Stop-Fatal $_.Exception.Message + } + + $accountMatches = @($accounts | Where-Object { # named $accountMatches because $Matches is a PowerShell automatic variable + ($_.name -ieq $AccountName) -or + ($_.properties.customSubDomainName -ieq $AccountName) + }) + + if ($accountMatches.Count -eq 0) { + Stop-Fatal "Could not find a Cognitive Services account matching '$AccountName'. Re-run with -ResourceGroup and -AccountName if the endpoint uses a custom host." + } + + if ($accountMatches.Count -gt 1) { + $choices = ($accountMatches | ForEach-Object { "$($_.resourceGroup)/$($_.name)" }) -join ", " + Stop-Fatal "Multiple accounts matched '$AccountName': $choices. Re-run with -ResourceGroup."
+ } + + $ResourceGroup = $accountMatches[0].resourceGroup + $AccountName = $accountMatches[0].name +} + +$project = $null +try { + $project = Invoke-AzJson (Add-SubscriptionArg @( + "cognitiveservices", "account", "project", "show", + "-g", $ResourceGroup, + "-n", $AccountName, + "--project-name", $ProjectName, + "-o", "json" + )) +} catch { + try { + $projects = Invoke-AzJson (Add-SubscriptionArg @( + "cognitiveservices", "account", "project", "list", + "-g", $ResourceGroup, + "-n", $AccountName, + "-o", "json" + )) + $project = @($projects | Where-Object { Endpoint-MatchesProject $_ $normalizedEndpoint }) | Select-Object -First 1 + } catch { + Stop-Fatal $_.Exception.Message + } +} + +if (-not $project) { + Stop-Fatal "Could not resolve a Foundry project for endpoint '$normalizedEndpoint'." +} + +$projectEndpoints = @(Get-ProjectEndpoints $project) +if ($projectEndpoints.Count -gt 0 -and -not (Endpoint-MatchesProject $project $normalizedEndpoint)) { + Stop-Fatal "Resolved project endpoint metadata did not match '$normalizedEndpoint'." +} + +if (-not $project.id) { + Stop-Fatal "Azure returned a project object without an id." +} + +if ($Output -eq "json") { + [ordered]@{ + id = $project.id + endpoint = if ($projectEndpoints.Count -gt 0) { $projectEndpoints[0] } else { $normalizedEndpoint } + resourceGroup = $ResourceGroup + accountName = $AccountName + projectName = $ProjectName + } | ConvertTo-Json -Depth 5 +} else { + Write-Output $project.id +} diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.sh b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.sh new file mode 100644 index 00000000..3b21e7e7 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/resolve-project-id.sh @@ -0,0 +1,296 @@ +#!/usr/bin/env bash +# resolve-project-id.sh +# Resolves a Foundry project ARM resource ID from a Foundry project endpoint.
+# The endpoint is used only for Azure lookup keys; the printed ID is the `id` +# returned by Azure CLI, not a locally constructed ARM resource ID. +# +# Usage: +# ./resolve-project-id.sh --endpoint "https://my-account.services.ai.azure.com/api/projects/my-project" +# ./resolve-project-id.sh --endpoint "https://my-account.services.ai.azure.com/api/projects/my-project" --output json + +set -uo pipefail + +ENDPOINT="" +SUBSCRIPTION="" +RESOURCE_GROUP="" +ACCOUNT_NAME="" +PROJECT_NAME="" +OUTPUT="id" +TEMP_FILES=() + +cleanup() { + if [ "${#TEMP_FILES[@]}" -gt 0 ]; then + rm -f "${TEMP_FILES[@]}" + fi +} +trap cleanup EXIT + +usage() { + cat <<'EOF' +Usage: resolve-project-id.sh --endpoint [options] + +Options: + -e, --endpoint Foundry project endpoint. Required. + --subscription Azure subscription ID or name. + -g, --resource-group Resource group for the Foundry account. + -n, --account-name Foundry account name. + --project-name Foundry project name. + -o, --output Output format. Default: id. + -h, --help Show this help. +EOF +} + +fatal() { + echo "[ERROR] $1" >&2 + exit 1 +} + +while [ "$#" -gt 0 ]; do + case "$1" in + -e|--endpoint) + [ "$#" -ge 2 ] || fatal "$1 requires a value." + ENDPOINT="$2" + shift 2 + ;; + --subscription) + [ "$#" -ge 2 ] || fatal "$1 requires a value." + SUBSCRIPTION="$2" + shift 2 + ;; + -g|--resource-group) + [ "$#" -ge 2 ] || fatal "$1 requires a value." + RESOURCE_GROUP="$2" + shift 2 + ;; + -n|--account-name) + [ "$#" -ge 2 ] || fatal "$1 requires a value." + ACCOUNT_NAME="$2" + shift 2 + ;; + --project-name) + [ "$#" -ge 2 ] || fatal "$1 requires a value." + PROJECT_NAME="$2" + shift 2 + ;; + -o|--output) + [ "$#" -ge 2 ] || fatal "$1 requires a value." + OUTPUT="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + fatal "Unknown argument: $1" + ;; + esac +done + +[ -n "$ENDPOINT" ] || fatal "--endpoint is required." +[ "$OUTPUT" = "id" ] || [ "$OUTPUT" = "json" ] || fatal "--output must be 'id' or 'json'." 
+ +command -v az >/dev/null 2>&1 || fatal "Azure CLI 'az' was not found on PATH." +command -v python3 >/dev/null 2>&1 || fatal "python3 was not found on PATH." + +PARSED_ENDPOINT="$( + python3 - "$ENDPOINT" "$ACCOUNT_NAME" "$PROJECT_NAME" <<'PY' +import json +import sys +from urllib.parse import unquote, urlparse + +endpoint = (sys.argv[1] or "").strip().rstrip("/") +account_name = sys.argv[2] or "" +project_name = sys.argv[3] or "" + +parsed = urlparse(endpoint) +if parsed.scheme not in ("http", "https") or not parsed.netloc: + print("Endpoint must be an http or https URI.", file=sys.stderr) + raise SystemExit(1) + +if not project_name: + parts = [unquote(p) for p in parsed.path.strip("/").split("/") if p] + for index, part in enumerate(parts): + if part.lower() == "projects" and index + 1 < len(parts): + project_name = parts[index + 1] + break + +if not account_name: + host = parsed.hostname or "" + suffix = ".services.ai.azure.com" + if host.lower().endswith(suffix): + account_name = host[:-len(suffix)] + +print(json.dumps({ + "endpoint": endpoint, + "accountName": account_name, + "projectName": project_name, +})) +PY +)" || fatal "Could not parse Foundry project endpoint." + +NORMALIZED_ENDPOINT="$(python3 -c 'import json,sys; print(json.loads(sys.stdin.read())["endpoint"])' <<<"$PARSED_ENDPOINT")" +if [ -z "$ACCOUNT_NAME" ]; then + ACCOUNT_NAME="$(python3 -c 'import json,sys; print(json.loads(sys.stdin.read())["accountName"])' <<<"$PARSED_ENDPOINT")" +fi +if [ -z "$PROJECT_NAME" ]; then + PROJECT_NAME="$(python3 -c 'import json,sys; print(json.loads(sys.stdin.read())["projectName"])' <<<"$PARSED_ENDPOINT")" +fi + +[ -n "$ACCOUNT_NAME" ] || fatal "Could not read the account name from the endpoint host. Re-run with --account-name." +[ -n "$PROJECT_NAME" ] || fatal "Could not read the project name from the endpoint path. Re-run with --project-name." 
+ +add_subscription_arg() { + if [ -n "$SUBSCRIPTION" ]; then + printf '%s\n' "--subscription" "$SUBSCRIPTION" + fi +} + +run_az_json() { + local stderr_file + stderr_file="$(mktemp)" + local output + if output="$(az "$@" 2>"$stderr_file")"; then + rm -f "$stderr_file" + printf '%s' "$output" + return 0 + fi + local error_text + error_text="$(cat "$stderr_file")" + rm -f "$stderr_file" + echo "$error_text" >&2 + return 1 +} + +if [ -z "$RESOURCE_GROUP" ]; then + AZ_ARGS=(cognitiveservices account list -o json) + while IFS= read -r arg; do + [ -n "$arg" ] && AZ_ARGS+=("$arg") + done < <(add_subscription_arg) + + ACCOUNTS_JSON="$(run_az_json "${AZ_ARGS[@]}")" || fatal "Failed to list Cognitive Services accounts." + ACCOUNTS_FILE="$(mktemp)" + TEMP_FILES+=("$ACCOUNTS_FILE") + printf '%s' "$ACCOUNTS_JSON" >"$ACCOUNTS_FILE" + MATCHED_ACCOUNT="$( + ACCOUNT_NAME="$ACCOUNT_NAME" python3 - "$ACCOUNTS_FILE" <<'PY' +import json +import os +import sys + +target = os.environ["ACCOUNT_NAME"].lower() +with open(sys.argv[1], encoding="utf-8") as handle: + accounts = json.load(handle) +matches = [] +for account in accounts: + name = (account.get("name") or "") + custom = ((account.get("properties") or {}).get("customSubDomainName") or "") + if name.lower() == target or custom.lower() == target: + matches.append(account) + +if not matches: + print(f"Could not find a Cognitive Services account matching '{os.environ['ACCOUNT_NAME']}'.", file=sys.stderr) + raise SystemExit(1) +if len(matches) > 1: + choices = ", ".join(f"{m.get('resourceGroup')}/{m.get('name')}" for m in matches) + print(f"Multiple accounts matched '{os.environ['ACCOUNT_NAME']}': {choices}. Re-run with --resource-group.", file=sys.stderr) + raise SystemExit(1) + +print(json.dumps({ + "resourceGroup": matches[0].get("resourceGroup") or "", + "accountName": matches[0].get("name") or "", +})) +PY + )" || fatal "Failed to resolve the Foundry account resource group." 
+ + RESOURCE_GROUP="$(python3 -c 'import json,sys; print(json.loads(sys.stdin.read())["resourceGroup"])' <<<"$MATCHED_ACCOUNT")" + ACCOUNT_NAME="$(python3 -c 'import json,sys; print(json.loads(sys.stdin.read())["accountName"])' <<<"$MATCHED_ACCOUNT")" +fi + +PROJECT_JSON="" +AZ_SHOW_ARGS=( + cognitiveservices account project show + -g "$RESOURCE_GROUP" + -n "$ACCOUNT_NAME" + --project-name "$PROJECT_NAME" + -o json +) +while IFS= read -r arg; do + [ -n "$arg" ] && AZ_SHOW_ARGS+=("$arg") +done < <(add_subscription_arg) + +if ! PROJECT_JSON="$(run_az_json "${AZ_SHOW_ARGS[@]}")"; then + AZ_LIST_ARGS=( + cognitiveservices account project list + -g "$RESOURCE_GROUP" + -n "$ACCOUNT_NAME" + -o json + ) + while IFS= read -r arg; do + [ -n "$arg" ] && AZ_LIST_ARGS+=("$arg") + done < <(add_subscription_arg) + + PROJECTS_JSON="$(run_az_json "${AZ_LIST_ARGS[@]}")" || fatal "Failed to list Foundry projects." + PROJECTS_FILE="$(mktemp)" + TEMP_FILES+=("$PROJECTS_FILE") + printf '%s' "$PROJECTS_JSON" >"$PROJECTS_FILE" + PROJECT_JSON="$( + NORMALIZED_ENDPOINT="$NORMALIZED_ENDPOINT" python3 - "$PROJECTS_FILE" <<'PY' +import json +import os +import sys + +expected = os.environ["NORMALIZED_ENDPOINT"].rstrip("/") +with open(sys.argv[1], encoding="utf-8") as handle: + projects = json.load(handle) + +def endpoints(project): + values = ((project.get("properties") or {}).get("endpoints") or {}).values() + return [value.rstrip("/") for value in values if isinstance(value, str) and value] + +for project in projects: + if expected in endpoints(project): + print(json.dumps(project)) + break +else: + print(f"Could not find a Foundry project matching endpoint '{expected}'.", file=sys.stderr) + raise SystemExit(1) +PY + )" || fatal "Failed to resolve the Foundry project from endpoint metadata." 
+fi + +PROJECT_JSON="$PROJECT_JSON" \ +NORMALIZED_ENDPOINT="$NORMALIZED_ENDPOINT" \ +RESOURCE_GROUP="$RESOURCE_GROUP" \ +ACCOUNT_NAME="$ACCOUNT_NAME" \ +PROJECT_NAME="$PROJECT_NAME" \ +OUTPUT="$OUTPUT" \ +python3 - <<'PY' +import json +import os + +project = json.loads(os.environ["PROJECT_JSON"]) +expected = os.environ["NORMALIZED_ENDPOINT"].rstrip("/") +endpoint_values = ((project.get("properties") or {}).get("endpoints") or {}).values() +endpoints = [value.rstrip("/") for value in endpoint_values if isinstance(value, str) and value] + +if endpoints and expected not in endpoints: + print(f"[ERROR] Resolved project endpoint metadata did not match '{expected}'.", file=__import__("sys").stderr) + raise SystemExit(1) + +resource_id = project.get("id") +if not resource_id: + print("[ERROR] Azure returned a project object without an id.", file=__import__("sys").stderr) + raise SystemExit(1) + +if os.environ["OUTPUT"] == "json": + print(json.dumps({ + "id": resource_id, + "endpoint": endpoints[0] if endpoints else expected, + "resourceGroup": os.environ["RESOURCE_GROUP"], + "accountName": os.environ["ACCOUNT_NAME"], + "projectName": os.environ["PROJECT_NAME"], + }, indent=2)) +else: + print(resource_id) +PY diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.ps1 b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.ps1 new file mode 100644 index 00000000..f27f3705 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.ps1 @@ -0,0 +1,127 @@ +<# +.SYNOPSIS + Verifies the local environment for creating a hosted Foundry agent with `azd ai`. +.DESCRIPTION + Runs all the read-only checks in one pass and prints a single concise summary, + so the agent does not have to run (and reason over) each azd command separately. + + Output lines are prefixed with [OK], [WARN], or [ACTION]. 
+    Exit code is 0 when no blocking actions remain, 1 when at least one [ACTION] is required.
+.EXAMPLE
+    ./verify-environment.ps1
+#>
+
+$ErrorActionPreference = "Stop"
+$actionRequired = $false
+
+function Note-Ok { param([string]$m) Write-Output "[OK] $m" }
+function Note-Warn { param([string]$m) Write-Output "[WARN] $m" }
+function Note-Action { param([string]$m) Write-Output "[ACTION] $m"; $script:actionRequired = $true }
+# Get-AzdJson: runs `azd <AzdArgs>` and returns the parsed JSON, or $null when azd prints nothing or the output is not JSON.
+function Get-AzdJson {
+    param([string[]]$AzdArgs)
+    try {
+        $raw = & { $ErrorActionPreference = "Continue"; & azd @AzdArgs 2>$null }  # child scope: with "Stop", redirected native stderr is a terminating error on PowerShell <= 7.1
+        if (-not $raw) { return $null }
+        return ($raw | ConvertFrom-Json -ErrorAction Stop)
+    } catch {
+        return $null
+    }
+}
+
+# Refresh PATH to pick up recently-installed tools (e.g. azd installed in same session); existing session entries are kept and stay first
+$env:Path = (@($env:Path, [System.Environment]::GetEnvironmentVariable("Path", "Machine"), [System.Environment]::GetEnvironmentVariable("Path", "User")) | Where-Object { $_ }) -join ";"
+
+# 1. azd present + version
+# Check PATH first, then probe common install locations (winget, MSI, chocolatey)
+if (-not (Get-Command azd -ErrorAction SilentlyContinue)) {
+    $azdFallbackPaths = @(
+        "$env:LOCALAPPDATA\Programs\Azure Dev CLI"
+        "$env:ProgramFiles\Azure Dev CLI"
+        "${env:ProgramFiles(x86)}\Azure Dev CLI"
+        "$env:USERPROFILE\.azd\bin"
+    )
+    $found = $false
+    foreach ($dir in $azdFallbackPaths) {
+        if (Test-Path "$dir\azd.exe") {
+            $env:Path = "$dir;$env:Path"
+            Note-Warn "azd found at '$dir' but was not on PATH. Added automatically for this session."
+            $found = $true
+            break
+        }
+    }
+    if (-not $found) {
+        Note-Action "Azure Developer CLI (azd) is not installed. Install it from https://aka.ms/azd-install, then re-run."
+        Write-Output ""
+        Write-Output "Summary: azd missing -- cannot continue."
+        exit 1
+    }
+}
+
+$verJson = Get-AzdJson @("version", "--output", "json")
+$azdVersion = if ($verJson -and $verJson.azd -and $verJson.azd.version) { $verJson.azd.version } else { "unknown" }
+Note-Ok "azd installed (version $azdVersion)."
+
+# 2. Required extensions (substring match on the raw JSON text; stderr noise from azd must not abort the check)
+$extRaw = & { $ErrorActionPreference = "Continue"; (& azd extension list --output json 2>$null) -join "`n" }
+foreach ($ext in @("azure.ai.agents", "azure.ai.projects")) {
+    if ($extRaw -match [regex]::Escape($ext)) {
+        Note-Ok "Extension '$ext' is installed."
+    } else {
+        Note-Action "Extension '$ext' is missing. Run: azd extension install $ext"
+    }
+}
+
+# 3. Auth status (only the exit code matters; $LASTEXITCODE is still set by the native call inside the child scope)
+& { $ErrorActionPreference = "Continue"; & azd auth login --check-status *> $null }
+if ($LASTEXITCODE -eq 0) {
+    Note-Ok "Logged in to azd."
+} else {
+    Note-Action "Not logged in. Ask the user to run 'azd auth login' (it opens a browser; never run it for them)."
+}
+
+# 4. Foundry project endpoint (optional at this stage)
+# Short-circuit when there's no azd project in cwd: `azd ai project show` / `agent show`
+# would just return nothing after a ~3s subprocess each.
+if (-not (Test-Path "azure.yaml")) {
+    Note-Warn "No Foundry project endpoint set yet. A new project will be created at provision/deploy time, or supply an existing project resource ID."
+    Note-Ok "No agent deployed yet. Proceed with create."
+} else {
+    $projectJson = Get-AzdJson @("ai", "project", "show", "--output", "json")
+    $endpoint = $null
+    if ($projectJson) {
+        foreach ($k in @("endpoint", "projectEndpoint", "aiProjectEndpoint")) {
+            if ($projectJson.PSObject.Properties.Name -contains $k -and $projectJson.$k) {
+                $endpoint = $projectJson.$k
+                break
+            }
+        }
+    }
+    if ($endpoint) {
+        Note-Ok "Foundry project endpoint configured: $endpoint"
+    } else {
+        Note-Warn "No Foundry project endpoint set yet. A new project will be created at provision/deploy time, or supply an existing project resource ID."
+    }
+
+    # 5. Agent deployment status
+    $agentJson = Get-AzdJson @("ai", "agent", "show", "--output", "json")
+    if ($agentJson) {
+        $status = if ($agentJson.PSObject.Properties.Name -contains "status" -and $agentJson.status) { $agentJson.status } else { "unknown" }
+        switch ($status) {
+            { $_ -in @("active", "deployed") } { Note-Ok "An agent is already deployed (status: $status). Skip to deploy.md to redeploy, or tools to add a tool." }
+            "not_deployed" { Note-Ok "No agent deployed yet (status: not_deployed). Proceed with create." }
+            default { Note-Warn "Agent status: $status." }
+        }
+    } else {
+        Note-Ok "No agent deployed yet. Proceed with create."
+    }
+}
+
+Write-Output ""
+if ($actionRequired) {
+    Write-Output "Summary: action required -- resolve the [ACTION] items above before continuing."
+    exit 1
+} else {
+    Write-Output "Summary: environment ready for 'azd ai' hosted-agent creation."
+    exit 0
+}
diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.sh b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.sh
new file mode 100644
index 00000000..52ddb68e
--- /dev/null
+++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/create/scripts/verify-environment.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+# verify-environment.sh
+# Verifies the local environment for creating a hosted Foundry agent with `azd ai`.
+# Runs all the read-only checks in one pass and prints a single concise summary,
+# so the agent does not have to run (and reason over) each azd command separately.
+#
+# Usage:
+#   ./verify-environment.sh
+#
+# Output: human-readable summary lines, each prefixed with [OK], [WARN], or [ACTION].
+# Exit code: 0 if no blocking actions, 1 if at least one [ACTION] is required.
+
+set -uo pipefail
+
+ACTION_REQUIRED=0
+
+note_ok() { echo "[OK] $1"; }
+note_warn() { echo "[WARN] $1"; }
+note_action() { echo "[ACTION] $1"; ACTION_REQUIRED=1; }
+
+# Refresh PATH to pick up recently-installed tools (e.g. azd installed in same session)
+if [ -f /etc/environment ]; then
+  # shellcheck disable=SC1091 -- sourced in a subshell and appended, so the caller's PATH entries are never replaced
+  PATH="${PATH}:$(. /etc/environment >/dev/null 2>&1; printf '%s' "${PATH:-}")"
+fi
+hash -r 2>/dev/null || true
+
+# 1. azd present + version
+if ! command -v azd >/dev/null 2>&1; then
+  note_action "Azure Developer CLI (azd) is not installed.
Install it from https://aka.ms/azd-install, then re-run."
+  echo ""
+  echo "Summary: azd missing -- cannot continue."
+  exit 1
+fi
+
+AZD_VERSION="$(azd version --output json 2>/dev/null | python3 -c 'import json,sys; print(json.load(sys.stdin).get("azd",{}).get("version","unknown"))' 2>/dev/null || echo unknown)"
+note_ok "azd installed (version ${AZD_VERSION})."
+
+# 2. Required extensions (fixed-string match: the ids contain '.'; here-string avoids a SIGPIPE false miss under pipefail)
+EXT_JSON="$(azd extension list --output json 2>/dev/null || echo '[]')"
+for ext in azure.ai.agents azure.ai.projects; do
+  if grep -qF -- "$ext" <<<"$EXT_JSON"; then
+    note_ok "Extension '$ext' is installed."
+  else
+    note_action "Extension '$ext' is missing. Run: azd extension install $ext"
+  fi
+done
+
+# 3. Auth status
+if azd auth login --check-status >/dev/null 2>&1; then
+  note_ok "Logged in to azd."
+else
+  note_action "Not logged in. Ask the user to run 'azd auth login' (it opens a browser; never run it for them)."
+fi
+
+# 4. Foundry project endpoint (optional at this stage)
+# Short-circuit when there's no azd project in cwd: `azd ai project show` / `agent show`
+# would just return nothing after a ~3s subprocess each.
+if [ ! -f "azure.yaml" ]; then
+  note_warn "No Foundry project endpoint set yet. A new project will be created at provision/deploy time, or supply an existing project resource ID."
+  note_ok "No agent deployed yet. Proceed with create."
+else
+  PROJECT_JSON="$(azd ai project show --output json 2>/dev/null || echo '')"
+  ENDPOINT=""
+  if [ -n "$PROJECT_JSON" ]; then
+    ENDPOINT="$(printf '%s' "$PROJECT_JSON" | python3 -c 'import json,sys
+try:
+    d=json.load(sys.stdin)
+except Exception:
+    print(""); raise SystemExit
+if isinstance(d,dict):
+    for k in ("endpoint","projectEndpoint","aiProjectEndpoint"):
+        if d.get(k):
+            print(d[k]); break
+' 2>/dev/null)"
+  fi
+  if [ -n "$ENDPOINT" ]; then
+    note_ok "Foundry project endpoint configured: ${ENDPOINT}"
+  else
+    note_warn "No Foundry project endpoint set yet. A new project will be created at provision/deploy time, or supply an existing project resource ID."
+  fi
+
+  # 5. Agent deployment status (a missing, null or empty status is reported as "unknown", matching verify-environment.ps1)
+  AGENT_JSON="$(azd ai agent show --output json 2>/dev/null || echo '')"
+  if [ -n "$AGENT_JSON" ]; then
+    STATUS="$(printf '%s' "$AGENT_JSON" | python3 -c 'import json,sys
+try:
+    d=json.load(sys.stdin)
+except Exception:
+    print("unknown"); raise SystemExit
+print((d.get("status") or "unknown") if isinstance(d,dict) else "unknown")' 2>/dev/null)"
+    case "$STATUS" in
+      active|deployed) note_ok "An agent is already deployed (status: ${STATUS}). Skip to deploy.md to redeploy, or tools to add a tool." ;;
+      not_deployed) note_ok "No agent deployed yet (status: not_deployed). Proceed with create." ;;
+      *) note_warn "Agent status: ${STATUS:-unknown}." ;;
+    esac
+  else
+    note_ok "No agent deployed yet. Proceed with create."
+  fi
+fi
+
+echo ""
+if [ "$ACTION_REQUIRED" -eq 1 ]; then
+  echo "Summary: action required -- resolve the [ACTION] items above before continuing."
+  exit 1
+else
+  echo "Summary: environment ready for 'azd ai' hosted-agent creation."
+  exit 0
+fi
diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/deploy.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/deploy.md
index 32074b87..ad38aac7 100644
--- a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/deploy.md
+++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/deploy.md
@@ -1,490 +1,311 @@
-# Foundry Agent Deploy
+# Deploy a Foundry Agent
 
-Create and manage agent deployments in Azure AI Foundry. For hosted agents, this includes the full workflow from containerizing the project to verifying the deployed agent.
+Provision Azure resources when needed, deploy the agent, and smoke-test it.
+
+For **hosted agents** (custom container or code), use `azd deploy`.
Prefer **direct code deployment through azd** (no Docker/ACR required): when `agent.yaml` contains `code_configuration:`, `azd deploy` zips the source and lets Foundry build it. Use container/ACR deployment only when the agent truly needs a Dockerfile, custom system packages, or a pre-built image. + +For **prompt agents** (LLM + instructions, no custom code), use the Foundry MCP `agent_update` tool. ## Quick Reference | Property | Value | |----------|-------| -| Agent types | Prompt (LLM-based), Hosted | -| MCP server | `azure` | -| Key Foundry MCP tools | `agent_definition_schema_get`, `agent_update`, `agent_get` | -| CLI tools | `docker`, `az acr` (hosted agents only) | -| Container protocols | `a2a`, `responses`, `invocations`, `invocations_ws`, `mcp` | -| Supported languages | .NET, Node.js, Python, Go, Java | - -## When to Use This Skill - -USE FOR: deploy agent to foundry, push agent to foundry, ship my agent, build and deploy container agent, deploy hosted agent, direct code deployment, upload code deployment, create hosted agent, deploy prompt agent, ACR build, container image for agent, docker build for foundry, redeploy agent, update agent deployment, clone agent, delete agent, azd deploy hosted agent, azd ai agent, azd up for agent, deploy agent with azd. - -> ⚠️ **DO NOT manually run** `azd up`, `azd deploy`, `az acr build`, `docker build`, `agent_update`, or direct-code REST upload commands **without reading this skill first.** This skill orchestrates the full deployment pipeline: project scan → env var collection → deployment method selection → Dockerfile/image build or direct-code metadata upload → agent creation/version update → verification. Running CLI commands or calling MCP tools individually skips critical steps (env var confirmation, schema or REST metadata validation, RBAC setup, invocation verification).
- -## MCP Tools - -| Tool | Description | Parameters | -|------|-------------|------------| -| `agent_definition_schema_get` | Get JSON schema for agent definitions | `projectEndpoint` (required), `schemaType` (`prompt`, `hosted`, `tools`, `all`) | -| `agent_update` | Create, update, or clone an agent | `projectEndpoint`, `agentName` (required); `agentDefinition` (JSON), `isCloneRequest`, `cloneTargetAgentName`, `modelName` | -| `agent_get` | List all agents or get a specific agent | `projectEndpoint` (required), `agentName` (optional) | -| `agent_delete` | Delete an agent and clean up hosted-agent runtime resources | `projectEndpoint`, `agentName` (required) | - -## Deployment Method Selection - -Direct code deployment is opt-in only. - -- Prompt agents use [Workflow: Prompt Agent Deployment](#workflow-prompt-agent-deployment). -- Hosted agents use [Workflow: Hosted Agent Deployment](#workflow-hosted-agent-deployment); select the hosted deployment method in Step 3. -- Do not infer direct code deployment just because Docker is unavailable or a Dockerfile is missing. Ask or use the default Docker/ACR workflow guidance. - -If the user explicitly says `using direct code deployment`, `direct-code deployment`, `upload code deployment`, or otherwise clearly asks to deploy by uploading source code, Step 3 reads [Direct Code Deployment Reference](references/direct-code-deployment.md), deploys the agent directly, then proceeds directly to [Step 7: Test the Agent](#step-7-test-the-agent). - -## Workflow: Hosted Agent Deployment - -> ⚠️ **Warning: hosted agent deployment has 8 steps, not 7.** -> -> The single most common failure of this skill is stopping after Step 7 (invocation smoke test) and emitting a "deployment complete" summary. 
**Step 8 (auto-generate evaluation suite) is mandatory and runs automatically after every deploy — including redeploys, version bumps, and `azd deploy` re-runs.** -> -> Before you write any final summary, Playground link, version table, or deployment success message, you MUST self-verify: -> -> 1. Did Step 8 run to completion (suite generated **or** documented fallback persisted)? -> 2. Is deployment context resolvable from azd or metadata, and was `.foundry` updated only with non-derivable overlay/cache state? -> 3. Did you prompt the user to run an evaluation? -> -> If the answer to any of these is **no**, do not summarize — go run Step 8 now. - -> ⚠️ **`azd deploy` ≠ deployment complete.** `azd deploy` (or any `azd up`/`az acr build`/`agent_update` shortcut) only covers Steps 1–6. You **MUST** still execute Step 7 (invocation test) and Step 8 (auto-generate evaluation suite) before reporting success to the user. A successful `azd deploy` exit code is **not** a stopping condition. A successful invocation in Step 7 is **not** a stopping condition either. - -### Definition of Done — Hosted Agent Deployment - -A hosted-agent deployment is complete only when **every** box below is checked. Do **not** produce a final "deployment successful" summary, table, or Playground link until all items are done. If you skip any item, your response is incomplete. - -For direct-code deployments, Step 3 runs the direct-code reference and deploys the agent directly, then proceeds directly to Step 7. 
- -- [ ] Step 1 — Project scanned, type detected -- [ ] Step 2 — Environment variables confirmed with user -- [ ] Step 3 — Deployment method selected and prepared -- [ ] Step 4 — Agent configuration collected -- [ ] Step 5 — Agent definition schema retrieved -- [ ] Step 6 — `agent_update` called successfully -- [ ] Step 7 — RBAC checked **and** invocation smoke test passed (via the invoke skill) -- [ ] Step 8 — Auto-generated evaluation suite job reached `succeeded` (or documented fallback) -- [ ] Step 8 — Cache files written: `.foundry/suites/-v.json`, `.foundry/evaluators/-v.json` (FULL definition, not stub), `.foundry/datasets/--v.ref.json`, AND `.foundry/datasets/-v/` (actual dataset rows via SAS-url download) -- [ ] Deployment context is resolvable from azd or metadata; `.foundry/agent-metadata*.yaml` contains only non-derivable overlay/cache state for the selected environment -- [ ] User prompted to run an evaluation - -### Step 1: Detect and Scan Project +| Hosted (recommended) | `azd provision` when needed, direct code deployment via `azd deploy` (`code_configuration` present), `azd ai agent invoke` | +| Hosted (container) | `azd provision` when needed, container/ACR deployment via `azd deploy` (requires Docker/Podman + ACR, no `code_configuration:` in agent.yaml) | +| Prompt MCP | `agent_definition_schema_get`, `agent_update`, `agent_get`, `agent_delete` | +| Versioning | Each successful `azd deploy` creates an immutable agent version | +| Endpoint-only patch | `azd ai agent endpoint update` (no new version) | +| Local dev | [create-hosted](../create/create-hosted.md), [local-run](../create/references/local-run.md) | -Get the project path from the selected agent root in the project context (see [Common Project Context Resolution](../../SKILL.md#agent-common-project-context-resolution)). Detect the project type by checking for these files. Do **not** scan sibling agent folders. 
+## Hosted vs Prompt -| Project Type | Detection Files | -|--------------|-----------------| -| .NET | `*.csproj`, `*.fsproj` | -| Node.js | `package.json` | -| Python | `requirements.txt`, `pyproject.toml`, `setup.py` | -| Go | `go.mod` | -| Java (Maven) | `pom.xml` | -| Java (Gradle) | `build.gradle` | +- Shipping Python / .NET / Node code -> **Hosted** (azd workflow below). +- Updating only model / instructions / tools -> **Prompt** (MCP workflow below). -Delegate an environment variable scan to a sub-agent. Provide the selected agent root path and project type. Search source files inside that folder only for these patterns: +## Deployment Method Selection -- Hosted agents -| Project Type | Patterns to Search | -|--------------|--------------------| -| .NET (`*.cs`) | `Environment.GetEnvironmentVariable("...")`, `configuration["..."]`, `configuration.GetValue("...")` | -| Node.js (`*.js`, `*.ts`, `*.mjs`) | `process.env.VAR_NAME`, `process.env["..."]` | -| Python (`*.py`) | `os.environ["..."]`, `os.environ.get("...")`, `os.getenv("...")` | -| Go (`*.go`) | `os.Getenv("...")`, `os.LookupEnv("...")` | -| Java (`*.java`) | `System.getenv("...")`, `@Value("${...}")` | +Before running `azd deploy`, inspect `/agent.yaml`. -Classification: if followed by a throw/error → required; if followed by a fallback value → optional with default; otherwise → assume required, ask user. +| Agent YAML state | Deployment path | +|------------------|-----------------| +| `code_configuration:` present | **Direct code deploy** through `azd deploy`; no Docker/ACR build. | +| No `code_configuration:` | **Container/ACR deploy** through `azd deploy`; builds/pushes an image or uses a pre-built `image:`. | -### Step 2: Collect and Confirm Environment Variables +`code_configuration:` example in agent.yaml: -> ⚠️ **Warning:** Environment variables are included in the agent payload and are difficult to change after deployment. 
- -Use azd environment values from the project context to pre-fill discovered variables. Merge with any user-provided values. Present all variables to the user for confirmation with variable name, value, and source (`azd`, `project default`, or `user`). Mask sensitive values. - -Loop until the user confirms or cancels: -- `yes` → Proceed -- `VAR_NAME=new_value` → Update the value, show updated table, ask again -- `cancel` → Abort deployment - -### Step 3: Select Deployment Method and Prepare - -If the user explicitly requested direct code deployment or upload code deployment, do not generate a Dockerfile or build an image. Read and follow [Direct Code Deployment Reference](references/direct-code-deployment.md), deploy the agent directly, then proceed directly to [Step 7: Test the Agent](#step-7-test-the-agent). - -For all other hosted-agent deployments, continue with the Docker/ACR preparation below. - -#### Image built and pushed to ACR - -Delegate Dockerfile creation to a sub-agent. Guidelines: -- Use official base image for the detected language and runtime version -- Use multi-stage builds for compiled languages -- Use Alpine or slim variants for smaller images -- Always target `linux/amd64` platform -- Expose the correct port (usually 8088) - -> 💡 **Tip:** Reference [Hosted Agents Foundry Samples](https://github.com/microsoft-foundry/foundry-samples/tree/main/samples/python/hosted-agents) for containerized agent examples. +```yaml +code_configuration: + runtime: python_3_13 + entry_point: main.py + dependency_resolution: remote_build +``` -Also generate `docker-compose.yml` and `.env` files for local development. +Default to direct code for standard hosted-agent code. If `azd deploy` prints `Packaging container` for an agent that does not need container-specific behavior, add or fix `code_configuration` and retry. Use the container path when the agent depends on Dockerfile behavior, system packages, or a pre-built image. 
-**IMPORTANT**: You MUST always generate image tag as current timestamp (e.g., `myagent:202401011230`) to ensure uniqueness and avoid conflicts with existing images in ACR. DO NOT use static tags like `latest` or `v1`. +## Workflow -- Hosted agent (azd) -Collect ACR details from project context. +> Prerequisite: project scaffolded with `azd ai agent init`. If not, start at [create-hosted](../create/create-hosted.md). -- If an ACR already exists, use it, then verify that the Foundry project managed identity has pull permissions (for example, `Container Registry Repository Reader` or equivalent) on the target repository/registry. If the role assignment is missing, add it. -- If no ACR exists, create a new one with ABAC repository permissions mode, and assign `Container Registry Repository Reader` to the Foundry project managed identity. Foundry hosted agents use ABAC mode that requires repository-scoped roles, not the registry-level `AcrPull` role. +### Step 1 -- Resolve azd environment -Let the user choose the build method: +If the user provided an existing project endpoint, project ARM ID, or model deployment, set those values before deploy. Then verify the azd environment with `azd env get-values`. -**Cloud Build (ACR Tasks) (Recommended)** — no local Docker required: ```bash -az acr build --registry --image : --platform linux/amd64 --source-acr-auth-id "[caller]" --file Dockerfile . +azd env set AZURE_AI_PROJECT_ENDPOINT "<project-endpoint>" +azd env set AZURE_AI_PROJECT_ID "<project-resource-id>" +azd env set AZURE_AI_MODEL_DEPLOYMENT_NAME "<model-deployment-name>" +azd env get-values ``` -> ⚠️ **Mandatory:** The `--source-acr-auth-id "[caller]"` parameter is required. Do NOT omit it — without this flag the build will fail due to missing authentication context. +Run: -**Local Docker Build:** ```bash
-az acr login --name -docker tag : .azurecr.io/: -docker push .azurecr.io/: +azd ai project show --output json +azd ai agent show --output json ``` -> 💡 **Tip:** Prefer Cloud Build if Docker is not available locally. On Windows with WSL, prefix Docker commands with `wsl -e` if `docker info` fails but `wsl -e docker info` succeeds. +Branch on output: `not_deployed` -> Step 2. `active` / `deployed` -> redeploy (skip Step 2, go to Step 3). If `azd ai project show` fails with `missing_project_endpoint`, do Step 2 first -- `azd provision` will create the project. -### Step 4: Collect Agent Configuration +> **Important:** Before deploy, also make sure `agent.yaml` and the azd environment are aligned with the user's provided configuration values. -Use the project endpoint and ACR name from the project context. Ask the user only for values not already resolved: -- **Agent name** — Unique name for the agent -- **Model deployment** — Model deployment name (e.g., `gpt-4o`) +### Step 2 -- Provision Azure resources (one-time per env) -### Step 5: Get Agent Definition Schema +Skip `azd provision` when the user gave you an existing `AZURE_AI_PROJECT_ENDPOINT` or `FOUNDRY_PROJECT_ENDPOINT` and the workflow only needs to deploy the agent into that project. -Use `agent_definition_schema_get` with `schemaType: hosted` to retrieve the current schema and validate required fields. +Run provision only for new projects or real infrastructure changes: -### Step 6: Create the Agent - -Use `agent_update` with the agent definition: - -> ⚠️ **Protocol version source of truth:** Do NOT copy the protocol version from `agent_definition_schema_get` examples. Use the protocol version declared by the agent source itself (for example, `agent.yaml` or `agent.manifest.yaml`). 
- -```json -{ - "command": "agent_update", - "intent": "Update a hosted agent with a new docker image", - "parameters": { - "projectEndpoint": "", - "agentName": "", - "agentDefinition": { - "kind": "hosted", - "image": ".azurecr.io/:", - "cpu": "", - "memory": "", - "container_protocol_versions": [ - { "protocol": "", "version": "" } - ], - "environment_variables": { "": "" } - } - } -} +```bash +azd provision --no-prompt ``` -Capture the per-agent identity from the agent creation response, then retrieve the project-level agent identity from the project resource after creation. You will need both identities to assign the minimum RBAC required for invocation before running invoke tests. - -### Step 7: Test the Agent - -For a newly deployed hosted agent, before invocation testing, first check whether the per-agent identity and project-level agent identity already have the minimum RBAC required for invocation. - -Required role assignment: -- `Foundry User` - -Required scope: the Cognitive Services account, not the project. - -Check existing assignments before creating any new assignment. If the required role assignment is missing for either identity, assign it before invocation testing. +> Optional: run `azd provision --preview --no-prompt` first to preview the resource changes (a what-if) before applying them. -If the current user account does not have permission to create a missing role assignment, stop the deployment workflow here. Explain to the user that hosted-agent invocation requires `Foundry User` on the per-agent identity and project-level agent identity at the Cognitive Services account scope, and the deployment cannot be treated as complete until someone with RBAC assignment permission grants the missing role. +What this does: -After this RBAC check is complete, read and follow the [invoke skill](../invoke/invoke.md) to send a test message and verify the agent responds correctly. 
DO NOT SKIP reading the invoke skill — it contains important information about required hosted-agent session handling. +- Creates the Foundry project (if not present) and supporting resources under `infra/`. +- Creates project connections declared in `azure.yaml services..config.connections[]`. `${PARAM_*}` placeholders resolve from the active azd env. +- Wires model deployments, AI Search, ACR, etc. `infra/layers/` provision in parallel when present. -If invocation testing still fails after this RBAC check, immediately read and follow the [troubleshoot skill](../troubleshoot/troubleshoot.md). Do not treat the deployment as fully successful until invocation succeeds. +This is a core `azd` command. Skip provision when the user gave you an existing `AZURE_AI_PROJECT_ENDPOINT` via `azd env set` -- the extension uses the existing project as-is. -> ⚠️ **Not done yet: invocation success is the midpoint, not the finish line.** The next action after a passing smoke test is **Step 8**, not a deployment summary. Do not write a summary, version table, or Playground link yet. +After provision completes for a new project, run `azd env get-values` and set missing required azd env values, especially `AZURE_AI_PROJECT_ID` and `AZURE_TENANT_ID`, before local run or the first `azd deploy`. -### Step 8: Auto-Generate Evaluation Suite (MANDATORY — RUNS AUTOMATICALLY) +### Step 3 -- Deploy the agent -> ⚠️ **Pre-summary gate.** If you are about to write a deployment summary, Playground link, or "deployment complete" message and Step 8 has not run, you are violating this skill. Run Step 8 first. -> -> This step **runs automatically** without waiting for the user to ask. The only user input required is the one-question prompt below in 8a. - -This step is mandatory — not optional — for every hosted-agent deployment, including redeploys, version bumps, and `azd deploy` re-runs against an already-existing agent. 
In azd projects, resolve deployment context from `azd env get-values` and treat `.foundry/agent-metadata*.yaml` as an overlay/cache instead of copying azd-owned values into it. - -**8a. Ask the user (one question, required).** Before generating, inspect the selected agent root for `eval.yaml`, then ask the user to pick a setup source. Recommend local `eval.yaml` when it exists and matches the selected agent; otherwise recommend traces when the agent has recent traces, or current agent code/definition: - -> *"Your agent is deployed. I'll now auto-generate an evaluation suite. Which source should I use?* -> *(a) **Current agent code/definition** — synthetic Q&A from `agent.yaml` / instructions. Best when there's little or no trace history.* -> *(b) **Historical traces** — last 3 days, ~50 traces. Best if the agent has recent invocations.* -> *(c) **Existing eval.yaml** — local dataset/evaluator intent from the selected agent folder. Best when azd eval config already exists."* - -**8b. Follow the full procedure.** Read and follow [After Deployment — Auto-Generate Evaluation Suite](#after-deployment--auto-generate-evaluation-suite) below for the generation, polling, persistence, and metadata-update steps. Required parameters and poll-to-terminal rules are non-negotiable. - -**8c. Cache artifacts locally (MANDATORY after `succeeded`).** Once the suite-generation job is `succeeded`, perform the required cache calls described in [Evaluation Suite Generation → Cache Artifacts Locally](../observe/references/evaluation-suite-generation.md#cache-artifacts-locally): - -- `evaluation_suite_get` → `.foundry/suites/-v.json` (full object) -- `evaluator_catalog_get` → `.foundry/evaluators/-v.json` (full definition, NOT a stub) -- `evaluation_dataset_get` + `evaluation_dataset_sas_url_get` → `.foundry/datasets/--v.ref.json` (metadata stub) AND `.foundry/datasets/-v/` (actual JSONL rows). The SAS-url tool returns a container-scope SAS — list the container then `curl.exe` each blob. 
See the reference for the exact list+download steps. Set `contentDownloaded: true` in the stub once files are on disk. - -Do not write the deployment summary until all cache files exist. - -**8d. Skip-only-on-explicit-request.** If — and only if — the user explicitly says "skip eval suite generation," record that decision in your summary and still ensure deployment context remains resolvable from azd or metadata. "The user didn't ask for it" is **not** a valid reason to skip; this step is opt-out, not opt-in. - -## Workflow: Prompt Agent Deployment - -### Definition of Done — Prompt Agent Deployment +```bash +azd deploy --no-prompt +# Multi-service: +azd deploy <service-name> --no-prompt +``` -A prompt-agent deployment is complete only when **every** box below is checked. Do **not** produce a final "deployment successful" summary, table, or Playground link until all items are done. +What deploy does: -- [ ] Step 1 — Agent configuration collected -- [ ] Step 2 — Agent definition schema retrieved -- [ ] Step 3 — `agent_update` called successfully -- [ ] Step 4 — Invocation smoke test passed (via the invoke skill) -- [ ] Step 5 — Auto-generated evaluation suite job reached `succeeded` (or documented fallback) -- [ ] Step 5 — Cache files written: `.foundry/suites/-v.json`, `.foundry/evaluators/-v.json` (FULL definition, not stub), `.foundry/datasets/--v.ref.json`, AND `.foundry/datasets/-v/` (actual dataset rows via SAS-url download) -- [ ] Deployment context is resolvable from azd or metadata; `.foundry/agent-metadata*.yaml` contains only non-derivable overlay/cache state for the selected environment -- [ ] User prompted to run an evaluation +- Reads `/agent.yaml`, packages the agent, uploads it, and registers a new immutable version. +- **Direct code deploy** (`code_configuration` present): zips the source, excluding paths listed in `.agentignore`, and lets Foundry build the runtime image.
+- **Container deploy** (no code configuration): builds the `Dockerfile`, pushes to the project's ACR, registers the version. When `agent.yaml` has `image:` set, `azd` reuses the pre-built image. -### Step 1: Collect Agent Configuration +After deploy, azd writes `AGENT_<SERVICE>_NAME`, `AGENT_<SERVICE>_VERSION`, and `AGENT_<SERVICE>_<PROTOCOL>_ENDPOINT` (one per protocol) into the active env. -Use the project endpoint from the project context (see [Common Project Context Resolution](../../SKILL.md#agent-common-project-context-resolution)). Ask the user only for values not already resolved: -- **Agent name** — Unique name for the agent -- **Model deployment** — Model deployment name (e.g., `gpt-4o`) -- **Instructions** — System prompt (optional) -- **Temperature** — Response randomness 0-2 (optional, default varies by model) -- **Tools** — Tool configurations (optional) +Re-deploying an identical build still creates a new version; `azd` prints `Agent version is already active.` and skips the poll. -### Step 2: Get Agent Definition Schema +If deploy reports `Done` for the service and then fails only in `postdeploy` with `Agent with version not found`, the `azure.yaml` service key and the `agent.yaml` `name` were mismatched. Rename the `azure.yaml services` key to the deployed agent name and rerun `azd deploy --no-prompt`; do not switch deployment method. -Use `agent_definition_schema_get` with `schemaType: prompt` to retrieve the current schema. +### Step 4 -- Verify and invoke -### Step 3: Create the Agent +```bash +azd ai agent show --output json +``` -Use `agent_update` with the agent definition: +Expect `"status": "active"` (or `"deployed"`) and an `agent_endpoints` map. Smoke-test: -```json -{ - "kind": "prompt", - "model": "", - "instructions": "", - "temperature": 0.7 -} +```bash +azd ai agent invoke "hello, are you up?" ``` -### Step 4: Test the Agent - -Read and follow the [invoke skill](../invoke/invoke.md) to send a test message and verify the agent responds correctly.
+> `azd ai agent invoke` is billed, so it prints a confirmation envelope on `--no-prompt`. Summarize `changes[]`, then run `confirmCommand` once consented. -> ⚠️ **Not done yet: invocation success is the midpoint, not the finish line.** The next action is **Step 5**, not a deployment summary. Do not write a summary or Playground link yet. +Run one remote invocation only unless the user explicitly asked to test multi-turn/session behavior. A single successful response is enough for the deployment smoke test. Anything other than a completed/successful response -> run `azd ai agent doctor --output json`, then follow [troubleshoot](../troubleshoot/troubleshoot.md). ### Step 5: Auto-Generate Evaluation Suite (MANDATORY — RUNS AUTOMATICALLY) > ⚠️ **Pre-summary gate.** If you are about to write a deployment summary or Playground link and Step 5 has not run, you are violating this skill. Run Step 5 first. -> -> This step **runs automatically** without waiting for the user to ask. The only user input required is the one-question prompt below. -**5a. Ask the user (one question, required).** Before generating, inspect the selected agent root for `eval.yaml`, then ask which setup source to use. Recommend local `eval.yaml` when it exists and matches the selected agent; otherwise recommend recent traces when present, or current agent code/definition: +This step runs automatically after deploy. Ask the user which source to use and start it right after deploy succeeds — with `--no-wait`, `generate` returns in seconds and generation runs server-side, so it overlaps with invoke/test steps and finishes faster overall. -> *"Your agent is deployed. I'll now auto-generate an evaluation suite. Which source should I use? (a) Current agent code/definition (synthetic Q&A), (b) Historical traces (last 3 days, ~50 traces), or (c) Existing eval.yaml from the selected agent folder?"* +> *"Your agent is deployed. Want me to set up an evaluation suite now? 
(a) Yes — current agent instructions (synthetic Q&A), (b) Yes — historical traces (last 3 days), (c) Yes — use existing `eval.yaml`, (d) No / later."* -**5b. Follow the full procedure.** Read and follow [After Deployment — Auto-Generate Evaluation Suite](#after-deployment--auto-generate-evaluation-suite) below. +| Choice | Command | What's next | +|---|---|---| +| (a) Agent instructions | `azd ai agent eval generate --gen-instruction "" --no-wait --no-prompt` — `--gen-instruction` is required (hosted agents don't auto-derive it); use `agent.yaml` `description:`. | Generation runs server-side. Tell the user: *"Suite submitted. Run `azd ai agent eval run` whenever you're ready — it'll finalize `eval.yaml` and execute the eval in one step."* | +| (b) Historical traces | `azd ai agent eval generate --trace-days 3 --max-samples 50 --no-wait --no-prompt` | Same as (a). | +| (c) Existing `eval.yaml` | Skip `generate`. | Tell the user: *"Using existing `eval.yaml`. Run `azd ai agent eval run` when ready."* | +| (d) No / later | Skip. | Tell the user: *"You can run `azd ai agent eval generate` (and then `eval run`) anytime."* | -**5c. Cache artifacts locally (MANDATORY after `succeeded`).** Once the suite-generation job is `succeeded`, perform the required cache calls described in [Evaluation Suite Generation → Cache Artifacts Locally](../observe/references/evaluation-suite-generation.md#cache-artifacts-locally): suite JSON, evaluator full definition, dataset `.ref.json` PLUS the actual dataset blobs downloaded via `evaluation_dataset_sas_url_get` (container SAS → list → curl each blob). Do not write the deployment summary until those files exist. 
+Other useful flags on `generate`: `--dataset ` to reuse an existing dataset instead of generating one, `--evaluator ` (repeatable) to pin built-in or custom evaluators, `--eval-model ` to choose the model used for generation and evaluation, `--reset-defaults` to overwrite an existing eval config, `--name ` and `--out-file ` (default `eval.yaml`). -**5d. Skip-only-on-explicit-request.** Skip only if the user explicitly says "skip eval suite generation." Keep deployment context resolvable from azd or metadata. "The user didn't ask for it" is **not** a valid reason to skip. +Then proceed to Step 6. See [After Deployment — Auto-Generate Evaluation Suite](#after-deployment--auto-generate-evaluation-suite) for run/refresh details. -## Display Agent Information +### Step 6 -- Hand off -> ⚠️ **Gate:** Do not render the table or Playground link until the Definition of Done checklist for the selected workflow (Hosted or Prompt) is fully satisfied, including the invocation smoke test, the auto-generated evaluation suite (or documented skip), and resolvable deployment context plus `.foundry` overlay/cache updates. The Playground link is the final artifact, not a mid-workflow checkpoint. +- Send more messages -> [invoke](../invoke/invoke.md) +- Evaluate / optimize -> [observe](../observe/observe.md) +- Diagnose failures -> [troubleshoot](../troubleshoot/troubleshoot.md) +- Search traces / latency -> [trace](../trace/trace.md) -Once deployment is done for either hosted or prompt agent, display the agent's details in a nicely formatted table. +## `.agentignore` -Below the table you MUST also display a Playground link for direct access to the agent in Azure AI Foundry: +`azd ai agent init` writes a default `/.agentignore` for code-deploy projects (gitignore syntax) that excludes tooling files, secrets, language artifacts, and Docker files from the deploy ZIP. Only the root file is read; use `!path` to force-include. 
-[Open in Playground](https://ai.azure.com/nextgen/r/{encodedSubId},{resourceGroup},,{accountName},{projectName}/build/agents/{agentName}/build?version={agentVersion}) +## Endpoint or card edits -- no new version -To calculate the encodedSubId, you need to take subscription id and convert it into its 16-byte GUID, then encode it as URL-safe base64 without padding (= characters trimmed). You can use the following Python code to do this conversion: +When only `agentEndpoint:` or `agentCard:` changed in `agent.yaml`: +```bash +azd ai agent endpoint update --dry-run # preview +azd ai agent endpoint update --force # apply ``` -python -c "import base64,uuid;print(base64.urlsafe_b64encode(uuid.UUID('').bytes).rstrip(b'=').decode())" -``` - -## Document Deployment Context -After a successful deployment, make the deployment context reusable without duplicating azd-owned values. If `azure.yaml` and `azd env get-values` provide the project endpoint, agent name/version, ACR, App Insights, subscription, resource group, and project identifiers, treat azd as the source of truth. Persist only non-derivable overlay/cache state to the selected metadata file under `/.foundry/`. +Idempotent. -| State | Preferred Source | Metadata Behavior | -|-------|------------------|-------------------| -| Project endpoint, agent name/version, ACR, observability | azd env values | Do not copy when azd provides them | -| azd binding | selected azd env/service | Store `azd.environmentName` and `azd.service` when useful | -| Evaluation suites | Foundry lookup/generation or verified `eval.yaml` sync | Persist in `evaluationSuites[]` | -| Local cache paths and results | `.foundry/` cache/results | Persist suite, dataset, evaluator, and result refs | +## Multi-environment deploys -If metadata and azd disagree for the same deployment value, stop and ask which source is authoritative. 
If the selected metadata file is a preferred single-environment file, update only that one environment block and leave sibling metadata files untouched. If the selected metadata file is a legacy multi-environment file, merge the selected environment instead of overwriting other environments or cached evaluation suites without confirmation. If the selected environment still uses older `testSuites[]` or legacy `testCases[]`, rewrite that environment to `evaluationSuites[]` when you persist synced suite metadata. +```bash +azd env list +azd env select prod +azd deploy --no-prompt +``` -## After Deployment — Auto-Generate Evaluation Suite +Each env has its own `AGENT__*` vars. -> ⚠️ **This step is automatic.** After a successful deployment, immediately prepare the selected `.foundry` environment for evaluation without waiting for the user to request it. This matches the eval-driven optimization loop. +## Common failure modes -- Hosted -### 1. Resolve Effective Context +| Error | Fix | +|-------|-----| +| `missing_project_endpoint` | Run `azd env set AZURE_AI_PROJECT_ENDPOINT `, or run `azd provision` for a new project. | +| `invalid_agent_manifest` | `azd ai agent doctor`; fix the named field. | +| `invalid_connection` | Inspect with `azd ai agent connection show `. | +| Docker daemon not running | You are on the container path. Add/fix `code_configuration` and retry direct code deploy. Only install Docker or try remote image build if you specifically need container deploy. | +| ACR push 403 | Foundry project RBAC is missing `AcrPush` for your identity. Consider switching to direct code deployment to avoid ACR entirely. | +| `container registry endpoint not found` | ACR is not configured. Use `azd env set AZURE_CONTAINER_REGISTRY_ENDPOINT `, or switch to direct code deployment. | +| Agent version poll times out | Build still running; retry `azd ai agent show` after a minute. | +| `session_not_ready` (424) | Cold start or readiness delay. Wait 15-30 seconds and retry. 
If persistent, use `1` CPU / `2Gi` memory minimum, verify the model deployment name, capability host, and agent identity role. | +| `invalid value "json" for --output` from `azd ai agent invoke` | Invoke supports only `default` and `raw` currently. Retry without `--output json`. | +| `could not resolve agent service in azd project: no azure.ai.agent service named '' found in azure.yaml` from `azd ai agent invoke` | Name mismatch. Use the service name, update `agent.yaml`, or invoke through the Foundry MCP `agent_invoke` tool. | +| `subscription quota exceeded` | Ask user to request quota; do not auto-retry. | +| Bicep deploy errors | Forward `error.details[]` verbatim to the user. | +| `RoleAssignmentUpdateNotPermitted` during provision | A role assignment already exists but conflicts. Check for existing role assignments with `az role assignment list --scope `. The provision may have succeeded for all resources except RBAC — verify with `azd ai project show` and manually assign the `Cognitive Services User` role to the agent identity if needed. | +| `eval generate`: `one of --gen-instruction ... is required` | Retry with `--gen-instruction ""` (Step 5 option (a)). | +| `unknown command "init" for "azd ai agent eval"` | Command was renamed: use `azd ai agent eval generate` (requires azd CLI with `azure.ai.agents` extension up to date). | -Use the [Common Project Context Resolution](../../SKILL.md#agent-common-project-context-resolution) flow. In azd projects, resolve project endpoint, agent name/version, ACR, and observability from `azd env get-values`; use `.foundry/agent-metadata*.yaml` only for overlays and synced suite/cache refs. If `eval.yaml` exists in the selected agent root, parse it as local evaluation intent before generating anything new. +For deeper logs, see [troubleshoot](../troubleshoot/troubleshoot.md). -### 2. 
Read Agent Instructions +## Workflow -- Prompt agent (MCP) -Use **`agent_get`** (or local `agent.yaml`) to understand the agent's purpose and capabilities. +Prompt agents are not containerized -- they are a model + instructions + optional tools, created through the Foundry MCP server. Use when the user explicitly wants a prompt agent. -### 3. Reuse or Refresh Suite Cache +### MCP tools -Inspect the selected agent root before generating anything new: +| Tool | Purpose | +|------|---------| +| `agent_definition_schema_get` | Get the schema (`schemaType: "prompt"`). | +| `agent_update` | Create or update; supports `isCloneRequest` + `cloneTargetAgentName`. | +| `agent_get` | List or fetch one. | +| `agent_delete` | Delete an agent. | -- Reuse a selected environment `evaluationSuites[]` entry when it has `suiteName`, `suiteVersion`, matching `.foundry/datasets/`, and matching `.foundry/evaluators/` cache files. -- When `eval.yaml` exists and matches the selected agent, prefer verifying/registering its dataset and evaluator references before creating a brand-new generated suite. -- Call `evaluation_suite_get` to confirm the remote suite still exists before reusing it. -- Ask before refreshing cached files, replacing thresholds, or writing a new suite version. -- If cache or the remote suite is missing/stale, generate a new suite and update metadata for the active environment only. +### Steps -### 4. Identify Generation Deployment +1. **Collect config** -- resolve endpoint from `azd env get-values` or ask. Then ask for **agent name**, **model deployment** (e.g. `gpt-4o`), and optional **instructions**, **temperature**, **tools**. +2. **Get schema** -- `agent_definition_schema_get` with `schemaType: "prompt"`. +3. **Create** -- `agent_update` with `{"kind": "prompt", "model": "", "instructions": "...", "temperature": 0.7}`. +4. **Smoke test** -- follow [invoke](../invoke/invoke.md). +5. 
**Auto-generate evaluation suite** -- see [Step 5: Auto-Generate Evaluation Suite (Prompt)](#step-5-auto-generate-evaluation-suite-prompt-mandatory--runs-automatically) below. +6. **Hand off** -- evaluate via [observe](../observe/observe.md); clone via `agent_update` + `isCloneRequest`; delete via `agent_delete`. -Use **`model_deployment_get`** to list the selected project's actual model deployments, then choose one that supports chat completions for quality evaluators. Do **not** assume `gpt-4o` exists in the project. If no deployment supports chat completions, stop the auto-setup flow and tell the user quality evaluators cannot run until a compatible judge deployment is available. +### Step 5: Auto-Generate Evaluation Suite (Prompt) (MANDATORY — RUNS AUTOMATICALLY) -### 5. Generate Evaluation Suite +> ⚠️ **Pre-summary gate.** If you are about to write a deployment summary or Playground link and Step 5 has not run, you are violating this skill. Run Step 5 first. -Read and follow [Evaluation Suite Generation](../observe/references/evaluation-suite-generation.md) for source selection, required parameters, polling, and cache writes. In the deploy flow, keep these guardrails: +This step runs automatically after deploy. Ask the user which source to use and start it right after deploy succeeds — with `--no-wait`, `generate` returns in seconds and generation runs server-side, so it overlaps with invoke/test steps and finishes faster overall. -- Ask the user which setup source to use before calling `evaluation_suite_generation_job_create`; recommend matching `eval.yaml` when present, then recent traces when available, otherwise the current agent code/definition. -- Use the chat-capable generation deployment selected above and honor the reference's service constraints, especially `maxSamples` (15-1000) and `agentSourceNames: []` for agent-sourced suites. 
-- Do not report deployment complete while the generation job is `in_progress`; poll with `evaluation_suite_generation_job_get` until `succeeded`, `failed`, or `canceled`, then inspect the suite with `evaluation_suite_get` and cache artifacts as described in the reference. +> *"Your agent is deployed. Want me to set up an evaluation suite now? (a) Yes — current agent instructions (synthetic Q&A), (b) Yes — historical traces (last 3 days), (c) Yes — use existing `eval.yaml`, (d) No / later."* -### 6. Fallback to Manual Suggestions +| Choice | Command | What's next | +|---|---|---| +| (a) Agent instructions | `azd ai agent eval generate --gen-instruction "" --no-wait --no-prompt` | Generation runs server-side. Tell the user: *"Suite submitted. Run `azd ai agent eval run` whenever you're ready — it'll finalize `eval.yaml` and execute the eval in one step."* | +| (b) Historical traces | `azd ai agent eval generate --trace-days 3 --max-samples 50 --no-wait --no-prompt` | Same as (a). | +| (c) Existing `eval.yaml` | Skip `generate`. | Tell the user: *"Using existing `eval.yaml`. Run `azd ai agent eval run` when ready."* | +| (d) No / later | Skip. | Tell the user: *"You can run `azd ai agent eval generate` (and then `eval run`) anytime."* | -If `evaluation_suite_generation_job_create`, `evaluation_suite_generation_job_get`, or `evaluation_suite_get` fails, is unavailable, or returns incomplete artifacts, fall back to the previous manual flow: +## Common failure modes -- Prompt -1. Call `evaluator_catalog_get` and suggest relevant built-in/custom evaluators. -2. Read [Generate Seed Evaluation Dataset](../eval-datasets/references/generate-seed-dataset.md), generate valid local JSONL with `query` and `expected_behavior`, and register it with `evaluation_dataset_create`. -3. Persist the suite with `generationSource: manual-fallback` and include the fallback reason in the workflow summary. 
+| Error | Fix | +|-------|-----| +| Schema fetch failed | Verify endpoint format: `https://<resource>.services.ai.azure.com/api/projects/<project>`. | +| Agent creation failed | Use `agent_definition_schema_get` to verify the definition. | +| Permission denied | The user needs the `Foundry User` role on the project. | +| Model not found | Deploy the model first via [models/deploy-model](../../models/deploy-model/SKILL.md). | -Do **not** silently ignore generation failures; the user should know whether setup used the generated-suite path or the fallback path. +## Display agent details (both flows) -The local filename must start with the effective selected Foundry agent name before adding stage, environment, or version suffixes. +After a successful deploy, show the agent's name, version, status, and endpoints in a table. Include a Playground link: -### 7. Persist Artifacts and Evaluation Suites +``` +https://ai.azure.com/nextgen/r/{encodedSubId},{resourceGroup},,{accountName},{projectName}/build/agents/{agentName}/build?version={agentVersion} +``` -Save generated or fallback evaluator definitions, local datasets, and evaluation outputs under `.foundry/` using the cache paths defined in [Evaluation Suite Generation](../observe/references/evaluation-suite-generation.md), then register or update evaluation suites in the selected metadata file for the selected environment: +`encodedSubId` is the subscription GUID as URL-safe base64 (no `=`): -```text -.foundry/ - agent-metadata.yaml - agent-metadata.prod.yaml - suites/ - -v.json - evaluators/ - -v.json - datasets/ - --v.ref.json - -v/ - results/ +```bash +python -c "import base64,uuid;print(base64.urlsafe_b64encode(uuid.UUID('<subscription-id>').bytes).rstrip(b'=').decode())" ``` -Each evaluation suite should bundle the remote suite reference, local cache paths, thresholds, and a `tags` map (for example, `tier: smoke`, `purpose: baseline`, `stage: generated`).
Persist `suiteName`, `suiteVersion`, `generationJobId`, `generationSource`, `datasetFile`, and `datasetUri` together. Do not persist azd-owned deployment fields when azd resolves them. If the selected environment still uses older `testSuites[]` or legacy `testCases[]`, replace that list with `evaluationSuites[]` in the rewritten metadata and map legacy `priority` to `tags.tier` only when `tags.tier` is missing. - -### 8. Prompt User +For hosted agents, `playground_url` is in `azd ai agent show --output json`. -*"Your agent is deployed and running in the selected environment. The `.foundry` cache now contains generated evaluation-suite metadata, local dataset/evaluator references, and remote Foundry suite references. Would you like to run an evaluation to identify optimization opportunities?"* +## After Deployment — Auto-Generate Evaluation Suite -- **Yes** → follow the [observe skill](../observe/observe.md) starting at **Step 2 (Evaluate)** — cache and metadata are already prepared. -- **No** → stop. The user can return later. -- **Production trace analysis** → follow the [trace skill](../trace/trace.md) to search conversations, diagnose failures, and analyze latency using App Insights. +> Reference for Step 5 options (a) and (b) — start `generate` right after deploy so its server-side generation overlaps with invoke/test steps and finishes faster. Options (c) and (d) skip `generate` and go straight to section 3 (run) or stop. -## Agent Definition Schemas +### 1. 
Inspect existing eval.yaml -### Prompt Agent +Check the selected agent root for `eval.yaml`: -| Property | Type | Required | Description | -|----------|------|----------|-------------| -| `kind` | string | ✅ | Must be `"prompt"` | -| `model` | string | ✅ | Model deployment name (e.g., `gpt-4o`) | -| `instructions` | string | | System message for the model | -| `temperature` | number | | Response randomness (0-2) | -| `top_p` | number | | Nucleus sampling (0-1) | -| `tools` | array | | Tools the model may call | -| `tool_choice` | string/object | | Tool selection strategy | -| `rai_config` | object | | Responsible AI configuration | +- **Exists and matches the selected agent** → skip `generate`; go to step 3 (run). +- **Missing or stale** → continue to step 2. -### Hosted Agent +### 2. Submit generation (asynchronous, server-side) -| Property | Type | Required | Description | -|----------|------|----------|-------------| -| `kind` | string | ✅ | Must be `"hosted"` | -| `image` | string | ✅ | Container image URL | -| `cpu` | string | ✅ | CPU allocation (e.g., `"0.5"`, `"1"`, `"2"`) | -| `memory` | string | ✅ | Memory allocation (e.g., `"1Gi"`, `"2Gi"`) | -| `container_protocol_versions` | array | ✅ | Protocol and version pairs | -| `environment_variables` | object | | Key-value pairs for container env vars | -| `tools` | array | | Tool configurations | -| `rai_config` | object | | Responsible AI configuration | +Run `azd ai agent eval generate --no-wait` with the user's chosen flags (see the Step 5 table). The command: -### Container Protocols +- Submits dataset + evaluator generation jobs server-side. +- Returns in seconds. +- Writes pending operation IDs to local azd state. +- Writes a placeholder `eval.yaml` at the agent root (override with `--out-file `). 
-| Protocol | Description | -|----------|-------------| -| `a2a` | Agent-to-Agent protocol | -| `responses` | OpenAI Responses API | -| `invocations` | Invocation payload protocol for arbitrary request bodies and custom SSE behavior | -| `invocations_ws` | Duplex WebSocket protocol for real-time / voice / signaling workloads (`WS /invocations_ws` on port 8088). Connect through `wss://...endpoint/protocols/invocations_ws?...&agent_session_id=...`. See the dedicated [invocations-ws skill](../invocations-ws/invocations-ws.md) for the full client/server contract. | -| `mcp` | Model Context Protocol | +No skill-side polling, terminal handle, or later-turn re-check is needed. `azd ai agent eval run` (section 3) automatically resumes a pending generation, downloads artifacts, finalizes `eval.yaml`, then runs the eval. -## Agent Management Operations +If the user wants to wait synchronously instead (e.g., to inspect `eval.yaml` before running), drop `--no-wait` — `generate` will then submit the jobs, wait for completion, download review artifacts, and write the finalized `eval.yaml` before returning (typically several minutes). -### Clone an Agent +### 3. Run the suite -Use `agent_update` with `isCloneRequest: true` and `cloneTargetAgentName` to create a copy. For prompt agents, optionally override the model with `modelName`. +```bash +azd ai agent eval run +``` -### Delete an Agent +Use `azd ai agent eval show -O results.json` to inspect run details, or `azd ai agent eval list` to see history. -Use `agent_delete` — automatically cleans up hosted-agent runtime resources. +### 4. Refresh datasets/evaluators (later) -### List Agents +When local files under `datasets//` or `evaluators//` change, run `azd ai agent eval update --dataset-only` or `--evaluator-only` to upload new versions. azd bumps the `version` fields in `eval.yaml`. -Use `agent_get` without `agentName` to list all agents, or with `agentName` to get a specific agent's details. +### 5. 
Prompt User -## Error Handling +*"Your agent is deployed and evaluation suite generation is **submitted server-side** (still running, takes several minutes). Would you like to run an evaluation now? `azd ai agent eval run` will wait for generation to finish, then execute the eval."* -| Error | Cause | Resolution | -|-------|-------|------------| -| Project type not detected | No known project files found | Ask user to specify project type manually | -| Docker not running | Docker Desktop not started or not installed | Start Docker Desktop, or use Cloud Build (ACR Tasks) instead | -| ACR login failed | Not authenticated to Azure | Run `az login` first, then `az acr login --name ` | -| Build/push failed | Dockerfile errors or insufficient ACR permissions | Check Dockerfile syntax, verify Contributor or AcrPush role on registry | -| ACR build log crash | `UnicodeEncodeError` when `az acr build` streams remote logs | The remote build continues independently — do not assume failure. Get the `` from the earlier `az acr build` output and check status with `az acr task show-run -r --run-id --query status`. 
| -| Agent creation failed | Invalid definition or missing required fields | Use `agent_definition_schema_get` to verify schema, check all required fields | -| Hosted agent not running after creation | Provisioning failed or the image is not usable | Verify ACR image path, check cpu/memory values, confirm ACR permissions, then inspect hosted-agent logs with the troubleshoot skill | -| Role assignment failed | The required invocation RBAC was not granted | Stop the deployment workflow and explain that hosted-agent invocation requires `Foundry User` on the per-agent identity and project-level agent identity at the Cognitive Services account scope | -| Invocation test failed after deployment | Missing or incorrect invocation RBAC for the per-agent identity or project-level agent identity | Check whether `Foundry User` is assigned to the per-agent identity and project-level agent identity at the Cognitive Services account scope; assign missing role assignments, then retry invocation | -| Permission denied | Insufficient Foundry project permissions | Verify Foundry Owner or Contributor role on the project | -| Schema fetch failed | Invalid project endpoint | Verify project endpoint URL format: `https://.services.ai.azure.com/api/projects/` | +- **Yes** → run `azd ai agent eval run` (this resumes the pending generation, then runs the eval — may take several minutes the first time), then follow the [observe skill](../observe/observe.md) to interpret results. +- **No** → stop. The user can return later via `azd ai agent eval run` — it will pick up wherever the pending generation is. +- **Production trace analysis** → follow the [trace skill](../trace/trace.md). 
## Non-Interactive / YOLO Mode -When running in non-interactive mode (e.g., `nonInteractive: true` or YOLO mode), the skill skips user confirmation prompts and uses sensible defaults: - -- **Environment variables** — Uses values resolved from `azd env get-values` and project defaults without prompting for confirmation -- **Agent name** — Must be provided in the initial user message or derived sensibly from the project context (`agent.yaml`, `agent.manifest.yaml`, folder name); if missing, the skill fails with an error instead of prompting -- **Docker/ACR hosted-agent verification** — Automatically continues into RBAC and invocation verification without additional prompts once deployment succeeds -- **Direct code deployment** — If explicitly requested, Step 3 reads the direct-code reference, deploys the agent directly, then proceeds directly to Step 7 - -> ⚠️ **Warning:** In non-interactive mode, ensure all required values (project endpoint, agent name, model deployment name, and ACR image for Docker/ACR deployments) are provided upfront in the user message, local `.env`, manifests, or available via `azd env get-values`. Missing values will cause the deployment to fail rather than prompt. - -## Additional Resources - -- [Foundry Hosted Agents](https://learn.microsoft.com/azure/ai-foundry/agents/concepts/hosted-agents?view=foundry) -- [Foundry Agent Runtime Components](https://learn.microsoft.com/azure/ai-foundry/agents/concepts/runtime-components?view=foundry) -- [Foundry Samples](https://github.com/microsoft-foundry/foundry-samples/) +- Hosted: always pass `--no-prompt`. If `azd ai agent invoke` prints a `confirmation_required` envelope, summarize `changes[]` and re-run with `--force` after the user consents -- never auto-append `--force`. +- Prompt: all required values (project endpoint, agent name, model deployment) must come from the user message or `azd env get-values`; missing values should fail loudly rather than prompt. 
diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/references/direct-code-deployment.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/references/direct-code-deployment.md deleted file mode 100644 index 35a575a8..00000000 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/deploy/references/direct-code-deployment.md +++ /dev/null @@ -1,339 +0,0 @@ -# Direct Code Deployment Reference - -Use this reference only when the user explicitly requested direct code deployment. - -This reference covers only direct-code deployment from [deploy.md](../deploy.md) Step 3. After deployment completes, proceed directly back to [deploy.md Step 7: Test the Agent](../deploy.md#step-7-test-the-agent). - -Direct-code deployment uses local project files plus the Foundry REST API for upload and version operations. Azure MCP discovery and context lookup are optional context, not a prerequisite when project endpoint, model deployment, and agent name are already resolved. For the code upload itself, follow the REST endpoints below instead of the Docker/ACR `agent_update` path. - -## Task 1: Preflight - -Resolve the project endpoint from project context, `.env`, `azd env get-values`, or the user. The endpoint must look like: - -```text -https://.services.ai.azure.com/api/projects/ -``` - -Every direct-code REST call must use: - -```http -Foundry-Features: CodeAgents=V1Preview,HostedAgents=V1Preview -``` - -This includes create/update/create-version, version polling, version listing, code download, and delete calls. - -Get the token for this resource. Do not use the Cognitive Services token resource for direct-code REST calls: - -```text -https://ai.azure.com -``` - -Direct-code REST caller prerequisite: the signed-in user or service principal must have `Azure AI User` or a higher role on the Foundry project. 
- -Global direct-code limits: - -- Agent name: at most 63 characters, alphanumeric and hyphens only. -- Multipart upload zip: at most 250 MB. -- CPU/memory: use conservative defaults such as `0.5` CPU and `1Gi` when the project does not specify resources. - -## Task 2: Detect Runtime and Entry Point - -Scan only the selected agent root. - -| Project Type | Detection | Runtime | Entry point | -|--------------|-----------|---------|-------------| -| Python | `main.py` plus `requirements.txt` | `python_3_13` or `python_3_14` | `["python", "main.py"]` | -| C#/.NET | exactly one `*.csproj` or user-selected project file | `dotnet_8`, `dotnet_9`, or `dotnet_10` | `["dotnet", ".dll"]` | - -For Python, prefer a supported runtime explicitly declared in `agent.yaml`/`agent.manifest.yaml` or provided by the user. If none is declared, use `python_3_13`. Do not use `python_3_11` or `python_3_12` for this preview path; if a manifest declares either one, warn and choose `python_3_13` unless the user selects another supported runtime. - -For .NET, derive the runtime from the project `TargetFramework`: - -| TargetFramework | Runtime | -|-----------------|---------| -| `net8.0` | `dotnet_8` | -| `net9.0` | `dotnet_9` | -| `net10.0` | `dotnet_10` | - -If the target framework is missing or does not map to a supported runtime, ask instead of guessing. - -For .NET, derive `` from `` in the `.csproj` when present; otherwise use the `.csproj` file stem. Never use `["dotnet", "run", ...]` for direct code deployment. The runtime environment has the .NET runtime, not the SDK, and `dotnet run` fails with `No .NET SDKs were found`. 
- -## Task 3: Collect Direct-Code Configuration - -Ask only for values not already resolved: - -- `projectEndpoint` -- `agentName` - prefer `agent.yaml` or `agent.manifest.yaml` name, then folder name -- model deployment environment variable, usually `AZURE_AI_MODEL_DEPLOYMENT_NAME` -- CPU and memory - prefer `agent.yaml` resources, otherwise use conservative defaults (`0.5` CPU, `1Gi` memory) -- protocol/version - prefer `agent.yaml` protocols, otherwise `responses` `1.0.0` - -Do not put `FOUNDRY_PROJECT_ENDPOINT` in `environment_variables`; the platform injects it for hosted agents. Include only custom variables that the agent code reads at runtime, such as `AZURE_AI_MODEL_DEPLOYMENT_NAME`. - -### Dependency Packaging Mode - -Use remote dependency packaging by default (`dependency_resolution: "remote_build"` in `metadata.json`). In this mode, upload source files plus dependency manifests such as `requirements.txt` or `.csproj`; Foundry installs dependencies during the remote build. - -Use bundled local dependencies only when the user explicitly asks for it. In bundled mode, package Linux-compatible dependencies that match the selected runtime. - -For remote packaging, keep the user's dependency files unchanged. Do not slim, pin, or remove packages just to make deployment smoother. If the service fails while installing dependencies, report the exact error and ask before changing dependencies. - -## Task 4: Create `metadata.json` - -Create the parent directory and write `.foundry/direct-code/metadata.json`. Do not write `metadata.json` before the parent directory exists. - -Use the user's platform or language tooling. For example, Python works consistently across common shells: - -```python -from pathlib import Path - -metadata = Path(".foundry/direct-code/metadata.json") -metadata.parent.mkdir(parents=True, exist_ok=True) -# Build the JSON object, then write it to metadata. 
-``` - -Example Python metadata: - -```json -{ - "description": "Direct code deployment hosted agent", - "definition": { - "kind": "hosted", - "protocol_versions": [ - { - "protocol": "responses", - "version": "1.0.0" - } - ], - "cpu": "0.5", - "memory": "1Gi", - "environment_variables": { - "AZURE_AI_MODEL_DEPLOYMENT_NAME": "" - }, - "code_configuration": { - "runtime": "python_3_13", - "entry_point": ["python", "main.py"], - "dependency_resolution": "remote_build" - } - } -} -``` - -Example C#/.NET metadata: - -```json -{ - "description": "Direct code deployment C# hosted agent", - "definition": { - "kind": "hosted", - "protocol_versions": [ - { - "protocol": "responses", - "version": "1.0.0" - } - ], - "cpu": "0.5", - "memory": "1Gi", - "environment_variables": { - "AZURE_AI_MODEL_DEPLOYMENT_NAME": "" - }, - "code_configuration": { - "runtime": "dotnet_10", - "entry_point": ["dotnet", ".dll"], - "dependency_resolution": "remote_build" - } - } -} -``` - -## Task 5: Create a Flat Code Zip - -The zip must be flat at the root. Do not include a top-level wrapper folder such as `my-agent/`. The entry point path in `metadata.json` must resolve from the zip root. - -Before upload, inspect the archive entries and verify the required files are at the zip root. A wrapper folder, raw wheel files, Windows binaries, or a published output nested under `publish/` will usually fail at version build or session startup. - -Exclude local/development artifacts: - -```text -.env -.foundry/ -.git/ -.vscode/ -.venv/ -__pycache__/ -bin/ -obj/ -Dockerfile -.dockerignore -docker-compose.yml -Properties/launchSettings.json -``` - -### Remote Packaging (Default) - -The examples below use Python's `zipfile` module so they work across common shells and operating systems. Use equivalent platform zip tooling if Python is unavailable. - -Python remote-packaging zip should include `main.py`, `requirements.txt`, and any imported local source modules/packages needed by the entry point. 
Do not include `packages/`; the service installs dependencies from `requirements.txt`. - -```python -from pathlib import Path -from zipfile import ZipFile, ZIP_DEFLATED - -zip_path = Path(".foundry/direct-code/agent-code.zip") -zip_path.parent.mkdir(parents=True, exist_ok=True) -files = ["main.py", "requirements.txt"] - -with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf: - for name in files: - path = Path(name) - if path.exists(): - zf.write(path, name) -``` - -C#/.NET remote-packaging zip should include the project file, source files, and appsettings files. Do not include `bin/`, `obj/`, `.env`, Docker assets, or local launch settings. The `.csproj` `TargetFramework` must match the selected `dotnet_*` runtime. - -```python -from pathlib import Path -from zipfile import ZipFile, ZIP_DEFLATED - -zip_path = Path(".foundry/direct-code/agent-code.zip") -zip_path.parent.mkdir(parents=True, exist_ok=True) -files = ["StorytellerAgent.csproj", "Program.cs", "appsettings.json", "appsettings.Development.json"] - -with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf: - for name in files: - path = Path(name) - if path.exists(): - zf.write(path, name) -``` - -### Bundled Local Dependencies (Only When Requested) - -Bundled Python zip should include `main.py`, `requirements.txt`, and a `packages/` directory containing extracted Linux-compatible modules. Do not include raw `.whl` files, Windows `.pyd`/`.dll` binaries, or packages built without the target Linux platform flags. - -```text -python -m pip install -r requirements.txt --target packages --platform manylinux2014_x86_64 --python-version 3.13 --implementation cp --only-binary=:all: -``` - -Match `--python-version` to the selected `python_*` runtime and avoid Windows binaries. - -Bundled .NET zip is the output of `dotnet publish -c Release -r linux-x64 --self-contained false`, rooted directly at the publish output. 
It should contain `.dll`, `.runtimeconfig.json`, and the rest of the publish output at the zip root, not inside a `publish/` wrapper folder. - -```text -dotnet publish -c Release -r linux-x64 --self-contained false -o publish -``` - -Then create the zip from the publish output: - -```python -from pathlib import Path -from zipfile import ZipFile, ZIP_DEFLATED - -root = Path("publish") -zip_path = Path(".foundry/direct-code/agent-code.zip") -zip_path.parent.mkdir(parents=True, exist_ok=True) - -with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf: - for path in root.rglob("*"): - if path.is_file(): - zf.write(path, path.relative_to(root).as_posix()) -``` - -## Task 6: Upload Code and Create or Update the Agent - -Use the user's current platform and shell syntax. The examples below use literal placeholders and can be translated to any shell or HTTP client. Always keep `?api-version=...` in the final request URL. - -Resolve these values: - -- `` -- `` -- `` - usually `.foundry/direct-code/metadata.json` -- `` - usually `.foundry/direct-code/agent-code.zip` -- `` - from `az account get-access-token --resource https://ai.azure.com --query accessToken -o tsv` -- `` - SHA-256 of the zip file - -Calculate the zip SHA-256 with any platform tool. Python example: - -```python -import hashlib -from pathlib import Path - -with Path(".foundry/direct-code/agent-code.zip").open("rb") as f: - print(hashlib.sha256(f.read()).hexdigest()) -``` - -Use `` as the base URL. Append paths directly to that project endpoint; do not strip `/api/projects/` from it. - -Direct-code deployment uses these REST operations: - -| Purpose | Method and endpoint | When to use | -|---------|---------------------|-------------| -| Check whether the agent exists | `GET /agents/?api-version=2025-11-15-preview` | Run first. 
`404` means create the agent; `200` means deploy a new version by default | -| Create a new agent | `POST /agents?api-version=2025-11-15-preview` | Use only when the existence check returned `404` | -| Create a new version for an existing agent | `POST /agents//versions?api-version=2025-11-15-preview` | Default path when the agent already exists | -| Update an existing agent in place | `POST /agents/?api-version=2025-11-15-preview` | Use only when the user explicitly asks for an in-place update | - -If any GET/POST returns `Missing required query parameter: api-version`, the request URL was malformed. Fix the URL construction and retry the same REST call before continuing; do not interpret that response as "agent exists", "agent missing", or a version/build problem. - -All write requests use `multipart/form-data` with: - -- `metadata`: the JSON metadata file, content type `application/json` -- `code`: the flat zip file, content type `application/zip`, with `filename=.zip` -- `x-ms-code-zip-sha256`: SHA-256 of the zip file - -Create-agent request shape: - -```http -POST /agents?api-version=2025-11-15-preview -Authorization: Bearer -Accept: application/json -Foundry-Features: CodeAgents=V1Preview,HostedAgents=V1Preview -x-ms-agent-name: -x-ms-code-zip-sha256: -Content-Type: multipart/form-data - -metadata=; type=application/json -code=; type=application/zip; filename=.zip -``` - -Create-version request shape: - -```http -POST /agents//versions?api-version=2025-11-15-preview -Authorization: Bearer -Accept: application/json -Foundry-Features: CodeAgents=V1Preview,HostedAgents=V1Preview -x-ms-code-zip-sha256: -Content-Type: multipart/form-data - -metadata=; type=application/json -code=; type=application/zip; filename=.zip -``` - -Do not send `x-ms-agent-name` on `POST /agents//versions` or `POST /agents/`. Send it only on `POST /agents` because the agent name is not in that route. - -Update agent and create version are idempotent on zip SHA-256 plus agent definition. 
If both are unchanged from the latest version, the service can return the existing version instead of creating a duplicate. To force a new version, change the zip contents or definition. - -Other useful REST operations: - -| Purpose | Method and endpoint | Notes | -|---------|---------------------|-------| -| List versions | `GET /agents//versions?api-version=2025-11-15-preview` | Use when the write response does not clearly return a version | -| Download code | `GET /agents//code:download?api-version=2025-11-15-preview` | Add `agent_version=` when downloading a specific version; compare the `x-ms-code-zip-sha256` response header with the local SHA | -| Delete agent | `DELETE /agents/?api-version=2025-11-15-preview` | Deletes the agent and all versions; pull logs before deletion if needed | - -## Task 7: Poll Version Status - -Use the version from the create/version response. If the response does not clearly include it, list versions and pick the newest version returned for the agent. - -```http -GET /agents//versions/?api-version=2025-11-15-preview -Authorization: Bearer -Foundry-Features: CodeAgents=V1Preview,HostedAgents=V1Preview -``` - -Loop until the version status is no longer `creating`. - -- `active` -> proceed directly back to [deploy.md Step 7: Test the Agent](../deploy.md#step-7-test-the-agent). -- `failed` -> read the error from the version object. There is no runtime session yet, so `:logstream` will not help. diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/invoke/invoke.md b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/invoke/invoke.md index f5b719c1..416293f6 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/invoke/invoke.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/foundry-agent/invoke/invoke.md @@ -57,7 +57,21 @@ Key difference: `responses` takes a natural language `inputText` message with pl Use `agent_get` to verify the agent exists. 
For hosted agents, also verify the targeted version is `active`. -### Step 2: Create Session (Hosted Agents) +### Step 2: Fast smoke test for azd-deployed agents + +When the current folder is an azd agent project and deployment just completed, prefer the azd CLI first: + +```bash +azd ai agent invoke "hello, are you up?" +``` + +Use `azd ai agent show --output json` only when you need structured status, version, endpoints, or troubleshooting details; a successful remote invocation is the fast smoke test. + +If `azd ai agent invoke` returns a `confirmation_required` envelope, summarize the change and proceed only when the user already requested remote invocation or explicitly consents. Prefer the returned `confirmCommand` over inventing flags. If azd cannot resolve the service or agent name, fall back to the MCP workflow below with the resolved `projectEndpoint` and `agentName`. + +For a post-deploy smoke test, invoke once unless the user explicitly asked to validate multi-turn/session behavior. If that single invoke returns a successful response, the smoke test passes. + +### Step 3: Create Session (Hosted Agents) For hosted agents, create a session before invoking using `session_create` with `projectEndpoint` and `agentName`. Optionally provide a `sessionId` (must match `^[A-Za-z0-9_-]{8,128}$`). Store the returned `sessionId` for subsequent calls. @@ -65,7 +79,7 @@ For hosted agents, create a session before invoking using `session_create` with For full session lifecycle details, see [Session Management](references/session-management.md). -### Step 3: Invoke Agent +### Step 4: Invoke Agent Use the project endpoint and agent name from the project context. Use `agent_invoke` with: - `projectEndpoint`, `agentName`, `inputText` (required) @@ -88,17 +102,17 @@ agent_invoke(projectEndpoint, agentName, inputText: "{\"message\":\"hello\"}", p See [Invocations Protocol Guide](references/invocations-protocol.md) for full details and examples.
-### Step 4: Multi-Turn Conversations +### Step 5: Multi-Turn Conversations **Responses protocol** → Pass `conversationId` from previous response to continue the thread. Platform manages history. **Invocations protocol** → Reuse same `sessionId`; conversation state is agent-managed via `$HOME`. Do **not** pass `conversationId` — it has no effect for invocations. -### Step 5: File Operations (Hosted Agents) +### Step 6: File Operations (Hosted Agents) Upload/download files to pass data to and retrieve results from agents. All file operations require an active session. See [File Operations](references/file-operations.md). -### Step 6: Clean Up +### Step 7: Clean Up Use `session_delete` to release compute resources when done. Undeleted sessions expire per platform policies. @@ -119,7 +133,9 @@ Use `session_delete` to release compute resources when done. Undeleted sessions | Agent not found | Invalid name or endpoint | Use `agent_get` to list agents | | Hosted agent not active | Version still provisioning or failed | Check version status via `agent_get` | | Session not found | Invalid ID or expired | Create new session with `session_create` | -| `424 FailedDependency` or `session_not_ready` | Hosted agent session is still warming up or readiness has not completed | Wait 15-30 seconds, check `session_logstream` if needed, then retry `agent_invoke` with the same `sessionId` if one was returned; if no `sessionId` was returned, retry `session_create` | +| `424 FailedDependency` or `session_not_ready` | Hosted agent session is still warming up or readiness has not completed | Wait 15-30 seconds, check `session_logstream` if needed, then retry `agent_invoke` with the same `sessionId` if one was returned; if no `sessionId` was returned, retry `session_create`. 
If this persists across 3+ retries (with exponential backoff: 15s, 30s, 60s), the container likely cannot start within the readiness probe deadline — redeploy with higher CPU/memory (recommended minimum: `1` CPU / `2Gi` for direct-code deployments). Also verify the model deployment name is correct via `model_deployment_get`. | +| `could not resolve agent service in azd project: no azure.ai.agent service named '' found in azure.yaml` from `azd ai agent invoke` | The agent name passed to `azd ai agent invoke` does not match any `azure.ai.agent` service in `azure.yaml`. | Update the agent name to the deployed agent name, as declared in `azure.yaml`. | +| `invalid value "json" for --output` from `azd ai agent invoke` | `azd ai agent invoke` currently supports only the `default` and `raw` output formats. | Retry without `--output json`. | | Invocation failed | Model error, timeout, or invalid input | Check agent logs, verify model deployment | | Invocations schema mismatch | Request body does not match what the agent expects | Inspect agent's route handler or API docs for the correct JSON schema; do not guess | | File operation failed | Session not active or invalid path | Verify session with `session_get` | diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/models/deploy-model/SKILL.md b/.github/plugins/azure-skills/skills/microsoft-foundry/models/deploy-model/SKILL.md index 46b9f01e..226ecbb2 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/models/deploy-model/SKILL.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/models/deploy-model/SKILL.md @@ -9,6 +9,8 @@ metadata: # Deploy Model +> **Scope — read this first.** This skill creates model deployments **out-of-band** via Azure CLI / MCP / portal. For azd-managed Foundry projects (those scaffolded from `azd-ai-starter-basic` or via `azd ai agent init`), declare deployments in `azure.yaml services..config.deployments[]` instead — `azd ai agent init` writes the entry from the sample manifest and `azd provision` creates the deployment through Bicep.
See [foundry-agent/create/create-hosted.md](../../foundry-agent/create/create-hosted.md) for the Golden Path. Use this skill only for: (a) Foundry projects not managed by an azd project, (b) ad-hoc deployments outside the azd lifecycle. + Unified entry point for all Azure OpenAI model deployment workflows. Analyzes user intent and routes to the appropriate deployment mode. ## Quick Reference diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/project/create/create-foundry-project.md b/.github/plugins/azure-skills/skills/microsoft-foundry/project/create/create-foundry-project.md index e4dc4881..a9eea65f 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/project/create/create-foundry-project.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/project/create/create-foundry-project.md @@ -9,7 +9,7 @@ allowed-tools: Read, Write, Bash, AskUserQuestion # Create Azure AI Foundry Project -Create a new Azure AI Foundry project using azd. Provisions: Foundry account, project, Application Insights, managed identity, and RBAC permissions. Optionally enables hosted agents (capability host + Container Registry). +Create a new Azure AI Foundry project using azd. Provisions: Foundry account, project, Application Insights, managed identity, and RBAC permissions. Optionally enables hosted-agent deployment (adds an Azure Container Registry, and — only when the **Standard Setup** capability-host flag is also enabled — a `capabilityHosts/agents` resource). **Table of Contents:** [Prerequisites](#prerequisites) · [Workflow](#workflow) · [Best Practices](#best-practices) · [Troubleshooting](#troubleshooting) · [Related Skills](#related-skills) · [Resources](#resources) @@ -49,13 +49,25 @@ azd auth login --check-status If not logged in, run `azd auth login` and complete browser auth. -### Step 2: Ask User for Project Details - -Use AskUserQuestion for: - -1. **Project name** — used as azd environment name and resource group (`rg-`). 
Must contain only alphanumeric characters and hyphens. Examples: `my-ai-project`, `dev-agents` +### Step 2: Resolve Project Details + +Collect only values the user has not already provided. For values not specified, use defaults: + +1. **Project name** — used as azd environment name and resource group (`rg-`). Must contain only alphanumeric characters and hyphens. + - If the user provided a name, use it as-is. + - If the user did NOT provide a name, **auto-generate a unique name** using the pattern `ai-project-` where `` is a short random suffix (6-8 lowercase alphanumeric characters). Generate the suffix with a platform-appropriate method: + ```bash + # bash/zsh + echo "ai-project-$(openssl rand -hex 4)" + ``` + ```powershell + # PowerShell + "ai-project-$(-join ((48..57)+(97..122) | Get-Random -Count 8 | ForEach-Object {[char]$_}))" + ``` + - Show the generated name to the user before proceeding, but do not block on confirmation — proceed unless the user objects. + - Examples: `ai-project-3f8a1b2c`, `my-ai-project`, `dev-agents` 2. **Azure location** (optional) — defaults to North Central US (required for hosted agents preview) -3. **Enable hosted agents?** (yes/no) — provisions a capability host and Container Registry for deploying hosted agents. Defaults to no. +3. **Enable hosted agents?** (yes/no) — enables hosted-agent deployment and provisions an Azure Container Registry. A capability host (`capabilityHosts/agents`, used by Foundry's **Standard Agent Setup** for bring-your-own storage) is also created only when `ENABLE_CAPABILITY_HOST=true`. Defaults to no. See [Step 3](#step-3-create-directory-and-initialize) for how the two flags interact. 
### Step 3: Create Directory and Initialize @@ -79,9 +91,14 @@ If user chose to enable hosted agents: ```bash azd env set ENABLE_HOSTED_AGENTS true +azd env set ENABLE_CAPABILITY_HOST false ``` -This provisions a capability host (`capabilityHosts/agents`) on the Foundry account and auto-adds an Azure Container Registry for hosted agent deployments. +`ENABLE_HOSTED_AGENTS=true` enables hosted-agent deployment and creates an Azure Container Registry for the container image. A capability host (`capabilityHosts/agents`, used by Foundry's **Standard Agent Setup** for bring-your-own storage) is **also** created only when `ENABLE_CAPABILITY_HOST=true`. The default `azd ai agent` flow targets **Basic Agent Setup**, so it sets `ENABLE_CAPABILITY_HOST=false` automatically. The two flags are independent. + +> ⚠️ **Warning:** The Bicep template parameter `enableCapabilityHost` defaults to `true`. If you set `ENABLE_HOSTED_AGENTS` by hand without also setting `ENABLE_CAPABILITY_HOST=false`, you will accidentally provision Standard Setup (with the capability host). Use `azd ai agent init` to set both flags correctly. + +See the canonical env-var docs: [azure-dev/cli/azd/docs/environment-variables.md](https://github.com/Azure/azure-dev/blob/main/cli/azd/docs/environment-variables.md). ### Step 4: Provision Infrastructure @@ -89,7 +106,7 @@ This provisions a capability host (`capabilityHosts/agents`) on the Foundry acco azd provision --no-prompt ``` -Takes 5–10 minutes. Creates resource group, Foundry account/project, Application Insights, managed identity, and RBAC roles. If hosted agents enabled, also creates Container Registry and capability host. +Takes 5–10 minutes. Creates resource group, Foundry account/project, Application Insights, managed identity, and RBAC roles. If `ENABLE_HOSTED_AGENTS=true`, also creates an Azure Container Registry. 
A `capabilityHosts/agents` resource is created **only** when `ENABLE_CAPABILITY_HOST=true` (Standard Setup); the default Basic Setup uses `ENABLE_CAPABILITY_HOST=false` and no capability host is provisioned — its absence is correct. ### Step 5: Retrieve Project Details @@ -101,8 +118,19 @@ Capture `AZURE_AI_PROJECT_ID`, `AZURE_AI_PROJECT_ENDPOINT`, and `AZURE_RESOURCE_ ### Step 6: Next Steps -- Deploy an agent → `agent/deploy` skill -- Browse models → `foundry_models_list` MCP tool +> **Next — azd Golden Path:** create a hosted agent with [foundry-agent/create/create-hosted.md](../../foundry-agent/create/create-hosted.md). For headless / scripted flows, **pre-bootstrap the workspace with core `azd init`** so subscription + location are populated before model resolution runs: +> +> ```bash +> azd init -t Azure-Samples/azd-ai-starter-basic . -e --subscription -l +> azd ai agent init -m --no-prompt --deploy-mode code --runtime python_3_13 --entry-point main.py +> ``` +> +> Core `azd init` accepts `--subscription` and `-l/--location`; `azd ai agent init` does not. `azd ai agent init` then resolves the model from the chosen sample's manifest and writes it into `azure.yaml services..config.deployments[]`; the next `azd provision` creates the deployment through Bicep. **You do not need to deploy a model separately for this path** — no `az cognitiveservices` calls, no `azd env set AI_PROJECT_DEPLOYMENTS`. +> +> Use [models/deploy-model](../../models/deploy-model/SKILL.md) **only** for out-of-band scenarios: adding models to a Foundry project that is not managed by this azd project, or ad-hoc deployments outside the azd lifecycle. 
+ +- Deploy an existing agent → [foundry-agent/deploy/deploy.md](../../foundry-agent/deploy/deploy.md) +- Browse model catalog → `foundry_models_list` MCP tool - Manage project → https://ai.azure.com ## Best Practices diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/references/standard-agent-setup.md b/.github/plugins/azure-skills/skills/microsoft-foundry/references/standard-agent-setup.md index 3bf849e1..8f62a231 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/references/standard-agent-setup.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/references/standard-agent-setup.md @@ -1,5 +1,7 @@ # Standard Agent Setup +> ⚠️ **Warning:** This page covers Foundry's **Standard Agent Setup** (capability host + bring-your-own Cosmos DB / Azure Storage / Azure AI Search). The default `azd ai agent` flow uses **Basic Agent Setup** and does **not** provision a `capabilityHosts/agents` resource — *stop reading this page* if you arrived from `azd ai agent`. See [foundry-agent/create/create-hosted.md](../foundry-agent/create/create-hosted.md) and the canonical env vars in [environment-variables.md](https://github.com/Azure/azure-dev/blob/main/cli/azd/docs/environment-variables.md). + > **MANDATORY:** Read [Standard Agent Setup docs](https://learn.microsoft.com/en-us/azure/foundry/agents/concepts/standard-agent-setup?view=foundry) before proceeding with standard setup. ## Overview diff --git a/.github/plugins/azure-skills/skills/python-appservice-deploy/SKILL.md b/.github/plugins/azure-skills/skills/python-appservice-deploy/SKILL.md new file mode 100644 index 00000000..ac25db69 --- /dev/null +++ b/.github/plugins/azure-skills/skills/python-appservice-deploy/SKILL.md @@ -0,0 +1,36 @@ +--- +name: python-appservice-deploy +description: "Deploy Python (Flask/Django/FastAPI) code to Azure App Service Linux. WHEN: \"Flask App Service\", \"Django App Service\", \"FastAPI App Service\", \"deploy Python to App Service\". 
DO NOT USE FOR: Container Apps, Functions, non-Python, Terraform/Bicep/IaC, full infra — use azure-prepare." +license: MIT +metadata: + author: Microsoft + version: "1.0.1" +--- + +# Python on Azure App Service — Code Deploy + +Deploys Python (Flask, Django, FastAPI, generic) code to Azure App Service Linux (P0v3, Python 3.14). Creates RG + Plan + Web App if missing. Hand off to `azure-prepare` for VNet, Key Vault, databases, or IaC. + +**MCP tools used**: `mcp_azure_mcp_subscription_list`, `mcp_azure_mcp_group_list`, `mcp_azure_mcp_appservice`, `mcp_azure_mcp_azd` (when `azure.yaml` is present). + +## Workflow + +1. **Resolve context — smart defaults, minimal prompts.** Only the app name is interactive; RG (`-rg`), Plan (`-plan`), region (current `az` default or `eastus2`), subscription are derived. [create-app.md](references/create-app.md) §1. +2. **Detect framework** (advisory, never blocks). [detect.md](references/detect.md). +3. **Choose path** — `azure.yaml` host: appservice → [deploy-azd.md](references/deploy-azd.md); else [deploy-azcli.md](references/deploy-azcli.md). +4. **Ensure RG → Plan (`P0v3 --is-linux`) → Web App (`--runtime "PYTHON:3.14"`)** exist. On transient ARM errors, follow [transient-retry.md](references/transient-retry.md). [create-app.md](references/create-app.md). +5. **Set startup** — Flask/Django: none (Oryx auto-detects). FastAPI: always `python -m uvicorn main:app --host 0.0.0.0`. Other: warn. [startup-commands.md](references/startup-commands.md). +6. **Set `SCM_DO_BUILD_DURING_DEPLOYMENT=true`**. +7. **Deploy** — `azd deploy` or `az webapp deploy --type zip --track-status false`. +8. **STOP. Print the post-deploy message** ([post-deploy-message.md](references/post-deploy-message.md)) and end the turn. + +### Hard rules + +- ⛔ **NO POST-DEPLOY VERIFICATION** — after deploy returns, do not run `az webapp log tail`, `curl`, `Invoke-WebRequest`, or any health probe. 
App Service needs 2–3 min to warm; a quiet log or early 5xx is not failure. +- ⛔ **SHELL SAFETY** — for `--runtime` always use `"PYTHON:3.14"` (colon). Never `"PYTHON|3.14"` (pipe is a shell operator). +- ⛔ **NEVER `az webapp up`** — deprecated. Use Step 7 commands. +- ✅ **URL FORMAT** — present endpoints as `https://...` URLs. + +## Error Handling + +See [errors.md](references/errors.md) for the full symptom → cause → fix matrix. Quick triage: missing plan/app → re-run Step 4; container ping timeout on 8000 → fix startup (Step 5); `ModuleNotFoundError` after deploy → ensure Step 6 ran, redeploy. diff --git a/.github/plugins/azure-skills/skills/python-appservice-deploy/references/create-app.md b/.github/plugins/azure-skills/skills/python-appservice-deploy/references/create-app.md new file mode 100644 index 00000000..891186a0 --- /dev/null +++ b/.github/plugins/azure-skills/skills/python-appservice-deploy/references/create-app.md @@ -0,0 +1,133 @@ +# Create RG + App Service Plan + Web App (Linux, P0v3) + +Creates resources only when missing — every step is `show || create` (idempotent against existing user-supplied names). + +> 💡 **Shell note**: Bash blocks below use `\` line continuation, `||`, `2>/dev/null`, `$(…)`. PowerShell equivalents are shown alongside where the bash form doesn't round-trip — substitute `` ` `` for `\`, `2>$null` for `2>/dev/null`, and `$LASTEXITCODE` checks for `||`. + +## 1. Resolve Azure context — minimize prompts + +**Ask the user at most ONE question (the app name).** Derive everything else. If the user's request already names an RG / Plan / region / subscription (e.g. *"deploy to my-team-rg in westus3"*), use those values verbatim and skip the corresponding derivation — the `show || create` flow below works against existing resources. + +### 1a. Subscription +```bash +az account show --query id -o tsv +``` +If unset, prompt the user to `az login`. Only call `ask_user` if multiple subscriptions are configured and no default is set. 
+ +### 1b. App name +Ask the user **once**: +> "What name would you like for your App Service? (Press Enter to auto-generate one.)" + +If empty / "any" / "you choose", call the generator script — it implements the slug rules (lowercase, hyphen-collapse, ≤ 40 chars, `^[a-z][a-z0-9-]{1,38}[a-z0-9]$`) and an 8-hex-char GUID suffix: + +- Bash / zsh: [`scripts/generate-app-name.sh`](../scripts/generate-app-name.sh) → `APP_NAME=$(./scripts/generate-app-name.sh [folder])` +- PowerShell: [`scripts/generate-app-name.ps1`](../scripts/generate-app-name.ps1) → `$appName = & .\scripts\generate-app-name.ps1 [-FolderName ]` + +Example: folder `my-flask-app/` → `my-flask-app-a3f9c1d2`. + +### 1c. Derived names (use only when user did not specify) +| Resource | Default | +|---|---| +| Resource group | `-rg` | +| App Service Plan | `-plan` | + +### 1d. Region +1. If user specified a region, use it. +2. Else read the CLI default. Suppress the "Configuration is not set" stderr line **only** when paired with an exit-code check — never blindly drop stderr, since it would also swallow auth/transport errors: + ```bash + REGION=$(az config get defaults.location -o tsv 2>/dev/null) || REGION="" + ``` + ```powershell + $region = az config get defaults.location -o tsv 2>$null; if ($LASTEXITCODE -ne 0) { $region = "" } + ``` +3. Else default to `eastus2`. +4. Only call `ask_user` if `az group create` later fails with a region/quota/availability error. + +### 1e. Show the defaults summary BEFORE creating +Print one concise block so the user can interrupt to override. + +**Example** (illustrative — substitute the actual derived values, do not print verbatim): + +``` +Using these defaults for your Python App Service deployment: + • App name : flask-app-demo-27may + • Resource group : flask-app-demo-27may-rg (auto-derived) + • App Service Plan: flask-app-demo-27may-plan (auto-derived) + • Region : eastus2 (CLI default) + • Plan SKU : P0v3 Linux + • Runtime : PYTHON:3.14 + +Proceeding with create. 
Reply "stop" in your next message to change any value. +``` + +Do **not** call `ask_user` for confirmation here — just print and proceed. + +### 1f. Transient error handling +On connection-level or 429/5xx errors from any `az ... create` in §§2–4, see [transient-retry.md](transient-retry.md). Configuration errors (`AuthorizationFailed`, `SkuNotAvailable`, `QuotaExceeded`, etc.) must **not** be retried — surface them. + +## 2. Resource Group + +```bash +az group show -n <resource-group> --only-show-errors 2>/dev/null || \ + az group create -n <resource-group> -l <region> +``` +```powershell +az group show -n <resource-group> --only-show-errors 2>$null +if ($LASTEXITCODE -ne 0) { az group create -n <resource-group> -l <region> } +``` + +## 3. App Service Plan — **Linux, P0v3 by default** + +> ⚠️ **MANDATORY**: Use `--is-linux` and `--sku P0v3`. Do not change OS or SKU unless the user explicitly requests it. + +```bash +az appservice plan show -n <plan-name> -g <resource-group> --only-show-errors 2>/dev/null || \ + az appservice plan create -n <plan-name> -g <resource-group> --is-linux --sku P0v3 -l <region> +``` +```powershell +az appservice plan show -n <plan-name> -g <resource-group> --only-show-errors 2>$null +if ($LASTEXITCODE -ne 0) { + az appservice plan create -n <plan-name> -g <resource-group> --is-linux --sku P0v3 -l <region> +} +``` + +## 4. Web App — Python 3.14 runtime (Linux) + +> ⚠️ **Shell safety**: Always use the **colon** form `PYTHON:3.14` — never the pipe form `PYTHON|3.14`. The pipe character is a shell operator in PowerShell, Bash, and cmd, and in some contexts it breaks the command even when quoted. The colon form is fully supported by `az webapp create --runtime` and is shell-safe everywhere.
+ +```bash +az webapp show -n <app-name> -g <resource-group> --only-show-errors 2>/dev/null || \ + az webapp create -n <app-name> -g <resource-group> -p <plan-name> --runtime "PYTHON:3.14" +``` +```powershell +az webapp show -n <app-name> -g <resource-group> --only-show-errors 2>$null +if ($LASTEXITCODE -ne 0) { + az webapp create -n <app-name> -g <resource-group> -p <plan-name> --runtime "PYTHON:3.14" +} +``` + +The 8-hex-char GUID suffix from §1b is sufficient for global hostname uniqueness; the optional `--domain-name-scope TenantReuse` flag (Azure CLI ≥ 2.76, July 2025) is intentionally omitted to stay compatible with older CLIs. + +### Discover available runtimes + +If `PYTHON:3.14` is unavailable in the region: + +```bash +az webapp list-runtimes --os linux --query "[?contains(@, 'PYTHON')]" -o tsv +``` + +The output uses the pipe form (e.g., `PYTHON|3.14`) — **convert to colon form** before passing to `--runtime`. Prefer 3.14; fall back to 3.13, then 3.12. + +## 5. Verify + +```bash +az webapp show -n <app-name> -g <resource-group> --query "{name:name, state:state, host:defaultHostName, linuxFx:siteConfig.linuxFxVersion}" -o table +``` + +Expected: `state: Running`, `linuxFx: PYTHON|3.14` (Azure stores it in pipe form internally — normal), `host: <app-name>.azurewebsites.net`. + +## Notes + +- ⛔ Never use `az webapp up` — deprecated. See [deploy-azcli.md](deploy-azcli.md). +- If the user requests a different SKU (e.g. `B1` for dev/test), respect it but warn that **P0v3** is the documented default. +- If a Windows plan is requested, hand off to `azure-prepare`. diff --git a/.github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azcli.md b/.github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azcli.md new file mode 100644 index 00000000..89e8d0d6 --- /dev/null +++ b/.github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azcli.md @@ -0,0 +1,112 @@ +# Deploy via `az` CLI + +Use this path when there is **no** `azure.yaml` or it doesn't target `appservice`. + +> ⛔ **NEVER USE `az webapp up`** — this command is deprecated.
Use the explicit create + deploy commands below. + +## Prerequisites + +- `az login` complete +- Subscription, resource group, region, and app name decided (see [create-app.md](create-app.md)) +- App Service Plan (Linux, P0v3) and Web App (Python runtime) exist (see [create-app.md](create-app.md)) + +## 1. Enable server-side build + +```bash +az webapp config appsettings set \ + -n <app-name> -g <resource-group> \ + --settings SCM_DO_BUILD_DURING_DEPLOYMENT=true +``` +```powershell +az webapp config appsettings set ` + -n <app-name> -g <resource-group> ` + --settings SCM_DO_BUILD_DURING_DEPLOYMENT=true +``` + +This tells Oryx to run `pip install -r requirements.txt` during deploy. + +## 2. Startup command — skip for Flask/Django, always set for FastAPI + +Azure App Service (Oryx) auto-detects **Flask** and **Django** — **do not set a startup command** for these. Skip this step entirely. + +For **FastAPI**, always set the uvicorn startup command, regardless of the Python runtime version. This skill does **not** rely on Oryx FastAPI auto-detection, so the behavior is identical on every supported runtime (3.12, 3.13, 3.14, …): + +```bash +az webapp config set -n <app-name> -g <resource-group> \ + --startup-file "python -m uvicorn main:app --host 0.0.0.0" +``` +```powershell +az webapp config set -n <app-name> -g <resource-group> ` + --startup-file "python -m uvicorn main:app --host 0.0.0.0" +``` + +(Replace `main:app` if the FastAPI entry point differs — e.g., `app.main:app`.) + +For other frameworks (generic WSGI / ASGI / unknown), **skip this step** and emit the manual-startup warning. See [startup-commands.md](startup-commands.md). + +## 3. Package the code + +Zip the project (excluding venv, caches, git, node_modules): + +```bash +# bash +zip -r app.zip .
\ + -x ".git/*" -x ".venv/*" -x "venv/*" -x "__pycache__/*" \ + -x "*.pyc" -x ".env" -x "node_modules/*" +``` + +```powershell +# PowerShell +$exclude = @('.git','.venv','venv','__pycache__','node_modules','.env','app.zip') +$items = Get-ChildItem -Force | Where-Object { $exclude -notcontains $_.Name } +Compress-Archive -Path $items -DestinationPath app.zip -Force +``` + +## 4. Deploy the zip + +```bash +az webapp deploy \ + -n <app-name> -g <resource-group> \ + --src-path app.zip \ + --type zip \ + --track-status false +``` +```powershell +az webapp deploy ` + -n <app-name> -g <resource-group> ` + --src-path app.zip ` + --type zip ` + --track-status false +``` + +> 💡 `--track-status false` returns once the ZIP is **accepted by the SCM endpoint** — this is **not** the same as "Oryx build succeeded". The server-side `pip install` / startup-command rendering happens asynchronously after the CLI returns. A zero exit code only confirms the upload + a deployment record. If the site never starts, inspect the build outcome via `az webapp log deployment list/show` — that is the only authoritative confirmation that the build itself succeeded. + +## 5. Stop. Report the endpoint to the user. + +After `az webapp deploy` returns, the skill is done. + +> ℹ️ `az webapp deploy` does **not** initiate a cold start by pinging the site. With `--track-status false`, it returns as soon as the SCM endpoint accepts the ZIP; the Oryx build and container restart happen asynchronously on the SCM side. The container only warms up when an inbound HTTP request actually hits `https://<app-name>.azurewebsites.net` — which is why the post-deploy message tells the user to expect a 2–3 minute wait on their first visit. + +> ⛔ **Do NOT run** `az webapp log tail`, `curl`, `Invoke-WebRequest`, `wget`, or any other "verify startup" command. App Service routinely needs **2–3 minutes** to warm the container; a quiet log stream or a 5xx in the first couple of minutes is **not** a failure signal, and running these probes here will mislead the user.
+ +Resolve the host name without hitting the site: + +```bash +HOST=$(az webapp show -n <app-name> -g <resource-group> --query defaultHostName -o tsv) +echo "https://$HOST" +``` +```powershell +$host_ = az webapp show -n <app-name> -g <resource-group> --query defaultHostName -o tsv +"https://$host_" +``` + +Then print the post-deploy message from [post-deploy-message.md](post-deploy-message.md) and end the turn. The user will run `az webapp log tail -n <app-name> -g <resource-group>` themselves if they want to watch logs. + +## Common pitfalls + +| Pitfall | Fix | +|---------|-----| +| Deployed code missing dependencies | `SCM_DO_BUILD_DURING_DEPLOYMENT=true` not set — re-run step 1 then redeploy | +| Container ping timeout on port 8000 | Wrong startup command — see [startup-commands.md](startup-commands.md) | +| Zip too large (>500 MB) | Exclude `.venv` and caches; consider a `.deployment` or `.gitignore`-style exclusion file | +| `webapp up` examples in older docs | Replace with `az webapp create` + `az webapp deploy` (this file) | diff --git a/.github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azd.md b/.github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azd.md new file mode 100644 index 00000000..f95976c2 --- /dev/null +++ b/.github/plugins/azure-skills/skills/python-appservice-deploy/references/deploy-azd.md @@ -0,0 +1,83 @@ +# Deploy via `azd` + +Use this path when the workspace already has an `azure.yaml` whose service host is `appservice`. + +## When to use + +| Condition | Use azd? | +|---|---| +| `azure.yaml` exists AND `services.<name>.host: appservice` | ✅ Yes | +| `azure.yaml` exists but targets Container Apps / Functions / etc.
| ❌ Hand off to `azure-prepare` | +| No `azure.yaml` | ❌ Use [deploy-azcli.md](deploy-azcli.md) | + +## Confirm host target + +```bash +# Look for `host: appservice` under services in azure.yaml +grep -E "host:\s*appservice" azure.yaml +``` +```powershell +# Look for `host: appservice` under services in azure.yaml +Select-String -Path azure.yaml -Pattern 'host:\s*appservice' +``` + +If no match → use the az CLI path. + +## Authenticate + +```bash +azd auth login --check-status || azd auth login +``` +```powershell +azd auth login --check-status +if ($LASTEXITCODE -ne 0) { azd auth login } +``` + +## Provision (first time only) + +If no `azd` environment exists in this folder: + +```bash +azd env new +azd env set AZURE_LOCATION +# Optional: override default SKU via the template's parameters +azd env set APP_SERVICE_SKU P0v3 +``` + +Then provision + deploy in one call: + +```bash +azd up +``` + +## Deploy code only (subsequent deploys) + +```bash +azd deploy +``` + +After `azd deploy` returns, **stop**. Do not run `azd monitor`, `az webapp log tail`, or any HTTP probe. + +## Report endpoint to the user + +> ⛔ **Do NOT probe the endpoint** (no `curl`, no `Invoke-WebRequest`) and **do NOT tail logs** as a verification step. App Service can take 2–3 minutes after deploy before the site responds. + +Read the endpoint from azd without hitting the site: + +```bash +# bash +azd env get-values | grep -E '^SERVICE_.*_URI=' +``` + +```powershell +# PowerShell +azd env get-values | Select-String "SERVICE_.*_URI" +``` + +Present the URL with the `https://` prefix and the post-deploy message from [post-deploy-message.md](post-deploy-message.md), then end the turn. + +## Notes + +- ⛔ Do **not** run `azd init -t