diff --git a/.gitignore b/.gitignore index 322a4fc..bc7c648 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,28 @@ venv/ .terraform.lock.hcl terraform.tfvars +# Claude Code activator (generated by scripts/configure-claude-code.*) +claude-code.env.ps1 +claude-code.env.sh +# VS Code workspace settings populated by the postprovision hook with +# user-specific deployment info. Other .vscode/ files (launch.json, +# extensions.json, ...) remain trackable. +.vscode/settings.json + # IDE .vscode/ .idea/ .DS_Store + +# Local-only scratch area (helper scripts, logs, env backups not referenced by README). +# Anything in here stays on disk and never gets pushed. +local-only/ + +# Claude Code per-workspace permission cache (runtime state, not for sharing). +.claude/ + +# Workspace-scoped Azure CLI config + MSAL token cache. Set by +# AZURE_CONFIG_DIR in the activators and .vscode/settings.json so that +# 'az login' / 'azd' done in this workspace never touch ~/.azure and +# never leak into other VS Code windows. Pure runtime state — do not commit. +.azure-cli/ diff --git a/Get-ClaudeRegions.ps1 b/Get-ClaudeRegions.ps1 index c8ea5ad..b552d65 100644 --- a/Get-ClaudeRegions.ps1 +++ b/Get-ClaudeRegions.ps1 @@ -46,6 +46,19 @@ param( $ErrorActionPreference = 'Stop' +# When invoked via `pwsh -File ... -Regions a,b,c`, parameter binding can pass +# the comma-joined string as a single element instead of an array. Normalize +# any string that contains commas into its comma-split parts. 
+$Regions = @( + foreach ($r in $Regions) { + if ($r -is [string] -and $r -match ',') { + $r.Split(',') | ForEach-Object { $_.Trim() } | Where-Object { $_ } + } else { + $r + } + } +) + # Verify az login context try { $ctx = az account show -o json 2>$null | ConvertFrom-Json diff --git a/README.md b/README.md index 6e0403e..6fc0165 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ > Short link: **** -Provision a [Microsoft Foundry](https://learn.microsoft.com/azure/ai-foundry/) account with a **Claude** model deployment, then call it with the **[Claude SDK](https://docs.claude.com/en/api/client-sdks)** using Microsoft Entra ID — end-to-end via [Azure Developer CLI (`azd`)](https://learn.microsoft.com/azure/developer/azure-developer-cli/). +Provision a [Microsoft Foundry](https://learn.microsoft.com/azure/ai-foundry/) account with a **Claude** model deployment, then call it with the **[Claude SDK](https://docs.claude.com/en/api/client-sdks)** using Microsoft Entra ID — end-to-end via [Azure Developer CLI (`azd`)](https://learn.microsoft.com/azure/developer/azure-developer-cli/). `azd up` also wires up **[Claude Code](https://learn.microsoft.com/azure/foundry/foundry-models/how-to/configure-claude-code)** so you can run the agentic CLI against your fresh deployment immediately. Two equivalent IaC variants ship side-by-side. Pick one and `azd up`: @@ -13,10 +13,12 @@ Two equivalent IaC variants ship side-by-side. Pick one and `azd up`: The Python sample under [`src/`](./src/) works against either. +> **Looking for something more advanced?** Jump to: [Claude Code post-deploy setup](#claude-code-post-deploy-setup) · [auto-refreshing Entra ID tokens for long-running processes](#advanced-long-running-processes-auto-refreshing-the-entra-id-token) · [preprovision preflight](#preprovision-preflight-marketplace-catalog--quota) · [check Claude quota & capacity programmatically](#advanced-check-claude-quota--capacity-programmatically). 
+ ## Prerequisites - An Azure subscription [eligible to deploy Claude in Foundry](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/use-foundry-models-claude#prerequisites), with `Contributor` on the target subscription/resource group (see [Required permissions](#required-permissions) for the full breakdown, including the data-plane role you need to call the model). -- Region: `eastus2` or `swedencentral` (or `westus2` for `claude-opus-*`). +- Region: `eastus2` or `swedencentral` host all three Claude families (haiku / sonnet / opus). `westus2` is sonnet + opus only. - Tools: [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli), [azd](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd), Python ≥ 3.10, and [Terraform](https://developer.hashicorp.com/terraform/install) ≥ 1.6 (Terraform variant only). ## Quickstart @@ -25,21 +27,55 @@ The Python sample under [`src/`](./src/) works against either. git clone https://github.com/Azure-Samples/claude.git cd claude/infra-terraform # or: cd claude/infra-bicep -azd auth login -azd env new my-claude +# If your Claude-eligible subscription lives in a non-default tenant, pass --tenant-id: +azd auth login # or: azd auth login --tenant-id <tenant-id> + +azd env new my-claude # answer 'y' when asked "Set new environment ... as default?" + # (if you already created the env, run `azd env select my-claude`) azd env set CLAUDE_ORGANIZATION_NAME "Contoso" -azd env set AZURE_LOCATION "eastus2" +azd env set AZURE_LOCATION "swedencentral" + +# Pick which Claude families to deploy. Empty = skip that family. +# Defaults below = all three; comment out any line to deploy a subset. 
+azd env set CLAUDE_HAIKU_MODEL "claude-haiku-4-5" +azd env set CLAUDE_SONNET_MODEL "claude-sonnet-4-6" +azd env set CLAUDE_OPUS_MODEL "claude-opus-4-6" + +# Optional — skip the interactive subscription picker on first `azd up`: +# azd env set AZURE_SUBSCRIPTION_ID <subscription-id> +# Optional — also install the Claude Code CLI as part of postprovision: +# azd env set CLAUDE_CODE_AUTO_INSTALL true azd up +``` + +> **Want just one family?** Set only that one (e.g. just `CLAUDE_OPUS_MODEL`) and leave the others unset. Want to override capacity per family? Set `CLAUDE_HAIKU_CAPACITY` / `CLAUDE_SONNET_CAPACITY` / `CLAUDE_OPUS_CAPACITY` (TPM ÷ 1000, default 25 each). See [Choosing which models to deploy](#choosing-which-models-to-deploy). + +`azd up` provisions Foundry + the Claude deployment, then a **postprovision** hook ([`scripts/configure-claude-code.ps1`](./scripts/configure-claude-code.ps1)) writes a `claude-code.env.ps1` / `claude-code.env.sh` activator at the repo root and a `.vscode/settings.json` for the Claude Code VS Code extension. See [Claude Code post-deploy setup](#claude-code-post-deploy-setup) for details. + +### Use Claude Code + +```powershell +# from the repo root +. ./claude-code.env.ps1 # PowerShell. macOS/Linux: source ./claude-code.env.sh +claude +``` -# Export endpoint + deployment name to a shared .env.local at repo root -azd env get-values > ..\.env.local +If `claude` isn't installed yet, the postprovision hook prints the one-line installer command for your platform (or set `CLAUDE_CODE_AUTO_INSTALL=true` before `azd up` to run it automatically). To verify the wiring see [Verify Claude Code is wired up](#verify-claude-code-is-wired-up). + +### Or use the Python sample + +```powershell +# from infra-bicep/ or infra-terraform/ (so `azd env get-values` works) +# Use Out-File so the file is UTF-8 (Windows PowerShell 5.1's `>` writes UTF-16, which python-dotenv mis-parses). 
+azd env get-values | Out-File -Encoding utf8 ..\.env.local +# macOS/Linux: azd env get-values > ../.env.local -# Run the Python sample cd .. python -m venv .venv && . .venv/Scripts/Activate.ps1 # macOS/Linux: source .venv/bin/activate pip install -r requirements.txt -python src/hello_claude.py -python src/chat_stream.py +python src/hello_claude.py # one-shot Messages call (Entra ID) +python src/chat_stream.py # interactive streaming chat — type a message, `exit` to quit +python src/hello_claude_token_refresh.py # long-running variant with per-request token refresh ```
@@ -48,9 +84,10 @@ python src/chat_stream.py If you don't have a data-plane role on the Foundry account yet, you can run a quick check with an API key. Prefer Entra ID for anything beyond local testing — keys can't be scoped per-user and rotate manually. ```powershell +# FOUNDRY_ACCOUNT_NAME and AZURE_RESOURCE_GROUP are emitted by `azd env get-values` $env:CLAUDE_API_KEY = (az cognitiveservices account keys list ` - --name ` - --resource-group --query key1 -o tsv) + --name $env:FOUNDRY_ACCOUNT_NAME ` + --resource-group $env:AZURE_RESOURCE_GROUP --query key1 -o tsv) python src/hello_claude_apikey.py ``` @@ -61,13 +98,142 @@ python src/hello_claude_apikey.py | Var | Required | Default | Notes | |---|---|---|---| | `CLAUDE_ORGANIZATION_NAME` | yes | — | Surfaced via `modelProviderData` | -| `AZURE_LOCATION` | yes | — | `eastus2` / `swedencentral` / `westus2` | +| `AZURE_LOCATION` | yes | — | `eastus2` / `swedencentral` (all 3 families) / `westus2` (sonnet + opus) | +| `CLAUDE_HAIKU_MODEL` | no | *(empty)* | Haiku family model id (e.g. `claude-haiku-4-5`). Empty = skip. | +| `CLAUDE_SONNET_MODEL` | no | *(empty)* | Sonnet family model id (e.g. `claude-sonnet-4-6`). Empty = skip. | +| `CLAUDE_OPUS_MODEL` | no | *(empty)* | Opus family model id (e.g. `claude-opus-4-6`). Empty = skip. 
| +| `CLAUDE_HAIKU_CAPACITY` | no | `25` | Haiku TPM / 1000 | +| `CLAUDE_SONNET_CAPACITY` | no | `25` | Sonnet TPM / 1000 | +| `CLAUDE_OPUS_CAPACITY` | no | `25` | Opus TPM / 1000 | | `CLAUDE_COUNTRY_CODE` | no | `US` | 2-letter ISO | | `CLAUDE_INDUSTRY` | no | `technology` | **lowercase**: `technology`, `finance`, `healthcare`, `education`, `retail`, `manufacturing`, `government`, `media`, `other` | -| `CLAUDE_MODEL_NAME` | no | `claude-sonnet-4-6` | Run `./Get-ClaudeRegions.ps1` to see availability | -| `CLAUDE_MODEL_VERSION` | no | `1` | | -| `CLAUDE_MODEL_CAPACITY` | no | `50` | TPM / 1000 | -| `ASSIGN_RBAC` | no | `false` | `true` to grant Azure AI User to `AZURE_PRINCIPAL_ID` (needs `roleAssignments/write`) | +| `CLAUDE_MODEL_VERSION` | no | `1` | Applies to all deployed families. | +| `CLAUDE_MODEL_NAME` | no | `claude-sonnet-4-6` | **Legacy.** Only used when all three `CLAUDE_*_MODEL` vars are empty (single-deployment fallback). | +| `CLAUDE_MODEL_CAPACITY` | no | `25` | **Legacy.** Capacity for the legacy single-deployment fallback. | +| `ASSIGN_RBAC` | no | `false` | `true` to grant `Foundry User` + `Foundry Project Manager` to `AZURE_PRINCIPAL_ID` (needs `roleAssignments/write`) | +| `CLAUDE_CODE_AUTO_INSTALL` | no | `false` | `true` to let the postprovision hook run the official Claude Code installer ([`install.ps1`](https://claude.ai/install.ps1) / [`install.sh`](https://claude.ai/install.sh)) when `claude` isn't already on PATH | + +### Choosing which models to deploy + +Set one, two, or all three of `CLAUDE_HAIKU_MODEL` / `CLAUDE_SONNET_MODEL` / `CLAUDE_OPUS_MODEL` — each non-empty value deploys that family into the same Foundry account. The postprovision hook writes one `ANTHROPIC_DEFAULT__MODEL` env var per deployed family into the activator + `.vscode/settings.json`, so Claude Code can route across all three. 
+ +| Goal | Set | +|---|---| +| All three families (recommended) | `CLAUDE_HAIKU_MODEL=claude-haiku-4-5`, `CLAUDE_SONNET_MODEL=claude-sonnet-4-6`, `CLAUDE_OPUS_MODEL=claude-opus-4-6` | +| Just sonnet | `CLAUDE_SONNET_MODEL=claude-sonnet-4-6` (leave the others unset) | +| Just opus | `CLAUDE_OPUS_MODEL=claude-opus-4-7` (or `-4-6` if quota is tight) | +| Single legacy model (back-compat) | `CLAUDE_MODEL_NAME=...` and leave all `CLAUDE_*_MODEL` vars empty | + +Run [`./Get-ClaudeRegions.ps1`](./Get-ClaudeRegions.ps1) to see the live catalog and pick model versions matching your region. + +## Claude Code post-deploy setup + +After `azd up` succeeds, the **postprovision** hook ([`scripts/configure-claude-code.ps1`](./scripts/configure-claude-code.ps1), with [`configure-claude-code.sh`](./scripts/configure-claude-code.sh) as a POSIX fallback) configures [Claude Code](https://learn.microsoft.com/azure/foundry/foundry-models/how-to/configure-claude-code) for the freshly-deployed Foundry resource. It does three things: + +1. Writes a project-scoped activator at the repo root (`claude-code.env.ps1` and `claude-code.env.sh`, both gitignored) containing the [environment variables](https://learn.microsoft.com/azure/foundry/foundry-models/how-to/configure-claude-code?tabs=bash#configure-claude-code-for-foundry) Claude Code expects: + - `CLAUDE_CODE_USE_FOUNDRY=1` + - `ANTHROPIC_FOUNDRY_RESOURCE=` + - One `ANTHROPIC_DEFAULT__MODEL=` per deployed family (`HAIKU` / `SONNET` / `OPUS`). Only the families you actually deployed get a line. + - **`AZURE_CONFIG_DIR=/.azure-cli`** — scopes `az login` (and `azd`) to this workspace only. See [Workspace-scoped `az login`](#workspace-scoped-az-login) below. +2. 
Writes (or merges into) `.vscode/settings.json` with `claudeCode.environmentVariables` (the array-of-`{name,value}` schema the extension actually reads — the display name in the Settings UI is *"Claude Code: Environment Variables"*) and `claudeCode.disableLoginPrompt: true` so the [Claude Code VS Code extension](https://marketplace.visualstudio.com/items?itemName=anthropic.claude-code) skips the Anthropic-account login and uses your Foundry deployment via Entra ID. It also sets `terminal.integrated.env.{windows,linux,osx}.AZURE_CONFIG_DIR` so every terminal VS Code spawns in this workspace inherits the scoped Azure config automatically — you don't even have to source the activator first. +3. Checks whether `claude` is on PATH. If not, prints the platform-appropriate one-liner install command. Set `CLAUDE_CODE_AUTO_INSTALL=true` *before* `azd up` to run [the official installer](https://claude.ai/install.ps1) automatically. + +Authentication uses Microsoft Entra ID through your existing `az login` session — no API keys to manage. If the Foundry resource lives in a non-default tenant, run `az login --tenant ` first so the [token tenant matches the resource tenant](https://learn.microsoft.com/azure/foundry/foundry-models/how-to/configure-claude-code?tabs=bash#troubleshooting). + + + +> **Workspace-scoped `az login`.** Both the activators and `.vscode/settings.json` set `AZURE_CONFIG_DIR=/.azure-cli` so that any `az login` (or `azd auth login`) you do here writes its token cache and config to **`./.azure-cli/`** inside the repo — never to the global `~/.azure`. The benefits: +> +> - Other VS Code windows / shells keep their own existing `~/.azure` login (different tenant, different account — whatever) and are not affected. +> - Logging out (`az logout`) or `rm -rf .azure-cli` only nukes this workspace's credentials. +> - The directory is gitignored, so credentials never reach the repo. 
+> +> VS Code applies the env var automatically to any terminal it opens inside this folder. If you launch a terminal outside VS Code, source the activator first (`. ./claude-code.env.ps1` or `source ./claude-code.env.sh`) before running `az login`. Verify with `az config get core` — the `config_path` should point inside the repo. + +To run Claude Code in a fresh shell at any time: + +```powershell +. ./claude-code.env.ps1 # PowerShell. macOS/Linux: source ./claude-code.env.sh +claude /status # verify "API provider: Microsoft Foundry" +``` + +### Verify Claude Code is wired up + +Four ways to confirm the CLI is talking to your fresh Foundry deployment, easiest first. + +**0. One-command end-to-end check** — runs every check in this section plus an SDK round trip in one shot: + +```powershell +pwsh -File scripts/verify-claude-code.ps1 # all checks + claude -p per deployed family +pwsh -File scripts/verify-claude-code.ps1 -SkipClaudeCall # config checks only (no token cost) +pwsh -File scripts/verify-claude-code.ps1 -RunPythonSample # also runs python src/hello_claude.py +``` + +macOS/Linux: + +```bash +bash scripts/verify-claude-code.sh # default +bash scripts/verify-claude-code.sh --skip-claude-call # config only +bash scripts/verify-claude-code.sh --run-python-sample # adds the Python Entra ID round trip +``` + +The verify script checks the activator file, env vars, `.vscode/settings.json` shape, `az` login + tenant, `claude` on PATH (with `-AutoInstall` / `--auto-install` to install it if missing), then runs a non-interactive `claude -p` per deployed family. Exits non-zero on any hard failure so you can wire it into CI. + +The rest of this section is the same checks broken out manually. + +**1. One-shot prompt (non-interactive)** — fastest manual check: + +```powershell +. ./claude-code.env.ps1 +'who are you?' | claude -p +``` + +You should see a one-line reply that identifies the deployed model (e.g. *"I'm Claude Sonnet 4.6, built by Anthropic."*). 
macOS/Linux: + +```bash +source ./claude-code.env.sh +echo 'who are you?' | claude -p +``` + +**2. Interactive REPL** — the normal way to use it: + +```powershell +. ./claude-code.env.ps1 +claude +``` + +Useful slash commands once inside: + +| Command | What it shows | +|---|---| +| `/status` | API provider (should say **Microsoft Foundry**), deployment name | +| `/model` | Confirms the Anthropic family wired up | +| `/help` | Full command list | + +**3. VS Code extension** — install once, picks up `.vscode/settings.json` automatically: + +```powershell +code --install-extension anthropic.claude-code +``` + +Then open the Command Palette → **"Claude Code: Start"** (or click the Claude icon in the activity bar). No extra config is needed — the postprovision hook already populated `claudeCode.environmentVariables` and `claudeCode.disableLoginPrompt` in [.vscode/settings.json](./.vscode/settings.json). + +> **Still seeing a "Sign in to Claude" prompt?** Reload the window (Command Palette → **"Developer: Reload Window"**) so the extension re-reads `.vscode/settings.json`. If you used an older version of the hook that wrote a `"Claude Code: Environment Variables"` key, just re-run `pwsh -File scripts/configure-claude-code.ps1` — it strips the stale key and writes the correct `claudeCode.environmentVariables` schema. + +> **Auth error?** If you see `401` / `Token tenant doesn't match resource tenant`, refresh your Azure login against the right tenant: +> ```powershell +> az login --tenant # the tenant that owns the Foundry resource +> ``` + +You can also re-run the hook standalone: + +```powershell +pwsh -File scripts/configure-claude-code.ps1 +# or: +bash scripts/configure-claude-code.sh +``` + +> **Multi-family support.** Set any combination of `CLAUDE_HAIKU_MODEL` / `CLAUDE_SONNET_MODEL` / `CLAUDE_OPUS_MODEL` and the template deploys each family as a sibling deployment under the same Foundry account. 
The hook writes one `ANTHROPIC_DEFAULT__MODEL` per deployed family into the activator + `.vscode/settings.json` automatically. See [Choosing which models to deploy](#choosing-which-models-to-deploy). ## SDK call shape @@ -123,8 +289,9 @@ If the Anthropic SDK ever accepts a callable for `auth_token`, this shim becomes - **Microsoft Foundry** account (`Microsoft.CognitiveServices/accounts`, kind `AIServices`, SKU `S0`, `allowProjectManagement = true`) - **Foundry project** -- A **Claude model deployment** (`GlobalStandard`) with the required `modelProviderData` block -- *Optional* RBAC: *Azure AI User* + *Azure AI Project Manager* on the deploying principal (set `ASSIGN_RBAC=true`) +- One **Claude deployment per requested family** (`GlobalStandard`, with the required `modelProviderData` block) — set `CLAUDE_HAIKU_MODEL` / `CLAUDE_SONNET_MODEL` / `CLAUDE_OPUS_MODEL` to control which families. Sonnet/Opus deployments chain on the prior to avoid Foundry's per-account 409s on concurrent create. +- *Optional* RBAC: *Foundry User* + *Foundry Project Manager* on the deploying principal (set `ASSIGN_RBAC=true`). *(These roles were previously called `Azure AI User` / `Azure AI Project Manager`; Azure renamed them — the underlying role GUIDs are unchanged.)* + - **Heads up:** without this (or a manual post-deploy grant), the Python SDK and `claude` CLI will return `401 PermissionDenied` even though `azd up` succeeded. See [Granting data-plane roles after `azd up`](#granting-data-plane-roles-after-azd-up).
@@ -135,11 +302,19 @@ If the Anthropic SDK ever accepts a callable for `auth_token`, this shim becomes claude/ ├── infra-bicep/ # azd template — Bicep variant ├── infra-terraform/ # azd template — Terraform variant +├── scripts/ +│ ├── preflight-claude.ps1 # `azd up` preflight: catalog + quota check +│ ├── preflight-claude.sh # POSIX equivalent +│ ├── configure-claude-code.ps1 # postprovision hook: configure Claude Code for the new Foundry resource +│ ├── configure-claude-code.sh # POSIX equivalent +│ ├── verify-claude-code.ps1 # post-deploy smoke test: activator + env + `claude -p` round trip +│ └── verify-claude-code.sh # POSIX equivalent ├── src/ │ ├── hello_claude.py # One-shot Messages call (Entra ID) │ ├── hello_claude_apikey.py # Same, but with an API key (dev/test only) │ ├── hello_claude_token_refresh.py # Long-running variant with auto-refreshing Entra token -│ └── chat_stream.py # Streaming multi-turn chat loop +│ ├── chat_stream.py # Streaming multi-turn chat loop +│ └── check_claude_quota.py # Inspect Claude quota + capacity via ARM (see Advanced) ├── Get-ClaudeRegions.ps1 ├── requirements.txt └── .env.sample @@ -156,9 +331,18 @@ claude/ | `404 Not Found` on inference | Base URL must end in `/anthropic` — `https://.services.ai.azure.com/anthropic`. | | `401 Unauthorized` | Token scope must be `https://ai.azure.com/.default`. Re-run `az login`. | | `401 Unauthorized` after ~1 hour of running | The Entra ID token captured at startup has expired. The plain `Anthropic` client doesn't auto-refresh — see the [advanced section](#advanced-long-running-processes-auto-refreshing-the-entra-id-token) for [src/hello_claude_token_refresh.py](./src/hello_claude_token_refresh.py), which uses an `AnthropicIdentity` shim to refresh per request. | -| `403 Forbidden` | Missing a data-plane role on the Foundry account. Grant `Cognitive Services User`, `Azure AI User`, or `Azure AI Developer` (see permissions details below). 
| +| `403 Forbidden` | Missing a data-plane role on the Foundry account. Grant `Cognitive Services User`, `Foundry User` (formerly `Azure AI User`), or `Azure AI Developer` (see permissions details below). | | `Region not available` | Deploy to `eastus2` or `swedencentral` (or `westus2` for opus-only). | -| Subscription can't deploy Claude | Confirm subscription eligibility per the [official docs](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/use-foundry-models-claude#prerequisites). | +| Subscription can't deploy Claude | Confirm subscription eligibility per the [official docs](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/use-foundry-models-claude#prerequisites). The [preprovision preflight](#preprovision-preflight-marketplace-catalog--quota) warns about this before `azd up` calls the RP. | +| `Error occurred when subscribing to Marketplace: Marketplace Subscription purchase eligibility check failed` | Your subscription cannot purchase the Anthropic offer (no entitlement, sandbox sub, paid-offer policy denial, etc.). Either use a subscription with Claude-on-Foundry entitlement, or pre-accept the agreement explicitly with `az term accept --publisher anthropic --product anthropic--offer --plan anthropic--plan-new`. | +| Opaque `400 715-123420 "An error occurred. Please reach out to support for additional assistance."` on the Terraform deployment step (RG / Foundry account / project all succeed) | **Insufficient quota.** Terraform's `azapi_resource` bypasses ARM preflight validation and the Cognitive Services RP returns this generic code instead of `InsufficientQuota`. **Fix:** check `az cognitiveservices usage list -l --query "[?contains(name.value,'')]"` — if `currentValue + requestedCapacity > limit`, lower `CLAUDE_SONNET_CAPACITY` / `CLAUDE_HAIKU_CAPACITY` / `CLAUDE_OPUS_CAPACITY` via `azd env set`, delete unused deployments to free capacity, or request a quota increase in the Foundry portal. 
**Also check for soft-deleted accounts** still holding quota — see [Free quota held by soft-deleted accounts](#free-quota-held-by-soft-deleted-accounts). To confirm it really is quota, re-run on the Bicep variant which surfaces the clearer `InsufficientQuota` error. | +| Bicep: `InsufficientQuota: This operation require N new capacity in quota Tokens Per Minute (thousands) - Claude , which is bigger than the current available capacity X. The current quota usage is U and the quota limit is L.` | Same root cause as `715-123420` above, just with a clear message because Bicep goes through ARM preflight. Lower the capacity env var(s) or free up quota. | +| Preflight: `Marketplace offer ... not found` | `CLAUDE_MODEL_NAME` is misspelled, the model isn't in the Anthropic-on-Foundry catalog yet, or Anthropic changed the plan-name convention. | +| Preflight: `Quota insufficient` (exit 6) | Requested `CLAUDE_*_CAPACITY` plus existing usage exceeds the per-region quota limit. Lower the requested capacity, free up quota by deleting unused deployments, or [purge soft-deleted accounts](#free-quota-held-by-soft-deleted-accounts) that may still be holding TPM. | +| Quota looks full but you have no live deployments (`az cognitiveservices usage list` shows `currentValue > 0`, deployment still fails with `715-123420` / `InsufficientQuota`) | **Soft-deleted Cognitive Services accounts still reserve quota for 48 h.** A previous `azd down` (or any RG / account delete) puts the AIServices account in a recoverable state that keeps holding TPM. **Fix:** list and purge them: `az cognitiveservices account list-deleted -o table` then `az cognitiveservices account purge --name --location --resource-group ` for each. See [Free quota held by soft-deleted accounts](#free-quota-held-by-soft-deleted-accounts). 
| +| `401 PermissionDenied: Principal does not have access to API/Operation` intermittently — same code passes seconds later | Data-plane RBAC propagation lag on a freshly-granted role (`Cognitive Services User` / `Foundry User` / `Azure AI Developer`). The grant can take a few minutes to land on the Foundry data plane even after `az role assignment create` returns. Wait a minute and retry; if it still fails consistently, verify the role with `az role assignment list --assignee --scope -o table`. | +| Windows: `UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f60a'` printing the model's response | The Foundry sample apps happily return emoji and other non-CP1252 characters; the default Windows console (cp1252) can't render them. Either set `$env:PYTHONIOENCODING = "utf-8"` before running, or switch the console to UTF-8 with `chcp 65001`. The Python samples already handle this gracefully, but third-party tooling may not. | +| `check_claude_quota.py` exits with `Could not resolve a subscription id ... [WinError 2] The system cannot find the file specified` | The script falls back to `az account show` to find a subscription, but the Azure CLI isn't on `PATH` in the active shell. Either set `$env:AZURE_SUBSCRIPTION_ID = ""` or pass `--subscription ` explicitly. |
Why modelProviderData matters @@ -169,22 +353,188 @@ The Terraform variant uses `azapi_resource` for both the Foundry account and the
+
+Preprovision preflight: Marketplace catalog & quota + +Both IaC variants run [`scripts/preflight-claude.ps1`](./scripts/preflight-claude.ps1) (with [`preflight-claude.sh`](./scripts/preflight-claude.sh) as a POSIX fallback) from the `preprovision` hook in `azure.yaml`, to give you a fast, descriptive error for the most common misconfigurations before `azd up` calls the Cognitive Services RP. + +What the preflight does, and does not, do: + +| Check | Behavior | +|---|---| +| `CLAUDE_ORGANIZATION_NAME` / `AZURE_LOCATION` set | Hard fail (exit 1) if missing. | +| Marketplace offer/plan resolves | Hard fail (exit 4) on 400 "offer not found" — catches `CLAUDE_MODEL_NAME` typos and unreleased SKUs. The script queries publisher `anthropic` with offer/plan naming `anthropic--offer` / `anthropic--plan-new`. | +| Marketplace agreement `properties.accepted == true` | Warns only. The Cognitive Services RP auto-signs the agreement during deployment on eligible subs, so an unsigned status is informational. Pre-accept manually if your sub blocks RP-initiated subscribes. | +| `az cognitiveservices usage list` quota headroom for the SKU | **Hard fail (exit 6)** if `currentValue + requested > limit`. This is the most common cause of deployment failures and the preflight blocks `azd up` early with an actionable message. | + +> **Why a quota check?** The Cognitive Services RP returns an opaque `400 715-123420 "An error occurred. Please reach out to support for additional assistance."` when there isn't enough TPM quota for the requested capacity. Worse, Terraform's `azapi_resource` skips ARM preflight validation, so the user sees this opaque code with no hint that quota is the cause. (Bicep / `az deployment group create` surface the real `InsufficientQuota` error.) The preflight catches the same condition before the deployment is even attempted, with a clear message and remediation instructions. 
+ +Run it standalone any time: + +```powershell +$env:CLAUDE_ORGANIZATION_NAME = "Contoso" +$env:AZURE_LOCATION = "eastus2" +$env:CLAUDE_MODEL_NAME = "claude-sonnet-4-6" +$env:CLAUDE_SONNET_CAPACITY = "25" # default 25; lower if quota is tight +pwsh -File scripts/preflight-claude.ps1 +``` + +If the quota check fails, see what's used: + +```powershell +az cognitiveservices usage list -l eastus2 --query "[?contains(name.value,'claude-sonnet-4-6')].{quota:name.value, used:currentValue, limit:limit}" -o table +``` + +To list all Anthropic agreements (signed or not) visible on the active subscription: + +```powershell +$sub = az account show --query id -o tsv +az rest --method get --url "https://management.azure.com/subscriptions/$sub/providers/Microsoft.MarketplaceOrdering/agreements?api-version=2021-01-01" --query "value[?properties.publisher=='anthropic']" +``` + +To pre-accept explicitly (rarely needed thanks to the RP auto-accept; useful for restricted-subscription scenarios): + +```powershell +az term accept --publisher anthropic --product anthropic-claude-sonnet-4-6-offer --plan anthropic-claude-sonnet-4-6-plan-new +``` + +
+ +
+Free quota held by soft-deleted Cognitive Services accounts + +When you `azd down` (or otherwise delete) a Foundry / AIServices account, Azure does **not** immediately release the TPM quota it reserved. The account moves to a *soft-deleted* state and **continues to count against your per-model quota** for up to 48 hours, after which it is permanently purged automatically. + +In day-to-day testing — where you may create and destroy several Foundry accounts in the same region in quick succession — this is the most common cause of "quota looks full but I have no live deployments" failures (which surface as opaque `715-123420` from Terraform or `InsufficientQuota` from Bicep). + +**List soft-deleted accounts in the active subscription:** + +```powershell +az cognitiveservices account list-deleted --query "[].{name:name, location:location, deletionDate:properties.deletionDate}" -o table +``` + +**Purge them one at a time** (the original RG name is part of the deleted-account id and must be passed verbatim — the RG itself does not have to still exist): + +```powershell +az cognitiveservices account purge ` + --name <account-name> ` + --location <location> ` + --resource-group <original-resource-group> +``` + +**Purge all of them in parallel** (faster — each purge is a slow LRO): + +```powershell +$accounts = az cognitiveservices account list-deleted -o json | ConvertFrom-Json +$jobs = foreach ($a in $accounts) { + $rg = ($a.id -split '/')[8] # /subscriptions/<sub>/providers/Microsoft.CognitiveServices/locations/<location>/resourceGroups/<rg>/deletedAccounts/<name> + Start-Job -ScriptBlock { + param($n,$l,$r) + az cognitiveservices account purge --name $n --location $l --resource-group $r + } -ArgumentList $a.name, $a.location, $rg +} +$jobs | Wait-Job | Receive-Job +$jobs | Remove-Job +``` + +POSIX equivalent: + +```bash +az cognitiveservices account list-deleted -o tsv \ + --query "[].[name, location, id]" | while IFS=$'\t' read -r name location id; do + rg=$(echo "$id" | awk -F'/' '{print $9}') + az cognitiveservices account purge --name 
"$name" --location "$location" --resource-group "$rg" & +done +wait +``` + +After all purges complete, re-check quota: + +```powershell +az cognitiveservices usage list -l <region> --query "[?contains(name.value,'claude-')]" -o table +``` + +
+ +
+Advanced: check Claude quota & capacity programmatically + +[`src/check_claude_quota.py`](./src/check_claude_quota.py) queries the Azure Resource Manager APIs documented for Foundry quota — the [Usages API](https://learn.microsoft.com/azure/foundry/openai/how-to/quota?tabs=python#programmatically-check-quota-and-capacity) and the Model Capacities API — and prints a single merged table keyed on `(model, region)` with TPM utilization, derived RPM limits, deployable capacity, and model version. + +Requirements: + +- Caller authenticated via `az login` / `azd auth login` (or any other `DefaultAzureCredential` source). +- `Cognitive Services Usages Reader` (or `Reader`) at subscription scope. Without it, the calls return `403`. +- The subscription must be Enterprise or MCA-E for Claude quota lines to appear (per the [official prerequisites](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/use-foundry-models-claude#prerequisites)). + +Run it: + +```powershell +python src/check_claude_quota.py # current subscription, default regions +python src/check_claude_quota.py --regions eastus2 swedencentral # explicit regions +python src/check_claude_quota.py --subscription <subscription-id> --tenant <tenant-id> +python src/check_claude_quota.py --json # machine-readable +``` + +Flags: + +| Flag | Default | Notes | +|---|---|---| +| `--subscription` | current `az` subscription / `AZURE_SUBSCRIPTION_ID` | Subscription to query. | +| `--tenant` | caller's home tenant | Use when the subscription lives in a different tenant. Auth chain becomes `AzureCliCredential` + `AzureDeveloperCliCredential` scoped to that tenant. | +| `--regions` | `eastus2 swedencentral` | Regions to query for usages. | +| `--models` | all known Claude models | Filter capacity lookup. | +| `--json` | off | Emit raw JSON instead of the merged table. | + +Notes on the output: + +- **RPM is not a separate quota line** in the Usages API for Claude — only TPM is allocated. 
The `RPM Limit*` column is **derived** from the per-model RPM:TPM ratios published in the [Foundry Claude docs](https://learn.microsoft.com/azure/foundry/foundry-models/how-to/use-foundry-models-claude#api-quotas-and-limits) (e.g. Sonnet 4.5 ships at 2 RPM per 1 kTPM; everything else at 1:1). +- **TPM Limit values are reported in thousands** by the underlying API; the script multiplies by 1,000 so the table reads in raw tokens-per-minute. +- The **Model Capacities API requires `modelVersion`**, not just `modelName`. The script discovers active versions automatically from `locations/{region}/models` filtered to `format=Anthropic`. +- The `Def RPM` / `Def TPM` columns are the **public non-EA defaults** (always 0/0 because Claude is gated to Enterprise + MCA-E subscriptions); the `TPM Used` / `TPM Limit` / `RPM Limit*` / `Capacity` columns are the values your EA/MCA-E subscription is actually getting. + +
+ ## Required permissions | Action | Role | Scope | |---|---|---| | Provision Foundry + Claude deployment | `Contributor` (or `Cognitive Services Contributor`) | Resource group / subscription | | Assign RBAC inside this template (`ASSIGN_RBAC=true`) | `User Access Administrator` or `Owner` | Resource group / subscription | -| Call the Messages API with Entra ID | `Azure AI User` *(or `Azure AI Developer` — see note)* | Foundry account | +| Call the Messages API with Entra ID | `Foundry User` *(or `Azure AI Developer` — see note)* | Foundry account | If you do not have `Microsoft.Authorization/roleAssignments/write`, leave `ASSIGN_RBAC=false` (the default) and ask an admin to grant one of the roles below on the Foundry account afterwards. + + +**Granting data-plane roles after `azd up`** (one-liner if you own RBAC on the Foundry account): + +```powershell +$acct = (azd env get-value FOUNDRY_ACCOUNT_NAME) +$rg = (azd env get-value AZURE_RESOURCE_GROUP) +$oid = (az ad signed-in-user show --query id -o tsv) +$scope = "/subscriptions/$(az account show --query id -o tsv)/resourceGroups/$rg/providers/Microsoft.CognitiveServices/accounts/$acct" +az role assignment create --assignee-object-id $oid --assignee-principal-type User --role "Cognitive Services User" --scope $scope +``` + +POSIX equivalent: + +```bash +acct=$(azd env get-value FOUNDRY_ACCOUNT_NAME) +rg=$(azd env get-value AZURE_RESOURCE_GROUP) +oid=$(az ad signed-in-user show --query id -o tsv) +scope="/subscriptions/$(az account show --query id -o tsv)/resourceGroups/$rg/providers/Microsoft.CognitiveServices/accounts/$acct" +az role assignment create --assignee-object-id "$oid" --assignee-principal-type User --role "Cognitive Services User" --scope "$scope" +``` + +Wait 1–3 minutes for the role to propagate to the Foundry data plane before retrying — see the [intermittent 401 troubleshooting row](#troubleshooting). 
+ **Roles that work for Claude inference:** | Role | Data action(s) | Notes | |---|---|---| | `Cognitive Services User` | `Microsoft.CognitiveServices/*/read` + inference action | The minimum role recommended by [the official docs](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/use-foundry-models-claude#troubleshooting). | -| `Azure AI User` | `Microsoft.CognitiveServices/*` | Broadest data-plane access; what this template assigns when `ASSIGN_RBAC=true`. | +| `Foundry User` | `Microsoft.CognitiveServices/*` | Broadest data-plane access; what this template assigns when `ASSIGN_RBAC=true`. **Previously named `Azure AI User`** — Azure renamed it, GUID `53ca6127-db72-4b80-b1b0-d745d6d5456d` is unchanged. | | `Azure AI Developer` | includes `Microsoft.CognitiveServices/accounts/MaaS/*` | Sufficient for Claude because Claude routes through the **MaaS** data path as a partner/marketplace model. (It is **not** sufficient for first-party Foundry models that route through `accounts/AIServices/*`.) | > The role `Azure AI Developer` was historically called out as *insufficient* for Foundry inference. That guidance still applies to first-party `AIServices` models, but Claude/Anthropic deployments dispatch through `Microsoft.CognitiveServices/accounts/MaaS/*`, which `Azure AI Developer` already grants. Verified against `claude-sonnet-4-6` on `2025-10-01-preview`. diff --git a/infra-bicep/azure.yaml b/infra-bicep/azure.yaml index c4d40ed..d128fc5 100644 --- a/infra-bicep/azure.yaml +++ b/infra-bicep/azure.yaml @@ -10,15 +10,24 @@ hooks: preprovision: windows: shell: pwsh + run: pwsh -NoProfile -ExecutionPolicy Bypass -File ../scripts/preflight-claude.ps1 + posix: + shell: sh run: | - if (-not $env:CLAUDE_ORGANIZATION_NAME) { - Write-Error "CLAUDE_ORGANIZATION_NAME is required. 
Run: azd env set CLAUDE_ORGANIZATION_NAME 'Your Org'" - exit 1 - } + if command -v pwsh >/dev/null 2>&1; then + pwsh -NoProfile -File ../scripts/preflight-claude.ps1 + else + bash ../scripts/preflight-claude.sh + fi + postprovision: + windows: + shell: pwsh + run: pwsh -NoProfile -ExecutionPolicy Bypass -File ../scripts/configure-claude-code.ps1 posix: shell: sh run: | - if [ -z "$CLAUDE_ORGANIZATION_NAME" ]; then - echo "CLAUDE_ORGANIZATION_NAME is required. Run: azd env set CLAUDE_ORGANIZATION_NAME 'Your Org'" - exit 1 + if command -v pwsh >/dev/null 2>&1; then + pwsh -NoProfile -File ../scripts/configure-claude-code.ps1 + else + bash ../scripts/configure-claude-code.sh fi diff --git a/infra-bicep/infra/foundry.bicep b/infra-bicep/infra/foundry.bicep index 02e74e9..47c4726 100644 --- a/infra-bicep/infra/foundry.bicep +++ b/infra-bicep/infra/foundry.bicep @@ -1,12 +1,22 @@ -// Foundry account + project + Claude deployment + optional RBAC. +// Foundry account + project + per-family Claude deployments + optional RBAC. +// +// Each of haikuModel / sonnetModel / opusModel is independent. Empty string +// means "skip that family". The three deployments share the same Foundry +// account; the per-family capacity controls TPM allocation. param location string param tags object param accountName string param projectName string -param deploymentName string -param modelName string +param suffix string + +param haikuModel string +param sonnetModel string +param opusModel string +param haikuCapacity int +param sonnetCapacity int +param opusCapacity int param modelVersion string -param modelCapacity int + param claudeOrganizationName string param claudeCountryCode string param claudeIndustry string @@ -14,10 +24,19 @@ param principalId string param assignRbac string var rbacEnabled = toLower(assignRbac) == 'true' && !empty(principalId) +var nameSuffix = take(suffix, 6) + +// Pre-compute deployment names so outputs work even when a family is skipped. 
+var haikuDeploymentNameVar = empty(haikuModel) ? '' : '${haikuModel}-${nameSuffix}' +var sonnetDeploymentNameVar = empty(sonnetModel) ? '' : '${sonnetModel}-${nameSuffix}' +var opusDeploymentNameVar = empty(opusModel) ? '' : '${opusModel}-${nameSuffix}' // Built-in role definition IDs. -var azureAiUserRoleId = '53ca6127-db72-4b80-b1b0-d745d6d5456d' -var azureAiProjectManagerRoleId = 'eadc314b-1a2d-4efa-be10-5d325db5065e' +// NOTE: Azure renamed these roles. The GUIDs are stable. +// 53ca6127-... : "Azure AI User" -> "Foundry User" (data-plane access) +// eadc314b-... : "Azure AI Project Manager" -> "Foundry Project Manager" +var foundryUserRoleId = '53ca6127-db72-4b80-b1b0-d745d6d5456d' +var foundryProjectManagerRoleId = 'eadc314b-1a2d-4efa-be10-5d325db5065e' resource account 'Microsoft.CognitiveServices/accounts@2025-10-01-preview' = { name: accountName @@ -49,21 +68,74 @@ resource project 'Microsoft.CognitiveServices/accounts/projects@2025-10-01-previ properties: {} } -resource claudeDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview' = { +resource haikuDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview' = if (!empty(haikuModel)) { + parent: account + name: haikuDeploymentNameVar + sku: { + name: 'GlobalStandard' + capacity: haikuCapacity + } + properties: { + model: { + format: 'Anthropic' + name: haikuModel + version: modelVersion + } + modelProviderData: { + organizationName: claudeOrganizationName + countryCode: claudeCountryCode + industry: claudeIndustry + } + versionUpgradeOption: 'OnceNewDefaultVersionAvailable' + raiPolicyName: 'Microsoft.DefaultV2' + } + dependsOn: [ + project + ] +} + +resource sonnetDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview' = if (!empty(sonnetModel)) { + parent: account + name: sonnetDeploymentNameVar + sku: { + name: 'GlobalStandard' + capacity: sonnetCapacity + } + properties: { + model: { + format: 'Anthropic' + name: sonnetModel + 
version: modelVersion + } + modelProviderData: { + organizationName: claudeOrganizationName + countryCode: claudeCountryCode + industry: claudeIndustry + } + versionUpgradeOption: 'OnceNewDefaultVersionAvailable' + raiPolicyName: 'Microsoft.DefaultV2' + } + // Foundry serializes deployments under one account; chain them to avoid + // 409s on concurrent create. + dependsOn: [ + project + haikuDeployment + ] +} + +resource opusDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview' = if (!empty(opusModel)) { parent: account - name: deploymentName + name: opusDeploymentNameVar sku: { name: 'GlobalStandard' - capacity: modelCapacity + capacity: opusCapacity } properties: { model: { - // `Anthropic` is the on-the-wire format literal in the Foundry catalog. format: 'Anthropic' - name: modelName + name: opusModel version: modelVersion } - // REQUIRED for Claude. `industry` must be lowercase. modelProviderData: { organizationName: claudeOrganizationName countryCode: claudeCountryCode @@ -74,24 +146,25 @@ resource claudeDeployment 'Microsoft.CognitiveServices/accounts/deployments@2025 } dependsOn: [ project + sonnetDeployment ] } -resource aiUserAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (rbacEnabled) { - name: guid(account.id, principalId, azureAiUserRoleId) +resource foundryUserAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (rbacEnabled) { + name: guid(account.id, principalId, foundryUserRoleId) scope: account properties: { - roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', azureAiUserRoleId) + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', foundryUserRoleId) principalId: principalId principalType: 'User' } } -resource aiProjectManagerAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (rbacEnabled) { - name: guid(account.id, principalId, azureAiProjectManagerRoleId) +resource foundryProjectManagerAssignment 
'Microsoft.Authorization/roleAssignments@2022-04-01' = if (rbacEnabled) { + name: guid(account.id, principalId, foundryProjectManagerRoleId) scope: account properties: { - roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', azureAiProjectManagerRoleId) + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', foundryProjectManagerRoleId) principalId: principalId principalType: 'User' } @@ -99,5 +172,7 @@ resource aiProjectManagerAssignment 'Microsoft.Authorization/roleAssignments@202 output claudeBaseUrl string = 'https://${account.name}.services.ai.azure.com/anthropic' output foundryProjectEndpoint string = 'https://${account.name}.services.ai.azure.com/api/projects/${project.name}' -output claudeDeploymentName string = claudeDeployment.name output foundryAccountName string = account.name +output haikuDeploymentName string = haikuDeploymentNameVar +output sonnetDeploymentName string = sonnetDeploymentNameVar +output opusDeploymentName string = opusDeploymentNameVar diff --git a/infra-bicep/infra/main.bicep b/infra-bicep/infra/main.bicep index 478ed64..e960424 100644 --- a/infra-bicep/infra/main.bicep +++ b/infra-bicep/infra/main.bicep @@ -5,13 +5,19 @@ // MUST be lowercase to match the Foundry portal dropdown. // `allowProjectManagement = true` is required to create projects under the // Foundry account. +// +// Per-family deployment mode: +// Set any of CLAUDE_HAIKU_MODEL / CLAUDE_SONNET_MODEL / CLAUDE_OPUS_MODEL to +// deploy that family (empty = skip). Each family gets its own capacity var. +// If all three family vars are empty, falls back to legacy CLAUDE_MODEL_NAME +// single-deployment behavior. // ============================================================================ targetScope = 'subscription' @description('azd environment name. Used for resource group + tagging.') param environmentName string -@description('Azure region. 
Claude in Foundry: eastus2 or swedencentral (or westus2 for opus).') +@description('Azure region. All three families coexist in eastus2 or swedencentral.') @allowed([ 'eastus2' 'swedencentral' @@ -22,25 +28,38 @@ param location string @description('Object id of the deploying user/SP. Empty disables RBAC.') param principalId string = '' -@description('Whether to assign Azure AI User / Project Manager to principalId. Set to "true" to enable.') +@description('Whether to assign Foundry User + Foundry Project Manager (formerly Azure AI User / Project Manager) to principalId. Set to "true" to enable.') param assignRbac string = 'false' @description('Short prefix for resource names.') param baseName string = 'claude' -@allowed([ - 'claude-haiku-4-5' - 'claude-sonnet-4-5' - 'claude-sonnet-4-6' - 'claude-opus-4-1' - 'claude-opus-4-5' - 'claude-opus-4-6' - 'claude-opus-4-7' -]) -param modelName string = 'claude-sonnet-4-6' +// --- Per-family model selection (preferred) --------------------------------- +@description('Haiku family model id. Empty = do not deploy haiku.') +param haikuModel string = '' +@description('Sonnet family model id. Empty = do not deploy sonnet.') +param sonnetModel string = '' +@description('Opus family model id. Empty = do not deploy opus.') +param opusModel string = '' + +@description('Haiku deployment capacity (TPM / 1000). Default 25 is a low-risk value that fits most subscriptions; raise via `azd env set CLAUDE_HAIKU_CAPACITY ` when quota allows.') +param haikuCapacity int = 25 +@description('Sonnet deployment capacity (TPM / 1000). Default 25 is a low-risk value that fits most subscriptions; raise via `azd env set CLAUDE_SONNET_CAPACITY ` when quota allows.') +param sonnetCapacity int = 25 +@description('Opus deployment capacity (TPM / 1000). 
Default 25 is a low-risk value that fits most subscriptions; raise via `azd env set CLAUDE_OPUS_CAPACITY ` when quota allows.') +param opusCapacity int = 25 + +@description('Model version for each family deployment.') param modelVersion string = '1' -param modelCapacity int = 50 +// --- Legacy single-model fallback ------------------------------------------- +// Only used when none of haikuModel / sonnetModel / opusModel are set. +@description('Legacy single-model name. Ignored when any of the per-family vars are set.') +param modelName string = 'claude-sonnet-4-6' +@description('Legacy single-model capacity. Ignored when any of the per-family vars are set.') +param modelCapacity int = 25 + +// --- modelProviderData ------------------------------------------------------ @description('Organization name surfaced via modelProviderData.') param claudeOrganizationName string @description('Two-letter ISO country code.') @@ -67,7 +86,21 @@ var tags = { var suffix = take(uniqueString(subscription().id, environmentName), 8) var accountName = '${baseName}-foundry-${suffix}' var projectName = '${baseName}-proj-${suffix}' -var deploymentName = '${modelName}-${take(suffix, 6)}' + +// Resolve effective per-family models. If no family vars are set, route the +// legacy modelName into its matching slot for back-compat. +var anyFamilySet = !empty(haikuModel) || !empty(sonnetModel) || !empty(opusModel) +var legacyLower = toLower(modelName) +var legacyIsHaiku = contains(legacyLower, 'haiku') +var legacyIsSonnet = contains(legacyLower, 'sonnet') +var legacyIsOpus = contains(legacyLower, 'opus') + +var effectiveHaikuModel = anyFamilySet ? haikuModel : (legacyIsHaiku ? modelName : '') +var effectiveSonnetModel = anyFamilySet ? sonnetModel : (legacyIsSonnet ? modelName : '') +var effectiveOpusModel = anyFamilySet ? opusModel : (legacyIsOpus ? modelName : '') +var effectiveHaikuCapacity = anyFamilySet ? haikuCapacity : modelCapacity +var effectiveSonnetCapacity = anyFamilySet ? 
sonnetCapacity : modelCapacity +var effectiveOpusCapacity = anyFamilySet ? opusCapacity : modelCapacity resource rg 'Microsoft.Resources/resourceGroups@2024-03-01' = { name: 'rg-${environmentName}' @@ -83,10 +116,14 @@ module foundry 'foundry.bicep' = { tags: tags accountName: accountName projectName: projectName - deploymentName: deploymentName - modelName: modelName + suffix: suffix + haikuModel: effectiveHaikuModel + sonnetModel: effectiveSonnetModel + opusModel: effectiveOpusModel + haikuCapacity: effectiveHaikuCapacity + sonnetCapacity: effectiveSonnetCapacity + opusCapacity: effectiveOpusCapacity modelVersion: modelVersion - modelCapacity: modelCapacity claudeOrganizationName: claudeOrganizationName claudeCountryCode: claudeCountryCode claudeIndustry: claudeIndustry @@ -97,7 +134,15 @@ module foundry 'foundry.bicep' = { output CLAUDE_BASE_URL string = foundry.outputs.claudeBaseUrl output FOUNDRY_PROJECT_ENDPOINT string = foundry.outputs.foundryProjectEndpoint -output CLAUDE_DEPLOYMENT_NAME string = foundry.outputs.claudeDeploymentName output FOUNDRY_ACCOUNT_NAME string = foundry.outputs.foundryAccountName output AZURE_RESOURCE_GROUP string = rg.name output AZURE_LOCATION string = location + +// Per-family deployment names. Empty string when that family wasn't deployed. +output CLAUDE_HAIKU_DEPLOYMENT_NAME string = foundry.outputs.haikuDeploymentName +output CLAUDE_SONNET_DEPLOYMENT_NAME string = foundry.outputs.sonnetDeploymentName +output CLAUDE_OPUS_DEPLOYMENT_NAME string = foundry.outputs.opusDeploymentName + +// Legacy single-deployment-name output. Set to the first non-empty family +// deployment so older configure-claude-code scripts continue to work. +output CLAUDE_DEPLOYMENT_NAME string = !empty(foundry.outputs.sonnetDeploymentName) ? foundry.outputs.sonnetDeploymentName : (!empty(foundry.outputs.opusDeploymentName) ? 
foundry.outputs.opusDeploymentName : foundry.outputs.haikuDeploymentName) diff --git a/infra-bicep/infra/main.parameters.json b/infra-bicep/infra/main.parameters.json index 410c86c..1717aae 100644 --- a/infra-bicep/infra/main.parameters.json +++ b/infra-bicep/infra/main.parameters.json @@ -7,6 +7,12 @@ "principalId": { "value": "${AZURE_PRINCIPAL_ID=}" }, "assignRbac": { "value": "${ASSIGN_RBAC=false}" }, "baseName": { "value": "${AZURE_BASE_NAME=claude}" }, + "haikuModel": { "value": "${CLAUDE_HAIKU_MODEL=}" }, + "sonnetModel": { "value": "${CLAUDE_SONNET_MODEL=}" }, + "opusModel": { "value": "${CLAUDE_OPUS_MODEL=}" }, + "haikuCapacity": { "value": "${CLAUDE_HAIKU_CAPACITY=50}" }, + "sonnetCapacity": { "value": "${CLAUDE_SONNET_CAPACITY=50}" }, + "opusCapacity": { "value": "${CLAUDE_OPUS_CAPACITY=50}" }, "modelName": { "value": "${CLAUDE_MODEL_NAME=claude-sonnet-4-6}" }, "modelVersion": { "value": "${CLAUDE_MODEL_VERSION=1}" }, "modelCapacity": { "value": "${CLAUDE_MODEL_CAPACITY=50}" }, diff --git a/infra-terraform/azure.yaml b/infra-terraform/azure.yaml index ed1f0c6..5e5cafc 100644 --- a/infra-terraform/azure.yaml +++ b/infra-terraform/azure.yaml @@ -10,15 +10,24 @@ hooks: preprovision: windows: shell: pwsh + run: pwsh -NoProfile -ExecutionPolicy Bypass -File ../scripts/preflight-claude.ps1 + posix: + shell: sh run: | - if (-not $env:CLAUDE_ORGANIZATION_NAME) { - Write-Error "CLAUDE_ORGANIZATION_NAME is required. Run: azd env set CLAUDE_ORGANIZATION_NAME 'Your Org'" - exit 1 - } + if command -v pwsh >/dev/null 2>&1; then + pwsh -NoProfile -File ../scripts/preflight-claude.ps1 + else + bash ../scripts/preflight-claude.sh + fi + postprovision: + windows: + shell: pwsh + run: pwsh -NoProfile -ExecutionPolicy Bypass -File ../scripts/configure-claude-code.ps1 posix: shell: sh run: | - if [ -z "$CLAUDE_ORGANIZATION_NAME" ]; then - echo "CLAUDE_ORGANIZATION_NAME is required. 
Run: azd env set CLAUDE_ORGANIZATION_NAME 'Your Org'" - exit 1 + if command -v pwsh >/dev/null 2>&1; then + pwsh -NoProfile -File ../scripts/configure-claude-code.ps1 + else + bash ../scripts/configure-claude-code.sh fi diff --git a/infra-terraform/infra/main.tf b/infra-terraform/infra/main.tf index cf195b4..38c7730 100644 --- a/infra-terraform/infra/main.tf +++ b/infra-terraform/infra/main.tf @@ -4,8 +4,11 @@ # - Foundry account is created via `azapi_resource` so we can set # `allowProjectManagement = true` (required for child projects, not yet # exposed by `azurerm_cognitive_account`). -# - Claude deployment is also `azapi_resource` because `modelProviderData` +# - Claude deployments are also `azapi_resource` because `modelProviderData` # isn't yet exposed by `azurerm_cognitive_deployment` (issue #31140). +# - Per-family deployment mode: set any of haiku_model / sonnet_model / +# opus_model to deploy that family (empty = skip). All three families +# share one Foundry account. # ============================================================================ locals { @@ -14,6 +17,22 @@ locals { } account_name = "${var.base_name}-foundry-${random_string.suffix.result}" project_name = "${var.base_name}-proj-${random_string.suffix.result}" + name_suffix = substr(random_string.suffix.result, 0, 6) + + # Resolve effective per-family models. If no family vars are set, route the + # legacy model_name into its matching slot for back-compat. + any_family_set = var.haiku_model != "" || var.sonnet_model != "" || var.opus_model != "" + legacy_lower = lower(var.model_name) + legacy_is_haiku = strcontains(local.legacy_lower, "haiku") + legacy_is_sonnet = strcontains(local.legacy_lower, "sonnet") + legacy_is_opus = strcontains(local.legacy_lower, "opus") + + effective_haiku_model = local.any_family_set ? var.haiku_model : (local.legacy_is_haiku ? var.model_name : "") + effective_sonnet_model = local.any_family_set ? var.sonnet_model : (local.legacy_is_sonnet ? 
var.model_name : "") + effective_opus_model = local.any_family_set ? var.opus_model : (local.legacy_is_opus ? var.model_name : "") + effective_haiku_capacity = local.any_family_set ? tonumber(var.haiku_capacity) : tonumber(var.model_capacity) + effective_sonnet_capacity = local.any_family_set ? tonumber(var.sonnet_capacity) : tonumber(var.model_capacity) + effective_opus_capacity = local.any_family_set ? tonumber(var.opus_capacity) : tonumber(var.model_capacity) } resource "random_string" "suffix" { @@ -76,26 +95,28 @@ resource "azapi_resource" "project" { response_export_values = ["name"] } -# --- Claude deployment ---------------------------------------------------- -resource "azapi_resource" "claude" { +# --- Per-family Claude deployments ---------------------------------------- +# Each family is conditional on its model var being non-empty. Sonnet and Opus +# chain on the prior deployment to avoid Foundry's per-account serialization +# 409s on concurrent create. +resource "azapi_resource" "claude_haiku" { + count = local.effective_haiku_model == "" ? 0 : 1 type = "Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview" - name = "${var.model_name}-${substr(random_string.suffix.result, 0, 6)}" + name = "${local.effective_haiku_model}-${local.name_suffix}" parent_id = azapi_resource.foundry.id schema_validation_enabled = false # required to allow modelProviderData body = { sku = { name = "GlobalStandard" - capacity = tonumber(var.model_capacity) + capacity = local.effective_haiku_capacity } properties = { model = { - # `Anthropic` is the on-the-wire format literal in the Foundry catalog. format = "Anthropic" - name = var.model_name + name = local.effective_haiku_model version = var.model_version } - # REQUIRED for Claude. `industry` MUST be lowercase. 
modelProviderData = { organizationName = var.claude_organization_name countryCode = var.claude_country_code @@ -107,24 +128,87 @@ resource "azapi_resource" "claude" { } response_export_values = ["name"] + depends_on = [azapi_resource.project] +} + +resource "azapi_resource" "claude_sonnet" { + count = local.effective_sonnet_model == "" ? 0 : 1 + type = "Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview" + name = "${local.effective_sonnet_model}-${local.name_suffix}" + parent_id = azapi_resource.foundry.id + schema_validation_enabled = false - depends_on = [azapi_resource.project] + body = { + sku = { + name = "GlobalStandard" + capacity = local.effective_sonnet_capacity + } + properties = { + model = { + format = "Anthropic" + name = local.effective_sonnet_model + version = var.model_version + } + modelProviderData = { + organizationName = var.claude_organization_name + countryCode = var.claude_country_code + industry = var.claude_industry + } + versionUpgradeOption = "OnceNewDefaultVersionAvailable" + raiPolicyName = "Microsoft.DefaultV2" + } + } + + response_export_values = ["name"] + depends_on = [azapi_resource.project, azapi_resource.claude_haiku] +} + +resource "azapi_resource" "claude_opus" { + count = local.effective_opus_model == "" ? 
0 : 1 + type = "Microsoft.CognitiveServices/accounts/deployments@2025-10-01-preview" + name = "${local.effective_opus_model}-${local.name_suffix}" + parent_id = azapi_resource.foundry.id + schema_validation_enabled = false + + body = { + sku = { + name = "GlobalStandard" + capacity = local.effective_opus_capacity + } + properties = { + model = { + format = "Anthropic" + name = local.effective_opus_model + version = var.model_version + } + modelProviderData = { + organizationName = var.claude_organization_name + countryCode = var.claude_country_code + industry = var.claude_industry + } + versionUpgradeOption = "OnceNewDefaultVersionAvailable" + raiPolicyName = "Microsoft.DefaultV2" + } + } + + response_export_values = ["name"] + depends_on = [azapi_resource.project, azapi_resource.claude_sonnet] } # --- Optional RBAC -------------------------------------------------------- -# Set `assign_rbac = true` (via azd: `azd env set ASSIGN_RBAC true`) to grant -# Azure AI User + Azure AI Project Manager to `principal_id`. Requires the -# deployer to have Microsoft.Authorization/roleAssignments/write. -resource "azurerm_role_assignment" "ai_user" { +# NOTE: Azure renamed "Azure AI User" -> "Foundry User" and +# "Azure AI Project Manager" -> "Foundry Project Manager". The GUIDs are +# unchanged, but azurerm matches by name, so we use the current names. +resource "azurerm_role_assignment" "foundry_user" { count = lower(var.assign_rbac) == "true" && var.principal_id != "" ? 1 : 0 scope = azapi_resource.foundry.id - role_definition_name = "Azure AI User" + role_definition_name = "Foundry User" principal_id = var.principal_id } -resource "azurerm_role_assignment" "ai_project_manager" { +resource "azurerm_role_assignment" "foundry_project_manager" { count = lower(var.assign_rbac) == "true" && var.principal_id != "" ? 
1 : 0 scope = azapi_resource.foundry.id - role_definition_name = "Azure AI Project Manager" + role_definition_name = "Foundry Project Manager" principal_id = var.principal_id } diff --git a/infra-terraform/infra/main.tfvars.json b/infra-terraform/infra/main.tfvars.json index 0e7d4ca..d506adb 100644 --- a/infra-terraform/infra/main.tfvars.json +++ b/infra-terraform/infra/main.tfvars.json @@ -5,6 +5,12 @@ "principal_id": "${AZURE_PRINCIPAL_ID=}", "assign_rbac": "${ASSIGN_RBAC=false}", "base_name": "${AZURE_BASE_NAME=claude}", + "haiku_model": "${CLAUDE_HAIKU_MODEL=}", + "sonnet_model": "${CLAUDE_SONNET_MODEL=}", + "opus_model": "${CLAUDE_OPUS_MODEL=}", + "haiku_capacity": "${CLAUDE_HAIKU_CAPACITY=50}", + "sonnet_capacity": "${CLAUDE_SONNET_CAPACITY=50}", + "opus_capacity": "${CLAUDE_OPUS_CAPACITY=50}", "model_name": "${CLAUDE_MODEL_NAME=claude-sonnet-4-6}", "model_version": "${CLAUDE_MODEL_VERSION=1}", "model_capacity": "${CLAUDE_MODEL_CAPACITY=50}", diff --git a/infra-terraform/infra/outputs.tf b/infra-terraform/infra/outputs.tf index e104823..b21002b 100644 --- a/infra-terraform/infra/outputs.tf +++ b/infra-terraform/infra/outputs.tf @@ -8,11 +8,6 @@ output "FOUNDRY_PROJECT_ENDPOINT" { value = "https://${azapi_resource.foundry.name}.services.ai.azure.com/api/projects/${azapi_resource.project.name}" } -output "CLAUDE_DEPLOYMENT_NAME" { - description = "Pass this as the `model` parameter in Messages API calls." - value = azapi_resource.claude.name -} - output "FOUNDRY_ACCOUNT_NAME" { value = azapi_resource.foundry.name } @@ -24,3 +19,30 @@ output "AZURE_RESOURCE_GROUP" { output "AZURE_LOCATION" { value = var.location } + +# --- Per-family deployment names. Empty when that family wasn't deployed. --- +output "CLAUDE_HAIKU_DEPLOYMENT_NAME" { + description = "Deployment name for the haiku family (empty if not deployed)." + value = length(azapi_resource.claude_haiku) > 0 ? 
azapi_resource.claude_haiku[0].name : "" +} + +output "CLAUDE_SONNET_DEPLOYMENT_NAME" { + description = "Deployment name for the sonnet family (empty if not deployed)." + value = length(azapi_resource.claude_sonnet) > 0 ? azapi_resource.claude_sonnet[0].name : "" +} + +output "CLAUDE_OPUS_DEPLOYMENT_NAME" { + description = "Deployment name for the opus family (empty if not deployed)." + value = length(azapi_resource.claude_opus) > 0 ? azapi_resource.claude_opus[0].name : "" +} + +# --- Legacy single-deployment-name output for back-compat with older +# configure-claude-code scripts. Picks sonnet > opus > haiku as priority. --- +output "CLAUDE_DEPLOYMENT_NAME" { + description = "Legacy single-deployment name. Set to the first non-empty family deployment." + value = length(azapi_resource.claude_sonnet) > 0 ? azapi_resource.claude_sonnet[0].name : ( + length(azapi_resource.claude_opus) > 0 ? azapi_resource.claude_opus[0].name : ( + length(azapi_resource.claude_haiku) > 0 ? azapi_resource.claude_haiku[0].name : "" + ) + ) +} diff --git a/infra-terraform/infra/variables.tf b/infra-terraform/infra/variables.tf index 1eccca4..2909419 100644 --- a/infra-terraform/infra/variables.tf +++ b/infra-terraform/infra/variables.tf @@ -7,11 +7,11 @@ variable "environment_name" { } variable "location" { - description = "Azure region. Claude in Foundry: eastus2 or swedencentral (or westus2 for opus-only)." + description = "Azure region. All three families coexist in eastus2 or swedencentral." type = string validation { condition = contains(["eastus2", "swedencentral", "westus2"], var.location) - error_message = "location must be eastus2, swedencentral, or westus2 (opus only)." + error_message = "location must be eastus2, swedencentral, or westus2." } } @@ -27,7 +27,7 @@ variable "principal_id" { } variable "assign_rbac" { - description = "Whether to assign Azure AI User / Project Manager to principal_id. Set to \"true\" to enable. 
Requires Microsoft.Authorization/roleAssignments/write on the deployer." + description = "Whether to assign Foundry User + Foundry Project Manager (formerly Azure AI User / Project Manager) to principal_id. Set to \"true\" to enable. Requires Microsoft.Authorization/roleAssignments/write on the deployer." type = string default = "false" } @@ -42,24 +42,42 @@ variable "base_name" { } # --------------------------------------------------------------------------- -# Claude model +# Per-family Claude deployments (preferred). Empty string = skip that family. # --------------------------------------------------------------------------- -variable "model_name" { - description = "Claude model id." +variable "haiku_model" { + description = "Haiku family model id. Empty = do not deploy haiku." type = string - default = "claude-sonnet-4-6" - validation { - condition = contains([ - "claude-haiku-4-5", - "claude-sonnet-4-5", - "claude-sonnet-4-6", - "claude-opus-4-1", - "claude-opus-4-5", - "claude-opus-4-6", - "claude-opus-4-7", - ], var.model_name) - error_message = "Unsupported Claude model." - } + default = "" +} + +variable "sonnet_model" { + description = "Sonnet family model id. Empty = do not deploy sonnet." + type = string + default = "" +} + +variable "opus_model" { + description = "Opus family model id. Empty = do not deploy opus." + type = string + default = "" +} + +variable "haiku_capacity" { + description = "Haiku deployment capacity (TPM / 1000). Default 25 fits most subs out of the box; raise via `azd env set CLAUDE_HAIKU_CAPACITY <n>`." + type = string + default = "25" +} + +variable "sonnet_capacity" { + description = "Sonnet deployment capacity (TPM / 1000). Default 25 fits most subs out of the box; raise via `azd env set CLAUDE_SONNET_CAPACITY <n>`." + type = string + default = "25" +} + +variable "opus_capacity" { + description = "Opus deployment capacity (TPM / 1000). Default 25 fits most subs out of the box; raise via `azd env set CLAUDE_OPUS_CAPACITY <n>`."
+ type = string + default = "25" } variable "model_version" { @@ -68,10 +86,20 @@ variable "model_version" { default = "1" } +# --------------------------------------------------------------------------- +# Legacy single-model fallback. Only used when all three per-family vars +# are empty. +# --------------------------------------------------------------------------- +variable "model_name" { + description = "Legacy single-model name. Ignored when any of haiku_model/sonnet_model/opus_model is set." + type = string + default = "claude-sonnet-4-6" +} + variable "model_capacity" { - description = "Deployment capacity (TPM / 1000). Sent as string from azd, converted to number." + description = "Legacy single-model capacity. Ignored when any per-family var is set." type = string - default = "50" + default = "25" } # --------------------------------------------------------------------------- diff --git a/scripts/configure-claude-code.ps1 b/scripts/configure-claude-code.ps1 new file mode 100644 index 0000000..bfe1263 --- /dev/null +++ b/scripts/configure-claude-code.ps1 @@ -0,0 +1,311 @@ +<# +.SYNOPSIS + Configure Claude Code CLI for Microsoft Foundry after `azd up`. + +.DESCRIPTION + Designed to be invoked from the `postprovision` hook in `azure.yaml`. + Reads the per-family deployment outputs from `azd env get-values` and + wires up Claude Code so the user can immediately run `claude`: + + 1. Writes a project-scoped activator at the repo root: + claude-code.env.ps1 (PowerShell) + claude-code.env.sh (Bash / WSL) + containing ANTHROPIC_DEFAULT_<FAMILY>_MODEL for each non-empty + family deployment (haiku / sonnet / opus). Documented at: + https://learn.microsoft.com/azure/foundry/foundry-models/how-to/configure-claude-code + + 2. Writes (or merges into) `.vscode/settings.json` with + `claudeCode.environmentVariables` and `claudeCode.disableLoginPrompt`. + + 3. Detects whether the `claude` CLI is on PATH. If not, prints the + platform-appropriate install command.
Set CLAUDE_CODE_AUTO_INSTALL=true + to run the official installer automatically. + + Works on PowerShell 7+ on Windows, Linux, and macOS. Safe to re-run. + +.NOTES + Exit codes: + 0 Configuration written. + 1 No deployment outputs found (provision didn't deploy any family). + 2 azd CLI not on PATH (when running standalone). +#> + +[CmdletBinding()] +param( + [string] $RepoRoot, + [switch] $SkipVsCodeSettings +) + +$ErrorActionPreference = 'Stop' + +function Fail([int]$code, [string]$message) { + Write-Host "" + Write-Host "ERROR: $message" -ForegroundColor Red + Write-Host "" + exit $code +} + +# --------------------------------------------------------------------------- +# Locate the repo root. +# --------------------------------------------------------------------------- +if (-not $RepoRoot) { + $here = Split-Path -Parent $PSCommandPath + $RepoRoot = Resolve-Path (Join-Path $here '..') | Select-Object -ExpandProperty Path +} +Write-Host "Configuring Claude Code: repo root '$RepoRoot'" + +# --------------------------------------------------------------------------- +# Resolve azd outputs. +# --------------------------------------------------------------------------- +$accountName = $env:FOUNDRY_ACCOUNT_NAME +$resourceGroup = $env:AZURE_RESOURCE_GROUP +$haikuDeploy = $env:CLAUDE_HAIKU_DEPLOYMENT_NAME +$sonnetDeploy = $env:CLAUDE_SONNET_DEPLOYMENT_NAME +$opusDeploy = $env:CLAUDE_OPUS_DEPLOYMENT_NAME +$legacyDeploy = $env:CLAUDE_DEPLOYMENT_NAME + +# When the trio outputs aren't in env (running standalone), parse azd env. +$needsAzd = -not $accountName -or + (-not $haikuDeploy -and -not $sonnetDeploy -and -not $opusDeploy -and -not $legacyDeploy) +if ($needsAzd) { + $azd = Get-Command azd -ErrorAction SilentlyContinue + if (-not $azd) { + Fail 2 "azd CLI not on PATH and required outputs not in env. Install azd or run from an azd-aware shell." + } + Write-Host "Reading outputs from 'azd env get-values'..." 
+ $vals = & azd env get-values 2>$null + foreach ($line in $vals) { + if ($line -match '^(?<k>[A-Z0-9_]+)="?(?<v>.*?)"?$') { + switch ($Matches['k']) { + 'FOUNDRY_ACCOUNT_NAME' { if (-not $accountName) { $accountName = $Matches['v'] } } + 'AZURE_RESOURCE_GROUP' { if (-not $resourceGroup){ $resourceGroup= $Matches['v'] } } + 'CLAUDE_HAIKU_DEPLOYMENT_NAME' { if (-not $haikuDeploy) { $haikuDeploy = $Matches['v'] } } + 'CLAUDE_SONNET_DEPLOYMENT_NAME' { if (-not $sonnetDeploy) { $sonnetDeploy = $Matches['v'] } } + 'CLAUDE_OPUS_DEPLOYMENT_NAME' { if (-not $opusDeploy) { $opusDeploy = $Matches['v'] } } + 'CLAUDE_DEPLOYMENT_NAME' { if (-not $legacyDeploy) { $legacyDeploy = $Matches['v'] } } + } + } + } +} + +if (-not $accountName) { + Fail 1 "FOUNDRY_ACCOUNT_NAME not available. Has 'azd provision' completed?" +} + +# Build the list of (family, deployment) pairs that were actually deployed. +$deployments = @() +if ($haikuDeploy) { $deployments += [pscustomobject]@{ Family='HAIKU'; Deployment=$haikuDeploy } } +if ($sonnetDeploy) { $deployments += [pscustomobject]@{ Family='SONNET'; Deployment=$sonnetDeploy } } +if ($opusDeploy) { $deployments += [pscustomobject]@{ Family='OPUS'; Deployment=$opusDeploy } } + +if ($deployments.Count -eq 0) { + # Legacy single-deployment fallback: infer family from the model name baked + # into the deployment name (e.g. "claude-opus-4-6-abc123" → OPUS). + if (-not $legacyDeploy) { + Fail 1 "No family deployments and no legacy CLAUDE_DEPLOYMENT_NAME found. Has 'azd provision' completed?" + } + $lower = $legacyDeploy.ToLower() + $family = + if ($lower -like '*sonnet*') { 'SONNET' } + elseif ($lower -like '*haiku*') { 'HAIKU' } + elseif ($lower -like '*opus*') { 'OPUS' } + else { Fail 1 "Could not infer Claude family from deployment name '$legacyDeploy'."
} + $deployments += [pscustomobject]@{ Family=$family; Deployment=$legacyDeploy } +} + +Write-Host " Foundry account : $accountName" +foreach ($d in $deployments) { + Write-Host (" {0,-18} : {1}" -f $d.Family, $d.Deployment) +} + +# --------------------------------------------------------------------------- +# 1. Write the PowerShell + Bash activator scripts at the repo root. +# --------------------------------------------------------------------------- +$ps1Path = Join-Path $RepoRoot 'claude-code.env.ps1' +$shPath = Join-Path $RepoRoot 'claude-code.env.sh' + +$ps1Lines = @( + "# Auto-generated by scripts/configure-claude-code.ps1 — safe to overwrite.", + "# Source me with: . ./claude-code.env.ps1", + "# Then run: claude", + "", + "# Scope 'az login' (and azd) to this workspace only — never touches ~/.azure", + "# and never leaks tokens into other VS Code windows or shells.", + "`$_claudeRoot = Split-Path -Parent `$MyInvocation.MyCommand.Path", + "`$env:AZURE_CONFIG_DIR = Join-Path `$_claudeRoot '.azure-cli'", + "if (-not (Test-Path `$env:AZURE_CONFIG_DIR)) { New-Item -ItemType Directory -Path `$env:AZURE_CONFIG_DIR -Force | Out-Null }", + "", + "`$env:CLAUDE_CODE_USE_FOUNDRY = '1'", + "`$env:ANTHROPIC_FOUNDRY_RESOURCE = '$accountName'" +) +foreach ($d in $deployments) { + $ps1Lines += "`$env:ANTHROPIC_DEFAULT_$($d.Family)_MODEL = '$($d.Deployment)'" +} +$ps1Lines += "" +$ps1Lines += "Write-Host `"Claude Code configured for Foundry resource '$accountName'.`" -ForegroundColor Green" +$ps1Lines += "Write-Host `"Azure CLI config scoped to: `$env:AZURE_CONFIG_DIR`" -ForegroundColor Green" +$ps1Lines += "Write-Host `"Authentication: Microsoft Entra ID via 'az login' (already done if 'azd up' succeeded).`" -ForegroundColor Green" + +$shLines = @( + "# Auto-generated by scripts/configure-claude-code.ps1 — safe to overwrite.", + "# Source me with: source ./claude-code.env.sh (or: . 
./claude-code.env.sh)", + "# Then run: claude", + "", + "# Scope 'az login' (and azd) to this workspace only — never touches ~/.azure", + "# and never leaks tokens into other VS Code windows or shells.", + "_claude_root=`"`$(cd `"`$(dirname `"`${BASH_SOURCE[0]:-`$0}`")`" && pwd)`"", + "export AZURE_CONFIG_DIR=`"`$_claude_root/.azure-cli`"", + "mkdir -p `"`$AZURE_CONFIG_DIR`"", + "unset _claude_root", + "", + "export CLAUDE_CODE_USE_FOUNDRY=1", + "export ANTHROPIC_FOUNDRY_RESOURCE='$accountName'" +) +foreach ($d in $deployments) { + $shLines += "export ANTHROPIC_DEFAULT_$($d.Family)_MODEL='$($d.Deployment)'" +} +$shLines += "" +$shLines += "echo `"Claude Code configured for Foundry resource '$accountName'.`"" +$shLines += "echo `"Azure CLI config scoped to: `$AZURE_CONFIG_DIR`"" +$shLines += "echo `"Authentication: Microsoft Entra ID via 'az login' (already done if 'azd up' succeeded).`"" + +Set-Content -Path $ps1Path -Value ($ps1Lines -join [Environment]::NewLine) -Encoding utf8 +Set-Content -Path $shPath -Value ($shLines -join "`n") -Encoding utf8 -NoNewline:$false +Write-Host "Wrote activator: $ps1Path" +Write-Host "Wrote activator: $shPath" + +# --------------------------------------------------------------------------- +# 2. Write / merge `.vscode/settings.json` for the Claude Code VS Code extension. 
+# --------------------------------------------------------------------------- +if (-not $SkipVsCodeSettings) { + $vscodeDir = Join-Path $RepoRoot '.vscode' + $settingsPath = Join-Path $vscodeDir 'settings.json' + if (-not (Test-Path $vscodeDir)) { + New-Item -ItemType Directory -Path $vscodeDir -Force | Out-Null + } + + $existing = [ordered]@{} + if (Test-Path $settingsPath) { + try { + $raw = Get-Content -Raw -Path $settingsPath + if ($raw -and $raw.Trim()) { + $obj = $raw | ConvertFrom-Json -ErrorAction Stop + foreach ($p in $obj.PSObject.Properties) { + $existing[$p.Name] = $p.Value + } + } + } catch { + Write-Host "WARNING: Could not parse existing $settingsPath ($($_.Exception.Message)). Leaving it untouched." -ForegroundColor Yellow + $SkipVsCodeSettings = $true + } + } + + if (-not $SkipVsCodeSettings) { + # Use [ordered] hashtables per entry so name appears before value in + # the rendered JSON (PSCustomObject hashtable iteration is unordered). + $claudeEnv = @( + [ordered]@{ name = 'CLAUDE_CODE_USE_FOUNDRY'; value = '1' } + [ordered]@{ name = 'ANTHROPIC_FOUNDRY_RESOURCE'; value = $accountName } + ) + foreach ($d in $deployments) { + $claudeEnv += [ordered]@{ name = "ANTHROPIC_DEFAULT_$($d.Family)_MODEL"; value = $d.Deployment } + } + $existing['claudeCode.environmentVariables'] = $claudeEnv + $existing['claudeCode.disableLoginPrompt'] = $true + + # Scope 'az login' (and azd) to a workspace-local config dir so it + # never touches ~/.azure and never leaks tokens into other VS Code + # windows. Applies to every terminal VS Code spawns in this workspace. + $azCfgWin = [ordered]@{ AZURE_CONFIG_DIR = '${workspaceFolder}\.azure-cli' } + $azCfgPosix = [ordered]@{ AZURE_CONFIG_DIR = '${workspaceFolder}/.azure-cli' } + $existing['terminal.integrated.env.windows'] = $azCfgWin + $existing['terminal.integrated.env.linux'] = $azCfgPosix + $existing['terminal.integrated.env.osx'] = $azCfgPosix + + # Strip any stale display-title key from prior versions of this script. 
+ if ($existing.Contains('Claude Code: Environment Variables')) { + $existing.Remove('Claude Code: Environment Variables') + } + ($existing | ConvertTo-Json -Depth 8) | Set-Content -Path $settingsPath -Encoding utf8 + Write-Host "Wrote VS Code settings: $settingsPath" + } +} + +# --------------------------------------------------------------------------- +# 3. Detect / optionally install the Claude Code CLI. +# --------------------------------------------------------------------------- +$claude = Get-Command claude -ErrorAction SilentlyContinue +$autoInstall = $env:CLAUDE_CODE_AUTO_INSTALL -and ($env:CLAUDE_CODE_AUTO_INSTALL -ne 'false' -and $env:CLAUDE_CODE_AUTO_INSTALL -ne '0') + +if ($claude) { + Write-Host "" + Write-Host "Claude Code CLI detected: $($claude.Source)" -ForegroundColor Green +} else { + Write-Host "" + Write-Host "Claude Code CLI not found on PATH." -ForegroundColor Yellow + + $onWindows = ($PSVersionTable.PSEdition -eq 'Desktop') -or ` + ($PSVersionTable.Platform -eq 'Win32NT') -or ` + ($env:OS -eq 'Windows_NT') + + if ($autoInstall) { + Write-Host "CLAUDE_CODE_AUTO_INSTALL is set — running the official installer..." + try { + if ($onWindows) { + Invoke-RestMethod -Uri 'https://claude.ai/install.ps1' | Invoke-Expression + } else { + & bash -c "curl -fsSL https://claude.ai/install.sh | bash" + } + $claude = Get-Command claude -ErrorAction SilentlyContinue + if ($claude) { + Write-Host "Claude Code installed: $($claude.Source)" -ForegroundColor Green + } else { + Write-Host "Install ran but 'claude' is still not on PATH. Open a new shell, or add the install dir to PATH." -ForegroundColor Yellow + } + } catch { + Write-Host "WARNING: auto-install failed ($($_.Exception.Message)). Install manually." 
-ForegroundColor Yellow + } + } else { + Write-Host "" + Write-Host "To install (one-time):" -ForegroundColor Cyan + if ($onWindows) { + Write-Host " irm https://claude.ai/install.ps1 | iex" + Write-Host " or in Git Bash / WSL:" + Write-Host " curl -fsSL https://claude.ai/install.sh | bash" + } else { + Write-Host " curl -fsSL https://claude.ai/install.sh | bash" + } + Write-Host "" + Write-Host "Or set CLAUDE_CODE_AUTO_INSTALL=true and re-run 'azd provision' to install automatically." + } +} + +# --------------------------------------------------------------------------- +# 4. Final next-step message. +# --------------------------------------------------------------------------- +Write-Host "" +Write-Host "=============================================================" -ForegroundColor Cyan +Write-Host " Claude Code is configured for Microsoft Foundry." -ForegroundColor Cyan +Write-Host "=============================================================" -ForegroundColor Cyan +Write-Host "" +Write-Host " Foundry resource : $accountName" +foreach ($d in $deployments) { + Write-Host (" {0,-16} : {1}" -f "$($d.Family) deployment", $d.Deployment) +} +if ($resourceGroup) { Write-Host " Resource group : $resourceGroup" } +Write-Host "" +Write-Host "To start Claude Code from your terminal:" +Write-Host "" +Write-Host " PowerShell:" -ForegroundColor Cyan +Write-Host " . $RepoRoot\claude-code.env.ps1" +Write-Host " claude" +Write-Host "" +Write-Host " Bash / WSL:" -ForegroundColor Cyan +Write-Host " source $RepoRoot/claude-code.env.sh" +Write-Host " claude" +Write-Host "" +Write-Host "Or in VS Code: install the 'Claude Code' extension" +Write-Host "(https://marketplace.visualstudio.com/items?itemName=anthropic.claude-code)" +Write-Host "— the .vscode/settings.json in this workspace already wires it up." 
+Write-Host "" +exit 0 diff --git a/scripts/configure-claude-code.sh b/scripts/configure-claude-code.sh new file mode 100644 index 0000000..402ef0a --- /dev/null +++ b/scripts/configure-claude-code.sh @@ -0,0 +1,273 @@ +#!/usr/bin/env bash +# Configure Claude Code CLI for Microsoft Foundry after `azd up`. +# +# Designed to be invoked from the `postprovision` hook in `azure.yaml`. +# Reads the per-family deployment outputs from `azd env get-values` and +# wires up Claude Code so the user can immediately run `claude`: +# +# 1. Writes claude-code.env.ps1 / claude-code.env.sh at the repo root with +# ANTHROPIC_DEFAULT_<FAMILY>_MODEL for each non-empty family deployment. +# 2. Writes / merges .vscode/settings.json with claudeCode.environmentVariables +# and claudeCode.disableLoginPrompt. +# 3. Detects / optionally installs the `claude` CLI. +# +# Exit codes: +# 0 Configuration written. +# 1 Required outputs not available. +# 2 azd CLI not on PATH (standalone mode), or an unknown argument was passed. + +set -euo pipefail + +fail() { + local code="$1"; shift + printf '\nERROR: %s\n\n' "$*" >&2 + exit "$code" +} + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="${REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}" +SKIP_VSCODE_SETTINGS="${SKIP_VSCODE_SETTINGS:-0}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo-root) REPO_ROOT="$2"; shift 2 ;; + --repo-root=*) REPO_ROOT="${1#*=}"; shift ;; + --skip-vscode-settings) SKIP_VSCODE_SETTINGS=1; shift ;; + *) fail 2 "Unknown argument: $1" ;; + esac +done +echo "Configuring Claude Code: repo root '$REPO_ROOT'" + +ACCOUNT_NAME="${FOUNDRY_ACCOUNT_NAME:-}" +RESOURCE_GROUP="${AZURE_RESOURCE_GROUP:-}" +HAIKU_DEPLOY="${CLAUDE_HAIKU_DEPLOYMENT_NAME:-}" +SONNET_DEPLOY="${CLAUDE_SONNET_DEPLOYMENT_NAME:-}" +OPUS_DEPLOY="${CLAUDE_OPUS_DEPLOYMENT_NAME:-}" +LEGACY_DEPLOY="${CLAUDE_DEPLOYMENT_NAME:-}" + +if [ -z "$ACCOUNT_NAME" ] || { [ -z "$HAIKU_DEPLOY" ] && [ -z "$SONNET_DEPLOY" ] && [ -z "$OPUS_DEPLOY" ] && [ -z "$LEGACY_DEPLOY" ]; }; then + if ! 
command -v azd >/dev/null 2>&1; then + fail 2 "azd CLI not on PATH and required outputs not in env. Install azd or run from an azd-aware shell." + fi + echo "Reading outputs from 'azd env get-values'..." + while IFS= read -r line; do + key="$(echo "$line" | sed -n 's/^\([A-Z0-9_]*\)=.*/\1/p')" + val="$(echo "$line" | sed -n 's/^[A-Z0-9_]*="\{0,1\}\(.*[^"]\)"\{0,1\}$/\1/p')" + case "$key" in + FOUNDRY_ACCOUNT_NAME) [ -z "$ACCOUNT_NAME" ] && ACCOUNT_NAME="$val" ;; + AZURE_RESOURCE_GROUP) [ -z "$RESOURCE_GROUP" ] && RESOURCE_GROUP="$val" ;; + CLAUDE_HAIKU_DEPLOYMENT_NAME) [ -z "$HAIKU_DEPLOY" ] && HAIKU_DEPLOY="$val" ;; + CLAUDE_SONNET_DEPLOYMENT_NAME) [ -z "$SONNET_DEPLOY" ] && SONNET_DEPLOY="$val" ;; + CLAUDE_OPUS_DEPLOYMENT_NAME) [ -z "$OPUS_DEPLOY" ] && OPUS_DEPLOY="$val" ;; + CLAUDE_DEPLOYMENT_NAME) [ -z "$LEGACY_DEPLOY" ] && LEGACY_DEPLOY="$val" ;; + esac + done < <(azd env get-values 2>/dev/null || true) +fi + +[ -z "$ACCOUNT_NAME" ] && fail 1 "FOUNDRY_ACCOUNT_NAME not available. Has 'azd provision' completed?" + +# Build parallel arrays of (family, deployment) for deployed families. +FAMILIES=() +DEPLOYMENTS=() +[ -n "$HAIKU_DEPLOY" ] && { FAMILIES+=("HAIKU"); DEPLOYMENTS+=("$HAIKU_DEPLOY"); } +[ -n "$SONNET_DEPLOY" ] && { FAMILIES+=("SONNET"); DEPLOYMENTS+=("$SONNET_DEPLOY"); } +[ -n "$OPUS_DEPLOY" ] && { FAMILIES+=("OPUS"); DEPLOYMENTS+=("$OPUS_DEPLOY"); } + +if [ "${#FAMILIES[@]}" -eq 0 ]; then + # Legacy fallback: infer family from the deployment name. + [ -z "$LEGACY_DEPLOY" ] && fail 1 "No family deployments and no legacy CLAUDE_DEPLOYMENT_NAME found. Has 'azd provision' completed?" + LOWER="$(echo "$LEGACY_DEPLOY" | tr '[:upper:]' '[:lower:]')" + case "$LOWER" in + *sonnet*) FAMILIES+=("SONNET") ;; + *haiku*) FAMILIES+=("HAIKU") ;; + *opus*) FAMILIES+=("OPUS") ;; + *) fail 1 "Could not infer Claude family from deployment name '$LEGACY_DEPLOY'." 
;; + esac + DEPLOYMENTS+=("$LEGACY_DEPLOY") +fi + +echo " Foundry account : $ACCOUNT_NAME" +for i in "${!FAMILIES[@]}"; do + printf " %-15s : %s\n" "${FAMILIES[$i]}" "${DEPLOYMENTS[$i]}" +done + +# --------------------------------------------------------------------------- +# 1. Activator scripts. +# --------------------------------------------------------------------------- +PS1_PATH="$REPO_ROOT/claude-code.env.ps1" +SH_PATH="$REPO_ROOT/claude-code.env.sh" + +{ + echo "# Auto-generated by scripts/configure-claude-code.sh — safe to overwrite." + echo "# Source me with: . ./claude-code.env.ps1" + echo "# Then run: claude" + echo "" + echo "# Scope 'az login' (and azd) to this workspace only — never touches ~/.azure" + echo "# and never leaks tokens into other VS Code windows or shells." + echo "\$_claudeRoot = Split-Path -Parent \$MyInvocation.MyCommand.Path" + echo "\$env:AZURE_CONFIG_DIR = Join-Path \$_claudeRoot '.azure-cli'" + echo "if (-not (Test-Path \$env:AZURE_CONFIG_DIR)) { New-Item -ItemType Directory -Path \$env:AZURE_CONFIG_DIR -Force | Out-Null }" + echo "" + echo "\$env:CLAUDE_CODE_USE_FOUNDRY = '1'" + echo "\$env:ANTHROPIC_FOUNDRY_RESOURCE = '$ACCOUNT_NAME'" + for i in "${!FAMILIES[@]}"; do + echo "\$env:ANTHROPIC_DEFAULT_${FAMILIES[$i]}_MODEL = '${DEPLOYMENTS[$i]}'" + done + echo "" + echo "Write-Host \"Claude Code configured for Foundry resource '$ACCOUNT_NAME'.\" -ForegroundColor Green" + echo "Write-Host \"Azure CLI config scoped to: \$env:AZURE_CONFIG_DIR\" -ForegroundColor Green" + echo "Write-Host \"Authentication: Microsoft Entra ID via 'az login' (already done if 'azd up' succeeded).\" -ForegroundColor Green" +} > "$PS1_PATH" + +{ + echo "# Auto-generated by scripts/configure-claude-code.sh — safe to overwrite." + echo "# Source me with: source ./claude-code.env.sh (or: . 
./claude-code.env.sh)" + echo "# Then run: claude" + echo "" + echo "# Scope 'az login' (and azd) to this workspace only — never touches ~/.azure" + echo "# and never leaks tokens into other VS Code windows or shells." + echo "_claude_root=\"\$(cd \"\$(dirname \"\${BASH_SOURCE[0]:-\$0}\")\" && pwd)\"" + echo "export AZURE_CONFIG_DIR=\"\$_claude_root/.azure-cli\"" + echo "mkdir -p \"\$AZURE_CONFIG_DIR\"" + echo "unset _claude_root" + echo "" + echo "export CLAUDE_CODE_USE_FOUNDRY=1" + echo "export ANTHROPIC_FOUNDRY_RESOURCE='$ACCOUNT_NAME'" + for i in "${!FAMILIES[@]}"; do + echo "export ANTHROPIC_DEFAULT_${FAMILIES[$i]}_MODEL='${DEPLOYMENTS[$i]}'" + done + echo "" + echo "echo \"Claude Code configured for Foundry resource '$ACCOUNT_NAME'.\"" + echo "echo \"Azure CLI config scoped to: \$AZURE_CONFIG_DIR\"" + echo "echo \"Authentication: Microsoft Entra ID via 'az login' (already done if 'azd up' succeeded).\"" +} > "$SH_PATH" + +echo "Wrote activator: $PS1_PATH" +echo "Wrote activator: $SH_PATH" + +# --------------------------------------------------------------------------- +# 2. .vscode/settings.json — use python (3.6+, probed below) for safe JSON merge. +# --------------------------------------------------------------------------- +PYTHON_BIN="" +for cand in python python3; do + if command -v "$cand" >/dev/null 2>&1 && "$cand" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 6) else 1)' >/dev/null 2>&1; then PYTHON_BIN="$cand"; break; fi +done + +if [ "${SKIP_VSCODE_SETTINGS:-}" != "1" ] && [ -n "$PYTHON_BIN" ]; then + VSCODE_DIR="$REPO_ROOT/.vscode" + mkdir -p "$VSCODE_DIR" + SETTINGS_PATH="$VSCODE_DIR/settings.json" + + # Pass deployment list as a tab-separated string for python to parse.
+ PAIRS="" + for i in "${!FAMILIES[@]}"; do + PAIRS+="${FAMILIES[$i]}=${DEPLOYMENTS[$i]}"$'\t' + done + + ACCOUNT_NAME="$ACCOUNT_NAME" SETTINGS_PATH="$SETTINGS_PATH" PAIRS="$PAIRS" \ + "$PYTHON_BIN" - <<'PYEOF' +import json, os, sys +path = os.environ['SETTINGS_PATH'] +data = {} +if os.path.exists(path): + try: + with open(path, 'r', encoding='utf-8') as f: + text = f.read().strip() + if text: + data = json.loads(text) + if not isinstance(data, dict): + print(f"WARNING: {path} root is not an object; leaving untouched.", file=sys.stderr) + sys.exit(0) + except Exception as e: + print(f"WARNING: Could not parse {path} ({e}); leaving untouched.", file=sys.stderr) + sys.exit(0) + +env = [ + {'name': 'CLAUDE_CODE_USE_FOUNDRY', 'value': '1'}, + {'name': 'ANTHROPIC_FOUNDRY_RESOURCE', 'value': os.environ['ACCOUNT_NAME']}, +] +for pair in os.environ['PAIRS'].split('\t'): + if not pair: + continue + family, deployment = pair.split('=', 1) + env.append({'name': f'ANTHROPIC_DEFAULT_{family}_MODEL', 'value': deployment}) + +data['claudeCode.environmentVariables'] = env +data['claudeCode.disableLoginPrompt'] = True + +# Scope 'az login' (and azd) to a workspace-local config dir so it never +# touches ~/.azure and never leaks tokens into other VS Code windows. +# Applies to every terminal VS Code spawns in this workspace. +data['terminal.integrated.env.windows'] = {'AZURE_CONFIG_DIR': '${workspaceFolder}\\.azure-cli'} +data['terminal.integrated.env.linux'] = {'AZURE_CONFIG_DIR': '${workspaceFolder}/.azure-cli'} +data['terminal.integrated.env.osx'] = {'AZURE_CONFIG_DIR': '${workspaceFolder}/.azure-cli'} + +data.pop('Claude Code: Environment Variables', None) + +with open(path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2) + f.write('\n') +print(f"Wrote VS Code settings: {path}") +PYEOF +elif [ "${SKIP_VSCODE_SETTINGS:-}" != "1" ]; then + echo "WARNING: python not found on PATH; skipping .vscode/settings.json." 
+fi + +# --------------------------------------------------------------------------- +# 3. Detect / optionally install Claude Code CLI. +# --------------------------------------------------------------------------- +echo "" +if command -v claude >/dev/null 2>&1; then + echo "Claude Code CLI detected: $(command -v claude)" +else + echo "Claude Code CLI not found on PATH." + AUTO="${CLAUDE_CODE_AUTO_INSTALL:-}" + if [ -n "$AUTO" ] && [ "$AUTO" != "false" ] && [ "$AUTO" != "0" ]; then + echo "CLAUDE_CODE_AUTO_INSTALL is set — running the official installer..." + if curl -fsSL https://claude.ai/install.sh | bash; then + if command -v claude >/dev/null 2>&1; then + echo "Claude Code installed: $(command -v claude)" + else + echo "Install ran but 'claude' is still not on PATH. Open a new shell, or add the install dir to PATH." + fi + else + echo "WARNING: auto-install failed. Install manually." + fi + else + echo "" + echo "To install (one-time):" + echo " curl -fsSL https://claude.ai/install.sh | bash" + echo "" + echo "Or set CLAUDE_CODE_AUTO_INSTALL=true and re-run 'azd provision' to install automatically." + fi +fi + +# --------------------------------------------------------------------------- +# 4. Final next-step message. +# --------------------------------------------------------------------------- +echo "" +echo "=============================================================" +echo " Claude Code is configured for Microsoft Foundry." +echo "=============================================================" +echo "" +echo " Foundry resource : $ACCOUNT_NAME" +for i in "${!FAMILIES[@]}"; do + printf " %-15s : %s\n" "${FAMILIES[$i]} deployment" "${DEPLOYMENTS[$i]}" +done +[ -n "$RESOURCE_GROUP" ] && echo " Resource group : $RESOURCE_GROUP" +echo "" +echo "To start Claude Code from your terminal:" +echo "" +echo " Bash / WSL:" +echo " source $REPO_ROOT/claude-code.env.sh" +echo " claude" +echo "" +echo " PowerShell:" +echo " . 
$REPO_ROOT/claude-code.env.ps1" +echo " claude" +echo "" +echo "Or in VS Code: install the 'Claude Code' extension" +echo "(https://marketplace.visualstudio.com/items?itemName=anthropic.claude-code)" +echo "- the .vscode/settings.json in this workspace already wires it up." +echo "" +exit 0 diff --git a/scripts/preflight-claude.ps1 b/scripts/preflight-claude.ps1 new file mode 100644 index 0000000..2fc475b --- /dev/null +++ b/scripts/preflight-claude.ps1 @@ -0,0 +1,180 @@ +<# +.SYNOPSIS + Preflight check for Claude-on-Microsoft-Foundry deployments. + +.DESCRIPTION + Gates `azd up` on: + 1. Required env vars being set. + 2. (Informational) Marketplace catalog: the Anthropic offer resolves + via the Microsoft.MarketplaceOrdering REST API. A missing offer + is a hard fail (typo / unreleased SKU). The Cognitive Services + RP auto-signs the agreement during deployment on eligible subs, + so an unsigned status is informational only. + 3. **Per-region Cognitive Services quota headroom** per model. + A hard fail (exit 6) when `currentValue + requestedCapacity > + limit`. This is the most common cause of `azd up` failures and + the cause of the opaque `400 715-123420` error that Terraform's + `azapi_resource` returns. (Bicep / `az deployment group create` + surface the real `InsufficientQuota` message because they go + through ARM preflight; `azapi` bypasses it.) + + Per-family mode: set any of CLAUDE_HAIKU_MODEL / CLAUDE_SONNET_MODEL / + CLAUDE_OPUS_MODEL. Empty = skip that family. If all three are empty, + falls back to CLAUDE_MODEL_NAME (legacy single-model behavior). + + Env vars consumed: + CLAUDE_ORGANIZATION_NAME, AZURE_LOCATION, CLAUDE_HAIKU_MODEL, + CLAUDE_SONNET_MODEL, CLAUDE_OPUS_MODEL (+ matching *_CAPACITY), + CLAUDE_MODEL_NAME, CLAUDE_MODEL_CAPACITY (legacy fallback). + +.NOTES + Exit codes: + 0 Preflight passed. + 1 A required env var is missing. + 2 Azure CLI / subscription not available. 
+ 4 Marketplace offer not found (typo in a model name, or model not + in the Anthropic-on-Foundry catalog yet). + 6 Insufficient quota (used + requested > limit). +#> + +[CmdletBinding()] +param() + +$ErrorActionPreference = "Stop" + +function Fail([int]$code, [string]$message) { + Write-Host "" + Write-Host "ERROR: $message" -ForegroundColor Red + Write-Host "" + exit $code +} + +# --- 1. Required env vars --------------------------------------------------- +if (-not $env:CLAUDE_ORGANIZATION_NAME) { + Fail 1 "CLAUDE_ORGANIZATION_NAME is required. Run: azd env set CLAUDE_ORGANIZATION_NAME 'Your Org'" +} +if (-not $env:AZURE_LOCATION) { + Fail 1 "AZURE_LOCATION is required. Run: azd env set AZURE_LOCATION swedencentral" +} + +$location = $env:AZURE_LOCATION + +# Build the list of (family, model, capacity) tuples to validate. Empty +# family vars are skipped. If none are set, fall back to legacy single-model. +$requested = @() +if ($env:CLAUDE_HAIKU_MODEL) { $requested += [pscustomobject]@{ Family='haiku'; Model=$env:CLAUDE_HAIKU_MODEL; Capacity=[int]($env:CLAUDE_HAIKU_CAPACITY | ForEach-Object { if ($_) { $_ } else { 50 } }) } } +if ($env:CLAUDE_SONNET_MODEL) { $requested += [pscustomobject]@{ Family='sonnet'; Model=$env:CLAUDE_SONNET_MODEL; Capacity=[int]($env:CLAUDE_SONNET_CAPACITY | ForEach-Object { if ($_) { $_ } else { 50 } }) } } +if ($env:CLAUDE_OPUS_MODEL) { $requested += [pscustomobject]@{ Family='opus'; Model=$env:CLAUDE_OPUS_MODEL; Capacity=[int]($env:CLAUDE_OPUS_CAPACITY | ForEach-Object { if ($_) { $_ } else { 50 } }) } } + +if ($requested.Count -eq 0) { + $legacyModel = if ($env:CLAUDE_MODEL_NAME) { $env:CLAUDE_MODEL_NAME } else { "claude-sonnet-4-6" } + $legacyCapacity = if ($env:CLAUDE_MODEL_CAPACITY) { [int]$env:CLAUDE_MODEL_CAPACITY } else { 50 } + $requested = ,([pscustomobject]@{ Family='legacy'; Model=$legacyModel; Capacity=$legacyCapacity }) +} + +# --- 2. 
Azure CLI / active subscription ------------------------------------ +$az = Get-Command az -ErrorAction SilentlyContinue +if (-not $az) { + Fail 2 "Azure CLI (az) not found on PATH. Install: https://learn.microsoft.com/cli/azure/install-azure-cli" +} + +$subId = (az account show --query id -o tsv 2>$null) +if (-not $subId) { + Fail 2 "No active Azure subscription. Run: az login (and 'az account set --subscription <id>' if needed)" +} + +$summary = ($requested | ForEach-Object { "$($_.Family)=$($_.Model)@$($_.Capacity)" }) -join ', ' +Write-Host "Preflight: subscription $subId, location $location, deployments: $summary" + +$publisher = "anthropic" + +foreach ($r in $requested) { + $modelName = $r.Model + $capacity = $r.Capacity + $family = $r.Family + + # --- Marketplace catalog check (offer exists; agreement status informational) --- + # Anthropic publishes Claude as a fetch-style plan named 'anthropic-<model>-plan-new' + # ('anthropic-<model>-test-plan' is a non-purchasable stub used by some legacy tooling). + $offer = "anthropic-$modelName-offer" + $plan = "anthropic-$modelName-plan-new" + $mpUrl = "https://management.azure.com/subscriptions/$subId/providers/Microsoft.MarketplaceOrdering/offerTypes/virtualmachine/publishers/$publisher/offers/$offer/plans/$plan/agreements/current?api-version=2021-01-01" + + $mpRaw = az rest --method get --url $mpUrl 2>&1 + $mpExit = $LASTEXITCODE + + if ($mpExit -ne 0) { + $msg = ($mpRaw | Out-String).Trim() + if ($msg -match "was not found" -or $msg -match "BadRequest") { + Fail 4 @" +Marketplace offer 'anthropic/$offer/$plan' not found (family=$family). + +Likely causes: + - The model id '$modelName' is misspelled. + - The model isn't (yet) published in the Anthropic-on-Foundry catalog. + - Anthropic changed the plan naming convention.
+ +Available Anthropic agreements on this subscription: + az rest --method get --url 'https://management.azure.com/subscriptions/$subId/providers/Microsoft.MarketplaceOrdering/agreements?api-version=2021-01-01' --query "value[?properties.publisher=='anthropic']" + +Underlying error: +$msg +"@ + } + Write-Host "Preflight: Marketplace catalog query for '$modelName' returned an unexpected error (continuing — RP will validate at deploy time):" -ForegroundColor Yellow + Write-Host " $msg" -ForegroundColor Yellow + } else { + $mp = $mpRaw | ConvertFrom-Json + if (-not $mp.properties.accepted) { + Write-Host "Preflight: '$modelName' marketplace agreement is currently unsigned. The Cognitive Services RP will auto-sign during deployment on eligible subs." -ForegroundColor Yellow + Write-Host " If your subscription blocks RP-initiated subscribes, pre-accept manually:" -ForegroundColor Yellow + Write-Host " az term accept --publisher $publisher --product $offer --plan $plan" -ForegroundColor Yellow + } else { + Write-Host "Preflight: '$modelName' marketplace agreement signed." -ForegroundColor Green + } + } + + # --- Quota headroom (HARD FAIL on insufficient) ------------------------ + # The Cognitive Services RP returns an opaque `400 715-123420` for + # quota-rejected requests when called via azapi/Terraform. Catch it + # early with a clear message. 
+ $sku = "AIServices.GlobalStandard.$modelName" + $limitRaw = az cognitiveservices usage list --location $location --query "[?name.value=='$sku'].limit | [0]" -o tsv 2>$null + $currentRaw = az cognitiveservices usage list --location $location --query "[?name.value=='$sku'].currentValue | [0]" -o tsv 2>$null + + if (-not [string]::IsNullOrWhiteSpace($limitRaw)) { + $limit = [int]([double]$limitRaw) + $current = if ([string]::IsNullOrWhiteSpace($currentRaw)) { 0 } else { [int]([double]$currentRaw) } + $available = $limit - $current + if ($available -lt $capacity) { + $upperFamily = $family.ToUpper() + Fail 6 @" +Insufficient quota for '$modelName' (family=$family) in '$location'. + +Requested capacity: $capacity TPM (thousands) +Available: $available TPM (limit $limit, currently used $current) + +Fix one of: + - Lower the requested capacity: + azd env set CLAUDE_$($upperFamily)_CAPACITY $available + (or CLAUDE_MODEL_CAPACITY for legacy single-model mode) + - Free up quota by deleting unused deployments: + az cognitiveservices account deployment list --name --resource-group -o table + az cognitiveservices account deployment delete --name --resource-group --deployment-name + - Request a quota increase in the Azure Foundry portal: + Foundry portal -> Management center -> Quota -> select '$sku' -> Request increase + +Note: without this preflight, Terraform (azapi_resource) would fail with an +opaque '400 715-123420' error because azapi bypasses ARM preflight +validation. Bicep and 'az deployment group create' show the real +'InsufficientQuota' message because they go through ARM preflight. +"@ + } + Write-Host "Preflight: '$modelName' quota OK ($capacity requested, $available available of $limit in $location)." -ForegroundColor Green + } else { + Write-Host "Preflight: no quota row visible for '$sku' in '$location' yet — first deploy may surface a quota error from the RP." -ForegroundColor Yellow + } +} + +Write-Host "Preflight OK." 
-ForegroundColor Green +exit 0 diff --git a/scripts/preflight-claude.sh b/scripts/preflight-claude.sh new file mode 100644 index 0000000..0c761b4 --- /dev/null +++ b/scripts/preflight-claude.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Preflight check for Claude-on-Microsoft-Foundry deployments. +# +# Per-family mode: set any of CLAUDE_HAIKU_MODEL / CLAUDE_SONNET_MODEL / +# CLAUDE_OPUS_MODEL to validate that family. Empty = skip. If all three are +# empty, falls back to CLAUDE_MODEL_NAME (legacy single-model behavior). +# +# Gates `azd up` on: +# 1. Required env vars being set. +# 2. (Informational) Marketplace catalog: each requested model exists in +# the Anthropic-on-Foundry catalog (offer must resolve via +# Microsoft.MarketplaceOrdering). A typo is a hard fail. Agreement +# signed/unsigned is informational (RP auto-signs at deploy time on +# eligible subs). +# 3. **Per-region Cognitive Services quota headroom** per model. +# Hard fail (exit 6) when used + requested > limit. This catches the +# opaque `400 715-123420` error that Terraform's azapi_resource returns +# for quota-rejected requests (azapi bypasses ARM preflight; Bicep / +# `az deployment group create` show the real `InsufficientQuota`). +# +# Exit codes: +# 0 Preflight passed. +# 1 A required env var is missing. +# 2 Azure CLI / subscription not available. +# 4 Marketplace offer not found. +# 6 Insufficient quota. + +set -euo pipefail + +fail() { + local code="$1"; shift + printf '\nERROR: %s\n\n' "$*" >&2 + exit "$code" +} + +# --- 1. Required env vars --------------------------------------------------- +if [ -z "${CLAUDE_ORGANIZATION_NAME:-}" ]; then + fail 1 "CLAUDE_ORGANIZATION_NAME is required. Run: azd env set CLAUDE_ORGANIZATION_NAME 'Your Org'" +fi +if [ -z "${AZURE_LOCATION:-}" ]; then + fail 1 "AZURE_LOCATION is required. Run: azd env set AZURE_LOCATION swedencentral" +fi + +LOCATION="$AZURE_LOCATION" + +# Build the list of (family, model, capacity) tuples. 
+FAMILIES=() +MODELS=() +CAPACITIES=() + +if [ -n "${CLAUDE_HAIKU_MODEL:-}" ]; then + FAMILIES+=("haiku"); MODELS+=("$CLAUDE_HAIKU_MODEL"); CAPACITIES+=("${CLAUDE_HAIKU_CAPACITY:-50}") +fi +if [ -n "${CLAUDE_SONNET_MODEL:-}" ]; then + FAMILIES+=("sonnet"); MODELS+=("$CLAUDE_SONNET_MODEL"); CAPACITIES+=("${CLAUDE_SONNET_CAPACITY:-50}") +fi +if [ -n "${CLAUDE_OPUS_MODEL:-}" ]; then + FAMILIES+=("opus"); MODELS+=("$CLAUDE_OPUS_MODEL"); CAPACITIES+=("${CLAUDE_OPUS_CAPACITY:-50}") +fi + +if [ "${#FAMILIES[@]}" -eq 0 ]; then + FAMILIES+=("legacy") + MODELS+=("${CLAUDE_MODEL_NAME:-claude-sonnet-4-6}") + CAPACITIES+=("${CLAUDE_MODEL_CAPACITY:-50}") +fi + +# --- 2. Azure CLI / active subscription ------------------------------------ +if ! command -v az >/dev/null 2>&1; then + fail 2 "Azure CLI (az) not found on PATH. Install: https://learn.microsoft.com/cli/azure/install-azure-cli" +fi + +SUB_ID="$(az account show --query id -o tsv 2>/dev/null || true)" +if [ -z "$SUB_ID" ]; then + fail 2 "No active Azure subscription. Run: az login (and 'az account set --subscription ' if needed)" +fi + +SUMMARY="" +for i in "${!FAMILIES[@]}"; do + SUMMARY="${SUMMARY}${SUMMARY:+, }${FAMILIES[$i]}=${MODELS[$i]}@${CAPACITIES[$i]}" +done +echo "Preflight: subscription $SUB_ID, location $LOCATION, deployments: $SUMMARY" + +PUBLISHER="anthropic" + +for i in "${!FAMILIES[@]}"; do + FAMILY="${FAMILIES[$i]}" + MODEL_NAME="${MODELS[$i]}" + CAPACITY="${CAPACITIES[$i]}" + + # Anthropic publishes Claude as a fetch-style plan named '-plan-new'. + OFFER="anthropic-$MODEL_NAME-offer" + PLAN="anthropic-$MODEL_NAME-plan-new" + MP_URL="https://management.azure.com/subscriptions/$SUB_ID/providers/Microsoft.MarketplaceOrdering/offerTypes/virtualmachine/publishers/$PUBLISHER/offers/$OFFER/plans/$PLAN/agreements/current?api-version=2021-01-01" + + set +e + MP_RAW="$(az rest --method get --url "$MP_URL" 2>&1)" + MP_EXIT=$? 
+  set -e
+
+  if [ "$MP_EXIT" -ne 0 ]; then
+    if echo "$MP_RAW" | grep -qE "was not found|BadRequest"; then
+      fail 4 "Marketplace offer 'anthropic/$OFFER/$PLAN' not found (family=$FAMILY).
+
+Likely causes:
+ - The model id '$MODEL_NAME' is misspelled.
+ - The model isn't (yet) published in the Anthropic-on-Foundry catalog.
+ - Anthropic changed the plan naming convention.
+
+Available Anthropic agreements on this subscription:
+ az rest --method get --url 'https://management.azure.com/subscriptions/$SUB_ID/providers/Microsoft.MarketplaceOrdering/agreements?api-version=2021-01-01' --query \"value[?properties.publisher=='anthropic']\"
+
+Underlying error:
+$MP_RAW"
+    fi
+    echo "Preflight: Marketplace catalog query for '$MODEL_NAME' returned an unexpected error (continuing — RP will validate at deploy time):" >&2
+    echo " $MP_RAW" >&2
+  else
+    ACCEPTED="$(echo "$MP_RAW" | "$(command -v python3 || command -v python)" -c 'import json, sys; print(json.load(sys.stdin)["properties"]["accepted"])' 2>/dev/null || true)"  # prefer python3: many hosts ship no bare 'python', which used to make a signed agreement read as unsigned
+    if [ "$ACCEPTED" != "True" ] && [ "$ACCEPTED" != "true" ]; then
+      echo "Preflight: '$MODEL_NAME' marketplace agreement is currently unsigned. The Cognitive Services RP will auto-sign during deployment on eligible subs."
+      echo " If your subscription blocks RP-initiated subscribes, pre-accept manually:"
+      echo " az term accept --publisher $PUBLISHER --product $OFFER --plan $PLAN"
+    else
+      echo "Preflight: '$MODEL_NAME' marketplace agreement signed."
+ fi + fi + + # --- Quota headroom (HARD FAIL on insufficient) ------------------------ + SKU="AIServices.GlobalStandard.$MODEL_NAME" + LIMIT="$(az cognitiveservices usage list --location "$LOCATION" --query "[?name.value=='$SKU'].limit | [0]" -o tsv 2>/dev/null || true)" + CURRENT="$(az cognitiveservices usage list --location "$LOCATION" --query "[?name.value=='$SKU'].currentValue | [0]" -o tsv 2>/dev/null || true)" + + if [ -n "$LIMIT" ]; then + LIMIT_INT="${LIMIT%%.*}" + CURRENT_INT="${CURRENT%%.*}" + CURRENT_INT="${CURRENT_INT:-0}" + AVAILABLE=$(( LIMIT_INT - CURRENT_INT )) + if [ "$AVAILABLE" -lt "$CAPACITY" ]; then + FAMILY_UPPER="$(echo "$FAMILY" | tr '[:lower:]' '[:upper:]')" + fail 6 "Insufficient quota for '$MODEL_NAME' (family=$FAMILY) in '$LOCATION'. + +Requested capacity: $CAPACITY TPM (thousands) +Available: $AVAILABLE TPM (limit $LIMIT_INT, currently used $CURRENT_INT) + +Fix one of: + - Lower the requested capacity: + azd env set CLAUDE_${FAMILY_UPPER}_CAPACITY $AVAILABLE + (or CLAUDE_MODEL_CAPACITY for legacy single-model mode) + - Free up quota by deleting unused deployments: + az cognitiveservices account deployment list --name --resource-group -o table + az cognitiveservices account deployment delete --name --resource-group --deployment-name + - Request a quota increase in the Azure Foundry portal: + Foundry portal -> Management center -> Quota -> select '$SKU' -> Request increase + +Note: without this preflight, Terraform (azapi_resource) would fail with an +opaque '400 715-123420' error because azapi bypasses ARM preflight +validation. Bicep / 'az deployment group create' show the real +'InsufficientQuota' message because they go through ARM preflight." + fi + echo "Preflight: '$MODEL_NAME' quota OK ($CAPACITY requested, $AVAILABLE available of $LIMIT_INT in $LOCATION)." + else + echo "Preflight: no quota row visible for '$SKU' in '$LOCATION' yet — first deploy may surface a quota error from the RP." >&2 + fi +done + +echo "Preflight OK." 
diff --git a/scripts/verify-claude-code.ps1 b/scripts/verify-claude-code.ps1 new file mode 100644 index 0000000..6e37430 --- /dev/null +++ b/scripts/verify-claude-code.ps1 @@ -0,0 +1,300 @@ +<# +.SYNOPSIS + End-to-end smoke test for a freshly provisioned Claude-on-Foundry deployment. + +.DESCRIPTION + Run this after `azd up` to verify in one shot that: + + 1. The post-provision activator (`claude-code.env.ps1`) exists and + exports the expected `CLAUDE_CODE_USE_FOUNDRY`, `ANTHROPIC_FOUNDRY_RESOURCE`, + and `ANTHROPIC_DEFAULT__MODEL` variables. + 2. `.vscode/settings.json` is wired up with the `claudeCode.environmentVariables` + schema the Claude Code VS Code extension reads. + 3. `az` is logged in and the current token tenant matches the tenant + that owns the Foundry resource (a mismatch is the #1 cause of 401s). + 4. The Claude Code CLI is on PATH. If not, the script prints the install + hint (or runs the official installer when `-AutoInstall` is set, the + same gate as `CLAUDE_CODE_AUTO_INSTALL` in the postprovision hook). + 5. (Default) A non-interactive `claude -p` round trip against each + deployed family. Skips this step with `-SkipClaudeCall`. + 6. (Opt-in) A `python src/hello_claude.py` round trip exercising the + Anthropic SDK + Entra ID code path. Enable with `-RunPythonSample`. + +.PARAMETER RepoRoot + Path to the repo root. Defaults to the parent of the scripts/ folder. + +.PARAMETER AutoInstall + Install the Claude Code CLI if it is missing. Equivalent to + `CLAUDE_CODE_AUTO_INSTALL=true` for the postprovision hook. + +.PARAMETER SkipClaudeCall + Skip the live `claude -p` round trip (avoids burning tokens). + +.PARAMETER RunPythonSample + After the CLI check, run `python src/hello_claude.py` from the repo root. + Requires `.env.local` populated via `azd env get-values` and a venv with + `pip install -r requirements.txt`. + +.EXAMPLE + pwsh -File scripts/verify-claude-code.ps1 + # All checks + live claude -p round trip per deployed family. 
+ +.EXAMPLE + pwsh -File scripts/verify-claude-code.ps1 -SkipClaudeCall + # Config checks only, no token cost. + +.EXAMPLE + pwsh -File scripts/verify-claude-code.ps1 -RunPythonSample + # Adds a Python Entra ID round trip on top of the standard checks. +#> +[CmdletBinding()] +param( + [string] $RepoRoot, + [switch] $AutoInstall, + [switch] $SkipClaudeCall, + [switch] $RunPythonSample +) + +$ErrorActionPreference = 'Stop' + +# --------------------------------------------------------------------------- +# Result accumulator -> printed as a summary table at the end. +# --------------------------------------------------------------------------- +$results = [System.Collections.Generic.List[object]]::new() +function Add-Result([string]$Name, [string]$Status, [string]$Detail = '') { + $results.Add([pscustomobject]@{ + Check = $Name + Status = $Status + Detail = $Detail + }) | Out-Null + $color = switch ($Status) { + 'PASS' { 'Green' } + 'WARN' { 'Yellow' } + 'FAIL' { 'Red' } + default { 'Gray' } + } + Write-Host (" [{0,-4}] {1}{2}" -f $Status, $Name, $(if ($Detail) { " - $Detail" } else { '' })) -ForegroundColor $color +} + +# --------------------------------------------------------------------------- +# Locate the repo root. +# --------------------------------------------------------------------------- +if (-not $RepoRoot) { + $here = Split-Path -Parent $PSCommandPath + $RepoRoot = Resolve-Path (Join-Path $here '..') | Select-Object -ExpandProperty Path +} +Write-Host "" +Write-Host "Verifying Claude Code wiring under: $RepoRoot" -ForegroundColor Cyan +Write-Host "" + +# --------------------------------------------------------------------------- +# 1. Activator file exists. 
+# --------------------------------------------------------------------------- +$activator = Join-Path $RepoRoot 'claude-code.env.ps1' +if (-not (Test-Path $activator)) { + Add-Result 'Activator (claude-code.env.ps1)' 'FAIL' 'not found - run azd up or scripts/configure-claude-code.ps1 first' + Write-Host "" + Write-Host "Stopping: cannot verify without an activator file." -ForegroundColor Red + exit 1 +} +Add-Result 'Activator (claude-code.env.ps1)' 'PASS' $activator + +# --------------------------------------------------------------------------- +# 2. Source the activator into the current scope. +# --------------------------------------------------------------------------- +. $activator | Out-Null + +$expectedVars = @('CLAUDE_CODE_USE_FOUNDRY', 'ANTHROPIC_FOUNDRY_RESOURCE') +$familyVars = @('ANTHROPIC_DEFAULT_HAIKU_MODEL', 'ANTHROPIC_DEFAULT_SONNET_MODEL', 'ANTHROPIC_DEFAULT_OPUS_MODEL') +$deployedFamilies = @() +foreach ($v in $expectedVars) { + $val = [Environment]::GetEnvironmentVariable($v, 'Process') + if ($val) { + Add-Result "env $v" 'PASS' $val + } else { + Add-Result "env $v" 'FAIL' 'not set after sourcing activator' + } +} +foreach ($v in $familyVars) { + $val = [Environment]::GetEnvironmentVariable($v, 'Process') + if ($val) { + Add-Result "env $v" 'PASS' $val + $deployedFamilies += [pscustomobject]@{ Family = ($v -replace 'ANTHROPIC_DEFAULT_(\w+)_MODEL','$1'); Deployment = $val } + } +} +if ($deployedFamilies.Count -eq 0) { + Add-Result 'Deployed families' 'FAIL' 'no ANTHROPIC_DEFAULT__MODEL set' +} else { + Add-Result 'Deployed families' 'PASS' (($deployedFamilies | ForEach-Object { $_.Family }) -join ', ') +} + +$foundryResource = $env:ANTHROPIC_FOUNDRY_RESOURCE + +# --------------------------------------------------------------------------- +# 3. .vscode/settings.json contains the right keys. 
+# --------------------------------------------------------------------------- +$settingsPath = Join-Path $RepoRoot '.vscode/settings.json' +if (Test-Path $settingsPath) { + try { + $settings = Get-Content -Raw -Path $settingsPath | ConvertFrom-Json + $envArr = $settings.'claudeCode.environmentVariables' + if ($envArr) { + $names = ($envArr | ForEach-Object { $_.name }) -join ', ' + Add-Result 'VS Code settings.json' 'PASS' $names + } else { + Add-Result 'VS Code settings.json' 'WARN' "no 'claudeCode.environmentVariables' key (extension may show login prompt)" + } + } catch { + Add-Result 'VS Code settings.json' 'WARN' "could not parse: $($_.Exception.Message)" + } +} else { + Add-Result 'VS Code settings.json' 'WARN' "missing (CLI works fine; VS Code extension won't auto-configure)" +} + +# --------------------------------------------------------------------------- +# 4. az login tenant matches the Foundry resource tenant. +# --------------------------------------------------------------------------- +$azCmd = Get-Command az -ErrorAction SilentlyContinue +if (-not $azCmd) { + Add-Result 'Azure CLI (az)' 'WARN' 'not on PATH - cannot validate tenant' +} else { + try { + $acct = & az account show -o json 2>$null | ConvertFrom-Json + if (-not $acct) { + Add-Result 'az account show' 'FAIL' 'not logged in - run az login --tenant ' + } else { + Add-Result 'az account show' 'PASS' "$($acct.user.name) on '$($acct.name)' (tenant $($acct.tenantId))" + + # Best-effort: look up the resource tenant and compare. 
+ if ($foundryResource) { + $found = $null + try { + $accountsJson = & az cognitiveservices account list -o json 2>$null + if ($accountsJson) { + $accounts = $accountsJson | ConvertFrom-Json + $found = $accounts | Where-Object { $_.name -eq $foundryResource } | Select-Object -First 1 + } + } catch { } + if ($found) { + Add-Result 'Foundry resource reachable' 'PASS' "$($found.name) (rg: $($found.resourceGroup), location: $($found.location))" + } else { + Add-Result 'Foundry resource reachable' 'WARN' "$foundryResource not visible to current az login - wrong tenant/subscription?" + } + } + } + } catch { + Add-Result 'az account show' 'FAIL' $_.Exception.Message + } +} + +# --------------------------------------------------------------------------- +# 5. Claude Code CLI on PATH (optional auto-install). +# --------------------------------------------------------------------------- +$claudeCmd = Get-Command claude -ErrorAction SilentlyContinue +if (-not $claudeCmd) { + $autoInstallEnv = $env:CLAUDE_CODE_AUTO_INSTALL -and ($env:CLAUDE_CODE_AUTO_INSTALL -ne 'false' -and $env:CLAUDE_CODE_AUTO_INSTALL -ne '0') + if ($AutoInstall -or $autoInstallEnv) { + Write-Host "" + Write-Host "Installing Claude Code CLI..." 
-ForegroundColor Cyan + $onWindows = ($PSVersionTable.PSEdition -eq 'Desktop') -or ($PSVersionTable.Platform -eq 'Win32NT') -or ($env:OS -eq 'Windows_NT') + try { + if ($onWindows) { + Invoke-RestMethod -Uri 'https://claude.ai/install.ps1' | Invoke-Expression + $userBin = Join-Path $env:USERPROFILE '.local\bin' + if (Test-Path (Join-Path $userBin 'claude.exe')) { + $env:PATH = "$userBin;$env:PATH" + } + } else { + & bash -c "curl -fsSL https://claude.ai/install.sh | bash" + $env:PATH = "$HOME/.local/bin:$env:PATH" + } + $claudeCmd = Get-Command claude -ErrorAction SilentlyContinue + } catch { + Add-Result 'Claude Code CLI install' 'FAIL' $_.Exception.Message + } + } +} + +if ($claudeCmd) { + $ver = (& claude --version 2>$null) -join ' ' + Add-Result 'Claude Code CLI' 'PASS' "$($claudeCmd.Source) ($ver)" +} else { + Add-Result 'Claude Code CLI' 'WARN' "not on PATH - install with 'irm https://claude.ai/install.ps1 | iex' or rerun with -AutoInstall" +} + +# --------------------------------------------------------------------------- +# 6. Live `claude -p` round trip per family (default on). +# --------------------------------------------------------------------------- +if ($claudeCmd -and -not $SkipClaudeCall) { + foreach ($d in $deployedFamilies) { + $modelArg = $d.Family.ToLower() + Write-Host "" + Write-Host " -> claude --model $modelArg -p 'say hi in 5 words'" -ForegroundColor Gray + try { + $reply = 'say hi in 5 words' | & claude --model $modelArg -p 2>&1 | Out-String + $reply = $reply.Trim() + if ($LASTEXITCODE -eq 0 -and $reply) { + $snippet = if ($reply.Length -gt 80) { $reply.Substring(0, 80) + '...' 
} else { $reply } + Add-Result "claude -p ($($d.Family))" 'PASS' $snippet + } else { + Add-Result "claude -p ($($d.Family))" 'FAIL' "exit $LASTEXITCODE - $reply" + } + } catch { + Add-Result "claude -p ($($d.Family))" 'FAIL' $_.Exception.Message + } + } +} elseif ($SkipClaudeCall) { + Add-Result 'claude -p round trip' 'WARN' 'skipped (-SkipClaudeCall)' +} + +# --------------------------------------------------------------------------- +# 7. Optional Python Entra ID round trip. +# --------------------------------------------------------------------------- +if ($RunPythonSample) { + $envLocal = Join-Path $RepoRoot '.env.local' + if (-not (Test-Path $envLocal)) { + Add-Result 'Python sample (hello_claude.py)' 'WARN' "no .env.local at repo root - run 'azd env get-values | Out-File -Encoding utf8 ../.env.local' first" + } else { + $py = Get-Command python -ErrorAction SilentlyContinue + if (-not $py) { + Add-Result 'Python sample (hello_claude.py)' 'WARN' 'python not on PATH (activate venv?)' + } else { + Push-Location $RepoRoot + try { + Write-Host "" + Write-Host " -> python src/hello_claude.py" -ForegroundColor Gray + $pyOut = & python src/hello_claude.py 2>&1 | Out-String + if ($LASTEXITCODE -eq 0) { + $snippet = ($pyOut.Trim() -split "`n" | Select-Object -First 1).ToString() + if ($snippet.Length -gt 80) { $snippet = $snippet.Substring(0, 80) + '...' } + Add-Result 'Python sample (hello_claude.py)' 'PASS' $snippet + } else { + Add-Result 'Python sample (hello_claude.py)' 'FAIL' "exit $LASTEXITCODE - $($pyOut.Trim())" + } + } finally { + Pop-Location + } + } + } +} + +# --------------------------------------------------------------------------- +# Summary. 
+# --------------------------------------------------------------------------- +Write-Host "" +Write-Host "=============================================================" -ForegroundColor Cyan +Write-Host " Verification summary" -ForegroundColor Cyan +Write-Host "=============================================================" -ForegroundColor Cyan +$results | Format-Table -AutoSize | Out-String | Write-Host + +$failures = @($results | Where-Object Status -eq 'FAIL') +$warnings = @($results | Where-Object Status -eq 'WARN') +if ($failures.Count -gt 0) { + Write-Host "$($failures.Count) check(s) FAILED. See above." -ForegroundColor Red + exit 1 +} +if ($warnings.Count -gt 0) { + Write-Host "$($warnings.Count) warning(s). Deployment is usable; review above for follow-ups." -ForegroundColor Yellow +} +Write-Host "All required checks passed." -ForegroundColor Green +exit 0 diff --git a/scripts/verify-claude-code.sh b/scripts/verify-claude-code.sh new file mode 100644 index 0000000..a7b8aab --- /dev/null +++ b/scripts/verify-claude-code.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +# End-to-end smoke test for a freshly provisioned Claude-on-Foundry deployment. +# See verify-claude-code.ps1 for the full docstring. POSIX flavor, same checks. 
+# +# Usage: +# bash scripts/verify-claude-code.sh # all checks + claude -p per family +# bash scripts/verify-claude-code.sh --skip-claude-call # config checks only, no token cost +# bash scripts/verify-claude-code.sh --auto-install # install claude CLI if missing +# bash scripts/verify-claude-code.sh --run-python-sample # also run python src/hello_claude.py +# +# Exit codes: +# 0 all checks passed (warnings allowed) +# 1 one or more required checks failed +set -u + +repo_root="" +auto_install=0 +skip_claude=0 +run_python=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo-root) repo_root="$2"; shift 2 ;; + --auto-install) auto_install=1; shift ;; + --skip-claude-call) skip_claude=1; shift ;; + --run-python-sample) run_python=1; shift ;; + -h|--help) sed -n '2,15p' "$0"; exit 0 ;; + *) echo "Unknown flag: $1" >&2; exit 2 ;; + esac +done + +if [[ -z "$repo_root" ]]; then + here="$(cd "$(dirname "$0")" && pwd)" + repo_root="$(cd "$here/.." && pwd)" +fi + +# ANSI colors (only when stdout is a tty). +if [[ -t 1 ]]; then + C_RED=$'\033[0;31m'; C_YEL=$'\033[0;33m'; C_GRN=$'\033[0;32m'; C_CYA=$'\033[0;36m'; C_DIM=$'\033[0;90m'; C_RST=$'\033[0m' +else + C_RED=''; C_YEL=''; C_GRN=''; C_CYA=''; C_DIM=''; C_RST='' +fi + +results=() +fail_count=0 +warn_count=0 +add_result() { + local status="$1" name="$2" detail="${3:-}" + case "$status" in + PASS) color="$C_GRN" ;; + WARN) color="$C_YEL"; warn_count=$((warn_count + 1)) ;; + FAIL) color="$C_RED"; fail_count=$((fail_count + 1)) ;; + *) color="" ;; + esac + if [[ -n "$detail" ]]; then + printf " ${color}[%-4s] %s${C_RST} - %s\n" "$status" "$name" "$detail" + else + printf " ${color}[%-4s] %s${C_RST}\n" "$status" "$name" + fi + results+=("$status|$name|$detail") +} + +echo +printf "${C_CYA}Verifying Claude Code wiring under: %s${C_RST}\n" "$repo_root" +echo + +# 1. Activator file. +activator="$repo_root/claude-code.env.sh" +if [[ ! 
-f "$activator" ]]; then + add_result FAIL "Activator (claude-code.env.sh)" "not found - run azd up or scripts/configure-claude-code.sh first" + echo + echo "${C_RED}Stopping: cannot verify without an activator file.${C_RST}" + exit 1 +fi +add_result PASS "Activator (claude-code.env.sh)" "$activator" + +# 2. Source activator + check env vars. +# shellcheck disable=SC1090 +source "$activator" >/dev/null 2>&1 || true + +for v in CLAUDE_CODE_USE_FOUNDRY ANTHROPIC_FOUNDRY_RESOURCE; do + val="${!v:-}" + if [[ -n "$val" ]]; then + add_result PASS "env $v" "$val" + else + add_result FAIL "env $v" "not set after sourcing activator" + fi +done + +deployed_families=() +for fam in HAIKU SONNET OPUS; do + var="ANTHROPIC_DEFAULT_${fam}_MODEL" + val="${!var:-}" + if [[ -n "$val" ]]; then + add_result PASS "env $var" "$val" + deployed_families+=("$fam|$val") + fi +done +if [[ ${#deployed_families[@]} -eq 0 ]]; then + add_result FAIL "Deployed families" "no ANTHROPIC_DEFAULT__MODEL set" +else + list=$(IFS=,; echo "${deployed_families[*]%%|*}") + add_result PASS "Deployed families" "$list" +fi + +foundry_resource="${ANTHROPIC_FOUNDRY_RESOURCE:-}" + +# 3. .vscode/settings.json sanity. 
+settings="$repo_root/.vscode/settings.json"
+if [[ -f "$settings" ]]; then
+  if command -v jq >/dev/null 2>&1; then
+    names=$(jq -r '."claudeCode.environmentVariables" // [] | map(.name) | join(", ")' "$settings" 2>/dev/null)
+    if [[ -n "$names" ]]; then
+      add_result PASS "VS Code settings.json" "$names"
+    else
+      add_result WARN "VS Code settings.json" "no 'claudeCode.environmentVariables' key (extension may show login prompt)"
+    fi
+  elif grep -q 'claudeCode.environmentVariables' "$settings" 2>/dev/null; then
+    add_result PASS "VS Code settings.json" "(jq not installed; grep'd claudeCode.environmentVariables key)"
+  else
+    add_result WARN "VS Code settings.json" "no 'claudeCode.environmentVariables' key"
+  fi
+else
+  add_result WARN "VS Code settings.json" "missing (CLI works fine; VS Code extension won't auto-configure)"
+fi
+
+# 4. az login.
+if ! command -v az >/dev/null 2>&1; then
+  add_result WARN "Azure CLI (az)" "not on PATH - cannot validate tenant"
+else
+  acct_json=$(az account show -o json 2>/dev/null || true)
+  if [[ -z "$acct_json" ]]; then
+    add_result FAIL "az account show" "not logged in - run az login --tenant "
+  else
+    user=$(az account show --query user.name -o tsv 2>/dev/null || true)  # JMESPath, not sed: the first "name" key in the JSON is the subscription name, not the user
+    tenant=$(az account show --query tenantId -o tsv 2>/dev/null || true)  # top-level tenantId only; avoids matching a tenantId nested under managedByTenants
+    sub=$(az account show --query id -o tsv 2>/dev/null || true)
+    add_result PASS "az account show" "user=$user tenant=$tenant sub=$sub"
+
+    if [[ -n "$foundry_resource" ]]; then
+      rg=$(az cognitiveservices account list -o tsv --query "[?name=='$foundry_resource'].resourceGroup | [0]" 2>/dev/null || echo '')
+      loc=$(az cognitiveservices account list -o tsv --query "[?name=='$foundry_resource'].location | [0]" 2>/dev/null || echo '')
+      if [[ -n "$rg" ]]; then
+        add_result PASS "Foundry resource reachable" "$foundry_resource (rg: $rg, location: $loc)"
+      else
+        add_result WARN "Foundry resource reachable" "$foundry_resource not visible to current az login 
- wrong tenant/subscription?"
+      fi
+    fi
+  fi
+fi
+
+# 5. Claude Code CLI on PATH.
+auto_install_env="${CLAUDE_CODE_AUTO_INSTALL:-}"
+auto_install_env_on=0
+if [[ -n "$auto_install_env" && "$auto_install_env" != "false" && "$auto_install_env" != "0" ]]; then
+  auto_install_env_on=1
+fi
+
+if ! command -v claude >/dev/null 2>&1; then
+  if [[ $auto_install -eq 1 || $auto_install_env_on -eq 1 ]]; then
+    echo
+    echo "${C_CYA}Installing Claude Code CLI...${C_RST}"
+    if curl -fsSL https://claude.ai/install.sh | bash; then
+      export PATH="$HOME/.local/bin:$PATH"
+    else
+      add_result FAIL "Claude Code CLI install" "installer exited non-zero"
+    fi
+  fi
+fi
+
+if command -v claude >/dev/null 2>&1; then
+  ver=$(claude --version 2>/dev/null | head -1)
+  add_result PASS "Claude Code CLI" "$(command -v claude) ($ver)"
+
+  # 6. claude -p per family.
+  if [[ $skip_claude -eq 0 ]]; then
+    for entry in ${deployed_families[@]+"${deployed_families[@]}"}; do  # '+' guard: an empty array trips 'set -u' on bash < 4.4
+      fam="${entry%%|*}"
+      model_arg=$(echo "$fam" | tr '[:upper:]' '[:lower:]')
+      echo
+      echo " ${C_DIM}-> claude --model $model_arg -p 'say hi in 5 words'${C_RST}"
+      rc=0  # reset per family: otherwise one failure marks every later family as FAIL
+      reply=$(echo 'say hi in 5 words' | claude --model "$model_arg" -p 2>&1) || rc=$?
+      if [[ $rc -eq 0 && -n "$reply" ]]; then
+        snippet="${reply:0:80}"
+        add_result PASS "claude -p ($fam)" "$snippet"
+      else
+        add_result FAIL "claude -p ($fam)" "exit $rc - $reply"
+      fi
+    done
+  else
+    add_result WARN "claude -p round trip" "skipped (--skip-claude-call)"
+  fi
+else
+  add_result WARN "Claude Code CLI" "not on PATH - install with 'curl -fsSL https://claude.ai/install.sh | bash' or rerun with --auto-install"
+fi
+
+# 7. Optional Python Entra ID round trip.
+if [[ $run_python -eq 1 ]]; then
+  env_local="$repo_root/.env.local"
+  if [[ ! -f "$env_local" ]]; then
+    add_result WARN "Python sample (hello_claude.py)" "no .env.local at repo root - run 'azd env get-values > ../.env.local' first"
+  elif ! 
command -v python >/dev/null 2>&1; then
+    add_result WARN "Python sample (hello_claude.py)" "python not on PATH (activate venv?)"
+  else
+    echo
+    echo " ${C_DIM}-> python src/hello_claude.py${C_RST}"
+    rc=0  # reset: a non-zero rc left over from the claude -p loop must not fail this check
+    (cd "$repo_root" && python src/hello_claude.py >/tmp/hello_claude.out 2>&1) || rc=$?
+    out=$(head -1 /tmp/hello_claude.out 2>/dev/null || echo '')
+    if [[ $rc -eq 0 ]]; then
+      add_result PASS "Python sample (hello_claude.py)" "${out:0:80}"
+    else
+      add_result FAIL "Python sample (hello_claude.py)" "exit $rc - $out"
+    fi
+  fi
+fi
+
+# Summary.
+echo
+echo "${C_CYA}=============================================================${C_RST}"
+echo "${C_CYA} Verification summary${C_RST}"
+echo "${C_CYA}=============================================================${C_RST}"
+printf "%-4s %-40s %s\n" "STAT" "CHECK" "DETAIL"
+printf "%-4s %-40s %s\n" "----" "----------------------------------------" "------"
+for line in "${results[@]}"; do
+  status="${line%%|*}"
+  rest="${line#*|}"
+  name="${rest%%|*}"
+  detail="${rest#*|}"
+  printf "%-4s %-40s %s\n" "$status" "$name" "$detail"
+done
+echo
+
+if [[ $fail_count -gt 0 ]]; then
+  echo "${C_RED}$fail_count check(s) FAILED. See above.${C_RST}"
+  exit 1
+fi
+if [[ $warn_count -gt 0 ]]; then
+  echo "${C_YEL}$warn_count warning(s). Deployment is usable; review above for follow-ups.${C_RST}"
+fi
+echo "${C_GRN}All required checks passed.${C_RST}"
+exit 0
diff --git a/src/check_claude_quota.py b/src/check_claude_quota.py
new file mode 100644
index 0000000..910f7e0
--- /dev/null
+++ b/src/check_claude_quota.py
@@ -0,0 +1,558 @@
+"""
+Check Claude (Anthropic) quota and capacity on Microsoft Foundry.
+
+Uses the same Azure Resource Manager APIs documented for Azure OpenAI quota:
+ https://learn.microsoft.com/azure/foundry/openai/how-to/quota#programmatically-check-quota-and-capacity
+
+Two endpoints are queried:
+
+1. 
Usages API + GET .../locations/{location}/usages?api-version=2024-10-01 + -> per-region ledger of "limit" vs "currentValue" for each quota line. + +2. Model Capacities API + GET .../modelCapacities?api-version=2024-10-01 + &modelFormat=Anthropic&modelName={name}&modelVersion={version} + -> deployable capacity per location + SKU (GlobalStandard) for a model. + +Prereqs +------- +* Azure CLI logged in (`az login --tenant `; `az account set -s `), + or any other DefaultAzureCredential source. +* Caller has at least `Cognitive Services Usages Reader` at subscription scope + (or Reader). Without it the calls return 403. +* The subscription must be Enterprise or MCA-E for Claude to actually appear + (per the Foundry docs); on other subscription types the quota lines may be + absent rather than zero. + +Usage +----- + python src/check_claude_quota.py # current `az` sub, default regions + python src/check_claude_quota.py --subscription + python src/check_claude_quota.py --regions eastus2 swedencentral + python src/check_claude_quota.py --json # raw JSON instead of tables +""" +from __future__ import annotations + +import argparse +import json +import os +import subprocess +import sys +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass, asdict +from typing import Any + +import requests +from azure.identity import ( + AzureCliCredential, + AzureDeveloperCliCredential, + ChainedTokenCredential, + DefaultAzureCredential, +) + +ARM = "https://management.azure.com" +API_VERSION = "2024-10-01" + +# Regions where Claude in Foundry is offered (per the Foundry Claude doc). +DEFAULT_REGIONS = ["eastus2", "swedencentral"] + +# Known Claude model IDs in Foundry. Versions are auto-discovered at runtime +# (the Model Capacities API requires `modelVersion`, and Anthropic versions are +# a mix of `1` and date strings like `20251001`). 
CLAUDE_MODELS = [
    "claude-mythos-preview",
    "claude-opus-4-7",
    "claude-opus-4-6",
    "claude-opus-4-5",
    "claude-opus-4-1",
    "claude-sonnet-4-6",
    "claude-sonnet-4-5",
    "claude-haiku-4-5",
]

# Regions used for auto-discovery of (model, version) pairs. Claude is currently
# only listed under these locations; discovery in one is sufficient.
DISCOVERY_REGIONS = ["eastus2", "swedencentral"]

MODEL_FORMAT = "Anthropic"

# RPM is NOT a separate quota line for Claude in the Usages API. The Foundry
# Claude docs publish fixed RPM:TPM ratios per model; we derive RPM from the
# TPM limit using these ratios. Source:
#   https://learn.microsoft.com/azure/foundry/foundry-models/how-to/use-foundry-models-claude#api-quotas-and-limits
# Format: RPM per 1,000 TPM (i.e. multiply by the "thousands" units the API
# returns to get RPM).
RPM_PER_KTPM: dict[str, float] = {
    "claude-opus-4-7": 1.0,  # 2,000 RPM / 2,000 k-TPM
    "claude-opus-4-6": 1.0,
    "claude-opus-4-5": 1.0,
    "claude-opus-4-1": 1.0,
    "claude-sonnet-4-6": 1.0,
    "claude-sonnet-4-5": 2.0,  # 4,000 RPM / 2,000 k-TPM
    "claude-haiku-4-5": 1.0,  # 4,000 RPM / 4,000 k-TPM
    "claude-mythos-preview": 1.0,
}


@dataclass
class QuotaLine:
    """One Claude-related row of the Usages API for a single region.

    ``current`` and ``limit`` carry the API's ``currentValue`` / ``limit``
    numbers unchanged. This module treats them as thousands of tokens per
    minute (k-TPM); multiply by 1000 to display plain TPM.
    """

    region: str
    name: str  # raw quota name (name.value)
    localized: str  # human-readable quota name (name.localizedValue)
    current: float
    limit: float
    model: str | None = None  # parsed Claude model id, when identifiable

    def _to_rpm(self, ktpm: float) -> float | None:
        """Convert a k-TPM figure to RPM, or None when no ratio is known."""
        if not self.model:
            return None
        ratio = RPM_PER_KTPM.get(self.model)
        if ratio is None:
            return None
        return ktpm * ratio

    @property
    def pct(self) -> float:
        """Percentage of the limit currently used (0.0 when the limit is 0)."""
        return (self.current / self.limit * 100.0) if self.limit else 0.0

    @property
    def rpm_limit(self) -> float | None:
        """Derived RPM limit (already in plain RPM, not thousands)."""
        return self._to_rpm(self.limit)

    @property
    def rpm_used(self) -> float | None:
        """Derived RPM usage (already in plain RPM, not thousands)."""
        return self._to_rpm(self.current)


@dataclass
class CapacityLine:
    """One row of the Model Capacities API: a model/version in a region + SKU."""

    model: str  # "name:version"
    region: str
    sku: str
    available: float
    available_finetune: float | None


def _get_current_subscription() -> str:
    """Resolve a subscription id from AZURE_SUBSCRIPTION_ID or `az account show`.

    Raises:
        SystemExit: when neither source yields a subscription id.
    """
    env = (os.environ.get("AZURE_SUBSCRIPTION_ID") or "").strip()
    if env:
        return env

    hint = (
        "Could not resolve a subscription id. Either:\n"
        "  * set $env:AZURE_SUBSCRIPTION_ID, or\n"
        "  * pass --subscription <id>, or\n"
        "  * install Azure CLI and run `az login` so `az account show` works.\n"
    )
    try:
        # Keep stderr separate: `az` prints warnings there, and merging them
        # into stdout would corrupt the subscription id we return.
        out = subprocess.check_output(
            ["az", "account", "show", "--query", "id", "-o", "tsv"],
            stderr=subprocess.PIPE,
            text=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as ex:
        stderr_text = (getattr(ex, "stderr", None) or "").strip()
        detail = f"{ex}\n{stderr_text}" if stderr_text else f"{ex}"
        raise SystemExit(f"{hint}Underlying error: {detail}") from ex

    sub = out.strip()
    if not sub:
        raise SystemExit(
            f"{hint}Underlying error: `az account show` returned no subscription id"
        )
    return sub


def _bearer_token(tenant_id: str | None = None) -> str:
    """Acquire an ARM access token and return it as a string.

    When ``tenant_id`` is supplied we try AzureCliCredential and
    AzureDeveloperCliCredential against that tenant explicitly. Otherwise we
    fall back to the DefaultAzureCredential chain (with the interactive
    browser credential left enabled).
    """
    if tenant_id:
        cred = ChainedTokenCredential(
            AzureCliCredential(tenant_id=tenant_id),
            AzureDeveloperCliCredential(tenant_id=tenant_id),
        )
    else:
        cred = DefaultAzureCredential(exclude_interactive_browser_credential=False)
    return cred.get_token(f"{ARM}/.default").token


def _is_claude(item: dict[str, Any]) -> bool:
    """Return True when a usage item's quota name looks Claude-related.

    A usage line is Claude-related if its quota name (raw or localized)
    mentions "anthropic", "claude", or any id listed in CLAUDE_MODELS.
    The name format is `{Provider}.{DeploymentType}.{Model}`.
    """
    name_obj = item.get("name") or {}
    raw = name_obj.get("value") or ""
    localized = name_obj.get("localizedValue") or ""
    haystack = f"{raw} {localized}".lower()
    if "anthropic" in haystack or "claude" in haystack:
        return True
    return any(m in haystack for m in CLAUDE_MODELS)


def fetch_usages(subscription_id: str, region: str, token: str) -> list[QuotaLine]:
    """Fetch the Claude-related quota lines for one region from the Usages API.

    Raises:
        PermissionError: on HTTP 403 (caller lacks the reader role).
        requests.HTTPError: on any other non-success status.
    """
    url = (
        f"{ARM}/subscriptions/{subscription_id}"
        f"/providers/Microsoft.CognitiveServices/locations/{region}/usages"
        f"?api-version={API_VERSION}"
    )
    r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
    if r.status_code == 403:
        raise PermissionError(
            f"403 from Usages API in {region}. Grant the caller "
            f"'Cognitive Services Usages Reader' at the subscription scope."
        )
    r.raise_for_status()
    payload = r.json()

    lines: list[QuotaLine] = []
    for item in payload.get("value", []):
        if not _is_claude(item):
            continue
        name_obj = item.get("name") or {}
        # `or ""` guards against explicit nulls in the payload.
        name = name_obj.get("value") or ""
        # Quota name format: AIServices.GlobalStandard.{model}; the model id
        # is the last dot-separated component.
        model_id = name.split(".")[-1] if name else None
        lines.append(
            QuotaLine(
                region=region,
                name=name,
                localized=name_obj.get("localizedValue") or "",
                current=float(item.get("currentValue", 0) or 0),
                limit=float(item.get("limit", 0) or 0),
                model=model_id,
            )
        )
    return lines


def fetch_capacity(
    subscription_id: str, model: str, version: str, token: str
) -> list[CapacityLine]:
    """Fetch deployable capacity for one (model, version) across locations/SKUs.

    Returns an empty list on HTTP 404.

    Raises:
        PermissionError: on HTTP 403.
        requests.HTTPError: on any other non-success status.
    """
    url = (
        f"{ARM}/subscriptions/{subscription_id}"
        f"/providers/Microsoft.CognitiveServices/modelCapacities"
        f"?api-version={API_VERSION}"
        f"&modelFormat={MODEL_FORMAT}"
        f"&modelName={model}"
        f"&modelVersion={version}"
    )
    r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
    # 404 / empty value means the model isn't surfaced in this subscription
    # (often a subscription-type or onboarding issue for Claude). Treat as empty.
    if r.status_code == 404:
        return []
    if r.status_code == 403:
        raise PermissionError(
            "403 from Model Capacities API. Grant the caller 'Cognitive Services "
            "Usages Reader' (or Reader) at the subscription scope."
        )
    r.raise_for_status()
    payload = r.json()

    out: list[CapacityLine] = []
    for item in payload.get("value", []):
        props = item.get("properties", {}) or {}
        finetune = props.get("availableFinetuneCapacity")
        out.append(
            CapacityLine(
                model=f"{model}:{version}",
                region=item.get("location", ""),
                sku=props.get("skuName", ""),
                available=float(props.get("availableCapacity", 0) or 0),
                available_finetune=float(finetune) if finetune is not None else None,
            )
        )
    return out


def discover_claude_versions(
    subscription_id: str, token: str, regions: list[str] | None = None
) -> dict[str, set[str]]:
    """Return {model_name -> {versions...}} for every Claude model visible to the sub.

    ``regions`` defaults to DISCOVERY_REGIONS. Regions whose models endpoint
    answers with an HTTP error status are skipped (best-effort discovery).
    """
    if regions is None:
        regions = DISCOVERY_REGIONS
    found: dict[str, set[str]] = {}
    for region in regions:
        url = (
            f"{ARM}/subscriptions/{subscription_id}"
            f"/providers/Microsoft.CognitiveServices/locations/{region}/models"
            f"?api-version={API_VERSION}"
        )
        r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
        if r.status_code >= 400:
            continue
        for item in (r.json().get("value") or []):
            m = item.get("model") or {}
            if m.get("format") != MODEL_FORMAT:
                continue
            name = m.get("name")
            version = m.get("version")
            if not name or not version:
                continue
            found.setdefault(name, set()).add(version)
    return found


def _print_merged(usages: list[QuotaLine], capacities: list[CapacityLine]) -> None:
    """Print a single unified table keyed on (model, region).

    Combines:
      * Usages API   -> TPM used / TPM limit / derived RPM limit
      * Capacity API -> deployable capacity (first entry per model+region)
      * Doc defaults -> public Default RPM/TPM (0/0; Claude is EA/MCA-E gated)

    Outer-join: capacity may exist in regions where no quota has been
    consumed yet, and vice versa.
    """
    # Index usages by (model, region); lines without a parsed model are dropped.
    u_idx: dict[tuple[str, str], QuotaLine] = {
        (u.model, u.region): u for u in usages if u.model
    }
    # Group capacity by (model_base, region), stripping the ":version" suffix.
    c_idx: dict[tuple[str, str], list[CapacityLine]] = {}
    for c in capacities:
        base = c.model.split(":", 1)[0]
        c_idx.setdefault((base, c.region), []).append(c)

    keys = sorted(set(u_idx) | set(c_idx))
    if not keys:
        print("  (no Claude quota or capacity visible)")
        return

    headers = (
        "Model", "Region", "SKU",
        "Def RPM", "Def TPM",
        "TPM Used", "TPM Limit", "TPM %",
        "RPM Limit*", "Capacity", "Version",
    )
    data: list[tuple[str, ...]] = []
    for model, region in keys:
        u = u_idx.get((model, region))
        caps = c_idx.get((model, region), [])
        cap = caps[0] if caps else None
        if u:
            # current/limit are in thousands of TPM -> scale to plain TPM.
            tpm_used = f"{u.current * 1000:,.0f}"
            tpm_lim = f"{u.limit * 1000:,.0f}"
            tpm_pct = f"{u.pct:.1f}%"
            # rpm_limit is already plain RPM (k-TPM x RPM-per-k-TPM); scaling
            # it by 1000 again would overstate the limit a thousandfold.
            rpm_lim = f"{u.rpm_limit:,.0f}" if u.rpm_limit is not None else "-"
        else:
            tpm_used = tpm_lim = tpm_pct = rpm_lim = "-"
        sku = cap.sku if cap else "GlobalStandard"
        cap_avail = f"{cap.available:,.0f}" if cap else "-"
        version = cap.model.split(":", 1)[1] if cap and ":" in cap.model else "-"
        data.append((
            model, region, sku,
            "0", "0",
            tpm_used, tpm_lim, tpm_pct,
            rpm_lim, cap_avail, version,
        ))

    widths = [max(len(h), max(len(r[i]) for r in data)) for i, h in enumerate(headers)]
    # Right-align numeric columns; left-align identifiers.
    right_align = {3, 4, 5, 6, 7, 8, 9}
    align_specs = [">" if i in right_align else "<" for i in range(len(headers))]
    fmt = "  " + "  ".join(f"{{:{a}{w}}}" for a, w in zip(align_specs, widths))
    print(fmt.format(*headers))
    print("  " + "  ".join("-" * w for w in widths))
    for r in data:
        print(fmt.format(*r))
    print(
        "\n  * RPM Limit is DERIVED from the per-model RPM:TPM ratios published in the\n"
        "    Foundry Claude docs; it is not a separate quota line in the Usages API.\n"
        "    Def RPM/TPM (Default) = public non-EA defaults; Claude is gated to EA/MCA-E."
    )


def _print_usages(lines: list[QuotaLine]) -> None:
    """Print one row per quota line with TPM figures and derived RPM figures."""
    if not lines:
        print("  (no Claude/Anthropic quota lines visible in this subscription)")
        return
    width_region = max(len(q.region) for q in lines)
    width_name = max(len(q.localized or q.name) for q in lines)
    print(
        f"  {'REGION'.ljust(width_region)}  "
        f"{'QUOTA'.ljust(width_name)}  "
        f"{'TPM USED':>14}  {'TPM LIMIT':>14}  {'TPM %':>6}  "
        f"{'RPM USED*':>10}  {'RPM LIMIT*':>11}"
    )
    for q in sorted(lines, key=lambda x: (x.region, x.localized or x.name)):
        label = q.localized or q.name
        rpm_u = f"{q.rpm_used:>10,.0f}" if q.rpm_used is not None else f"{'-':>10}"
        rpm_l = f"{q.rpm_limit:>11,.0f}" if q.rpm_limit is not None else f"{'-':>11}"
        # current/limit are thousands of TPM; scale so the "TPM" headers are
        # truthful and match the merged table.
        print(
            f"  {q.region.ljust(width_region)}  "
            f"{label.ljust(width_name)}  "
            f"{q.current * 1000:>14,.0f}  {q.limit * 1000:>14,.0f}  {q.pct:>5.1f}%  "
            f"{rpm_u}  {rpm_l}"
        )
    print(
        "  * RPM columns are DERIVED from the documented per-model RPM:TPM ratios\n"
        "    (Foundry Claude docs); they are not separate quota lines in the API."
    )


def _print_doc_style(lines: list[QuotaLine]) -> None:
    """Print one row per (model, region) in the Foundry-doc table format.

    Columns: Model | Region | Deployment type | Default RPM | Default TPM |
             EA/MCA-E RPM | EA/MCA-E TPM

    "Default" = the public doc default for non-EA subs (0/0, since Claude is
    gated to Enterprise + MCA-E). "EA/MCA-E" columns are the actual values
    pulled from this subscription's Usages API (TPM) and derived RPM.
    """
    rows = [q for q in lines if q.model]
    if not rows:
        print("  (no Claude quota visible)")
        return
    rows.sort(key=lambda x: (x.model or "", x.region))
    headers = (
        "Model", "Region", "Deployment type",
        "Default RPM", "Default TPM",
        "EA/MCA-E RPM", "EA/MCA-E TPM",
    )
    data = []
    for q in rows:
        tpm_limit = q.limit * 1000  # API reports in thousands
        # rpm_limit is already plain RPM; do not scale it again.
        rpm_str = f"{q.rpm_limit:,.0f}" if q.rpm_limit is not None else "-"
        data.append((
            q.model or "",
            q.region,
            "Global Standard",
            "0", "0",
            rpm_str,
            f"{tpm_limit:,.0f}",
        ))
    widths = [max(len(h), max(len(r[i]) for r in data)) for i, h in enumerate(headers)]
    fmt = "  " + "  ".join(f"{{:<{w}}}" for w in widths)
    print(fmt.format(*headers))
    print("  " + "  ".join("-" * w for w in widths))
    for r in data:
        print(fmt.format(*r))


def _print_capacities(lines: list[CapacityLine], regions_filter: list[str]) -> None:
    """Print capacity rows, restricted to ``regions_filter`` when it is non-empty."""
    lines = [c for c in lines if not regions_filter or c.region in regions_filter]
    if not lines:
        print("  (no Claude model capacity visible in the requested regions)")
        return
    width_model = max(len(c.model) for c in lines)
    width_region = max(len(c.region) for c in lines)
    width_sku = max(len(c.sku) for c in lines)
    print(
        f"  {'MODEL'.ljust(width_model)}  "
        f"{'REGION'.ljust(width_region)}  "
        f"{'SKU'.ljust(width_sku)}  {'AVAILABLE':>12}"
    )
    for c in sorted(lines, key=lambda x: (x.model, x.region, x.sku)):
        print(
            f"  {c.model.ljust(width_model)}  "
            f"{c.region.ljust(width_region)}  "
            f"{c.sku.ljust(width_sku)}  {c.available:>12,.0f}"
        )


def main() -> int:
    """CLI entry point: query usages and capacities, then print tables or JSON.

    Returns the process exit code (always 0; per-request failures are
    reported on stderr or in the JSON ``*_errors`` maps).
    """
    p = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    p.add_argument("--subscription", help="Azure subscription id (default: current az subscription)")
    p.add_argument("--tenant", help="Azure tenant id to authenticate against (default: caller's home tenant)")
    p.add_argument(
        "--regions",
        nargs="+",
        default=DEFAULT_REGIONS,
        help=f"Regions to query for usages (default: {' '.join(DEFAULT_REGIONS)})",
    )
    p.add_argument(
        "--models",
        nargs="+",
        default=CLAUDE_MODELS,
        help="Claude model names to query for capacity",
    )
    p.add_argument("--json", action="store_true", help="Emit raw JSON instead of tables")
    args = p.parse_args()

    sub = args.subscription or _get_current_subscription()
    token = _bearer_token(args.tenant)

    # --- 1. Usages (per region) -------------------------------------------------
    usages: list[QuotaLine] = []
    usage_errors: dict[str, str] = {}
    with ThreadPoolExecutor(max_workers=min(8, len(args.regions))) as pool:
        futures = {pool.submit(fetch_usages, sub, r, token): r for r in args.regions}
        for fut in as_completed(futures):
            region = futures[fut]
            try:
                usages.extend(fut.result())
            except Exception as e:  # noqa: BLE001 - collected and reported per region
                usage_errors[region] = str(e)

    # --- 2. Model capacities (per (model, version), all regions) --------------
    # Discover (name, version) pairs from the locations/{region}/models endpoint
    # so we don't have to hard-code Anthropic's versioning scheme.
    discovered = discover_claude_versions(sub, token)
    pairs: list[tuple[str, str]] = sorted(
        (name, ver) for name, vers in discovered.items() for ver in vers
    )
    # Constrain to models the user asked about (if they passed --models).
    if args.models != CLAUDE_MODELS:
        pairs = [(n, v) for (n, v) in pairs if n in args.models]

    capacities: list[CapacityLine] = []
    cap_errors: dict[str, str] = {}
    if pairs:  # ThreadPoolExecutor rejects max_workers=0
        with ThreadPoolExecutor(max_workers=min(8, len(pairs))) as pool:
            cap_futures = {
                pool.submit(fetch_capacity, sub, n, v, token): (n, v) for (n, v) in pairs
            }
            for fut in as_completed(cap_futures):
                name, ver = cap_futures[fut]
                try:
                    capacities.extend(fut.result())
                except Exception as e:  # noqa: BLE001 - collected and reported per model
                    cap_errors[f"{name}:{ver}"] = str(e)

    if args.json:
        print(
            json.dumps(
                {
                    "subscription": sub,
                    "regions": args.regions,
                    "usages": [
                        asdict(u) | {
                            "pct": u.pct,
                            "rpm_used": u.rpm_used,
                            "rpm_limit": u.rpm_limit,
                        }
                        for u in usages
                    ],
                    "usage_errors": usage_errors,
                    "capacities": [asdict(c) for c in capacities],
                    "capacity_errors": cap_errors,
                },
                indent=2,
            )
        )
        return 0

    print(f"Subscription: {sub}")
    print(f"Regions queried: {', '.join(args.regions)}")

    print("\n=== Claude quota + capacity (merged) ===")
    _print_merged(usages, capacities)
    for region, err in usage_errors.items():
        print(f"  ! usages {region}: {err}", file=sys.stderr)
    for model, err in cap_errors.items():
        print(f"  ! capacity {model}: {err}", file=sys.stderr)

    if not usages and not capacities:
        print(
            "\nNothing returned. Common reasons:"
            "\n  * The subscription is not Enterprise / MCA-E (Claude in Foundry is gated)."
            "\n  * The caller lacks 'Cognitive Services Usages Reader' at sub scope."
            "\n  * No Foundry resource has ever been created in the queried regions.",
            file=sys.stderr,
        )

    return 0


if __name__ == "__main__":
    sys.exit(main())
For production, prefer the Entra ID flow in src/hello_claude.py. +Useful when the deployer lacks the 'Foundry User' (formerly 'Azure AI User') +data-plane role for Entra ID auth. For production, prefer the Entra ID flow +in src/hello_claude.py. Note: this uses the plain `Anthropic` client. For API-key auth, the Foundry endpoint accepts the standard `x-api-key` header, so nothing Foundry-specific