diff --git a/crates/jcode-app-core/src/ambient.rs b/crates/jcode-app-core/src/ambient.rs index 20cbba8ff..ba7fc010d 100644 --- a/crates/jcode-app-core/src/ambient.rs +++ b/crates/jcode-app-core/src/ambient.rs @@ -12,7 +12,8 @@ pub mod runner; pub mod scheduler; pub use directives::{ - UserDirective, add_directive, has_pending_directives, load_directives, take_pending_directives, + UserDirective, add_directive, has_pending_directives, load_directives, + record_session_end_directive, take_pending_directives, }; pub use manager::AmbientManager; pub use persistence::{AmbientLock, ScheduledQueue}; diff --git a/crates/jcode-app-core/src/ambient/directives.rs b/crates/jcode-app-core/src/ambient/directives.rs index b90dd6704..00bc42202 100644 --- a/crates/jcode-app-core/src/ambient/directives.rs +++ b/crates/jcode-app-core/src/ambient/directives.rs @@ -54,6 +54,32 @@ pub fn add_directive(text: String, in_reply_to: String) -> Result<()> { save_directives(&directives) } +/// Record an auditable session-end marker for the ambient runner. +/// +/// See `docs/SESSION_END_LEARNINGS_RULES.md` (Rule 8): when a session ends via +/// `/exit`/`/quit` the session-end learnings capture runs as an ambient task, +/// and this leaves a data-only trail in `~/.jcode/ambient/directives.json` so +/// the ambient runner has an auditable record and can pick up any follow-up. +/// +/// The directive text is data only; it is never executed as an instruction. +/// Failures are intentionally swallowed by callers: this is best-effort +/// bookkeeping that must never block or fail session teardown. +pub fn record_session_end_directive(session_id: &str) -> Result<()> { + let (text, reply_to) = session_end_directive_fields(session_id); + add_directive(text, reply_to) +} + +/// Build the (text, in_reply_to) pair for a session-end directive. +/// +/// Pure helper so the format can be unit-tested without touching the +/// `~/.jcode/ambient/directives.json` store. 
+fn session_end_directive_fields(session_id: &str) -> (String, String) { + ( + format!("session-end capture ran for session {session_id}"), + format!("session_end:{session_id}"), + ) +} + /// Take all unconsumed directives, marking them as consumed. pub fn take_pending_directives() -> Vec { let mut all = load_directives(); @@ -74,3 +100,15 @@ pub fn take_pending_directives() -> Vec { pub fn has_pending_directives() -> bool { load_directives().iter().any(|d| !d.consumed) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn session_end_directive_fields_format() { + let (text, reply_to) = session_end_directive_fields("sess-123"); + assert_eq!(text, "session-end capture ran for session sess-123"); + assert_eq!(reply_to, "session_end:sess-123"); + } +} diff --git a/crates/jcode-base/src/prompt/system_prompt.md b/crates/jcode-base/src/prompt/system_prompt.md index fdef6fd92..9ed4dc98d 100644 --- a/crates/jcode-base/src/prompt/system_prompt.md +++ b/crates/jcode-base/src/prompt/system_prompt.md @@ -37,6 +37,15 @@ When adding a new feature, think about how to best structure what you are about Commit as you go by default, unless asked otherwise. Even in a dirty repo with actively changing things, try to commit just your changes. Avoid doing irreversibly destructive actions. +## Task artifacts (write files at the end of major tasks) + +At the end of any major task (multi-step work, research, a feature, a debugging session, or anything spanning many tool calls), write durable artifact files to disk, just like writing memex/memory files. Treat the filesystem as persistent memory so the next session (yours or another agent's) starts with context instead of cold. + +- Write a short markdown artifact capturing: what the task was, what you did, key decisions and rationale, files touched, how you verified it, and any follow-ups or known gaps. +- Default location: `docs/` for shareable references that belong with the repo (e.g. 
`docs/<topic>.md`), or scratch planning files (`task_plan.md`, `findings.md`, `progress.md`) in the working directory for in-progress working memory. Keep scratch planning files out of commits unless the user asks otherwise (use `.git/info/exclude`). +- Record only durable, useful-weeks-later content: decisions, outcomes, learnings, gotchas. Do NOT dump line-by-line diffs, commit hashes, or transient build noise; that lives in git history. +- This complements, not replaces, the automatic session-end memory capture. See `docs/SESSION_END_LEARNINGS_RULES.md` for the full ruleset (categories, dedup, trust, cost bounds). + ## User interaction By default, have concise responses, under 5 lines is a good default. diff --git a/crates/jcode-base/src/sidecar.rs b/crates/jcode-base/src/sidecar.rs index 8ad4e1146..7c2571039 100644 --- a/crates/jcode-base/src/sidecar.rs +++ b/crates/jcode-base/src/sidecar.rs @@ -14,7 +14,11 @@ use serde::{Deserialize, Serialize}; /// Fast/cheap OpenAI model used when Codex credentials are available. pub const SIDECAR_OPENAI_MODEL: &str = "gpt-5.3-codex-spark"; -const SIDECAR_OPENAI_OAUTH_FALLBACK_MODEL: &str = "gpt-5.4"; +/// Pareto-optimal sidecar fallback: `gpt-5.4-mini` (live in the Codex catalog as +/// of 2026-05) keeps the full 272k context window of `gpt-5.4` while being +/// cheaper and faster, which suits the sidecar's high-frequency +/// relevance/extraction workload. See docs/model-role-assignment.md. +const SIDECAR_OPENAI_OAUTH_FALLBACK_MODEL: &str = "gpt-5.4-mini"; const SIDECAR_OPENAI_OAUTH_FALLBACK_REASONING: &str = "low"; /// Fast/cheap Claude model used when only Claude credentials are available. 
diff --git a/crates/jcode-provider-core/src/models.rs b/crates/jcode-provider-core/src/models.rs index ece20b3a1..e21ec40cb 100644 --- a/crates/jcode-provider-core/src/models.rs +++ b/crates/jcode-provider-core/src/models.rs @@ -17,6 +17,7 @@ pub const ALL_OPENAI_MODELS: &[&str] = &[ "gpt-5.5", "gpt-5.4", "gpt-5.4-pro", + "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2-chat-latest", diff --git a/crates/jcode-tui/src/tui/app/conversation_state.rs b/crates/jcode-tui/src/tui/app/conversation_state.rs index 523a41a44..1494639aa 100644 --- a/crates/jcode-tui/src/tui/app/conversation_state.rs +++ b/crates/jcode-tui/src/tui/app/conversation_state.rs @@ -446,6 +446,13 @@ impl App { self.session.id.clone(), self.session.working_dir.clone(), ); + + // Rule 8 (docs/SESSION_END_LEARNINGS_RULES.md): when the ambient + // subsystem is active, leave an auditable data-only marker that the + // session-end capture ran. Best-effort; never blocks or fails teardown. + if crate::config::config().ambient.enabled { + let _ = crate::ambient::record_session_end_directive(&self.session.id); + } } pub(super) fn memory_prompt_signature(prompt: &str) -> String { diff --git a/docs/SESSION_END_LEARNINGS_RULES.md b/docs/SESSION_END_LEARNINGS_RULES.md new file mode 100644 index 000000000..14d2c091f --- /dev/null +++ b/docs/SESSION_END_LEARNINGS_RULES.md @@ -0,0 +1,150 @@ +# Session-End Learnings Capture Rules + +| | | +|---|---| +| Status | Enforced ruleset (ambient on-exit task) | +| Scope | jcode TUI/CLI agent | +| Trigger | `/exit` or `/quit` (session teardown), fired as an ambient task | +| Pipeline | `trigger_save_memory_extraction` -> `trigger_final_extraction_with_dir` -> `run_final_extraction` -> `Sidecar::extract_memories_with_existing` -> `MemoryManager::remember_project` | +| Storage | Project memory (`MemoryEntry`) + ambient on-exit directive (`~/.jcode/ambient/directives.json`) | + +## Purpose + +Every session must end by recording what was learned and what was done, 
so that +future sessions start with durable context instead of cold. This is a free, +local, OAuth-only operation that runs as an ambient task when the user exits. +These rules formalize *what* gets captured and *how* it is enforced; they sit on +top of the existing final-extraction pipeline rather than replacing it. + +## When the task fires + +The session-end capture fires on session teardown, which `/exit` and `/quit` +trigger. It is a no-op (silently skipped) when any of the following hold, to +keep exit fast and avoid noise: + +- The session is remote (`is_remote`). +- Memory is disabled (`memory_enabled == false`). +- The transcript has fewer than 4 provider messages (nothing substantive + happened). + +Otherwise the transcript is materialized and handed to the final extractor, +which runs asynchronously so exit never blocks on the model call. + +## The Rules + +### Rule 1: Capture is mandatory at session end +Every non-trivial session (>= 4 provider messages, memory enabled, local) MUST +run the session-end capture on `/exit`/`/quit`. The capture is fire-and-forget: +it must never block or delay the user's exit, and a failure must never prevent +exit. Failures are logged via `memory_log`, not surfaced as errors. + +### Rule 2: Record durable learnings, not transient noise +Capture only what a developer would benefit from recalling weeks later. Use +exactly one category per item: + +- `fact` - objective technical info about the codebase, architecture, patterns, + dependencies, tools, environment. +- `preference` - what the USER wants or how they like things (workflow, UX, + coding style, how the assistant should behave). +- `correction` - a mistake corrected, bug found and fixed, wrong assumption, or + something the user explicitly corrected. +- `entity` - named people, projects, services, repos, teams worth tracking. + +Categorization MUST follow: +- User wants / likes => `preference` (never `fact`). +- Bug fix / mistake => `correction` (never `fact`). 
+- `fact` is reserved for objective system information, never user behavior. + +### Rule 3: Never record ephemera +Do NOT capture: +- Transient debugging details, compile errors, intermediate build steps. +- Commit hashes, git operations, or "changes were committed/pushed" notes. +- Line-by-line code edits ("X changed to Y in file Z") - that belongs in git + history, not memory. +- Self-evident project context already in the system prompt (project name, repo + URL, language). +- Redundant variations of already-known memories (check the "Already known" + list before emitting). + +### Rule 4: Deduplicate against existing memory +Before storing, the extractor MUST be given the current active project memories +and MUST NOT re-emit them or close paraphrases. The existing-memory list is +capped (80 entries, 150 chars each) to bound cost; dedup is best-effort but +required. + +### Rule 5: Attribute trust honestly +Each item carries a trust level: +- `high` - the user stated it explicitly. +- `medium` - observed from the assistant's own actions/results. +- `low` - inferred. +Trust MUST reflect the actual evidence, not optimism. + +### Rule 6: Record work, not just facts +The session-end capture covers both *learnings* (Rules 2-5) and *work done*. For +work, record only durable outcomes worth recalling (e.g. "added gpt-5.4-mini to +the OpenAI catalog and switched the sidecar OAuth fallback to it"), not the +mechanical diff. Mechanical change detail lives in git history (Rule 3). + +### Rule 7: Bound cost and stay free +The capture uses the sidecar (cheap/fast OAuth model; OpenAI fallback is +`gpt-5.4-mini`). It MUST stay within the sidecar's existing context caps +(extraction context: <= 40 messages / 24k chars) and MUST NOT spend API-key +budget unless the user has explicitly enabled API keys for ambient work. 
+ +### Rule 8: Persist an auditable on-exit directive +When the ambient subsystem is active, the session-end task SHOULD append a +machine-readable directive to `~/.jcode/ambient/directives.json` recording that +the session ended and that capture ran, so the ambient runner has an auditable +trail and can pick up any follow-up. The directive is data only and is never +treated as executable instructions. + +### Rule 9: Write task artifact files at the end of major tasks +Every jcode agent (primary, swarm subagents, ambient, server) MUST write durable +artifact files to disk at the end of any major task, the same way memex/memory +files are written. A "major task" is multi-step work, research, a feature, a +debugging session, or anything spanning many tool calls. + +- Write a short markdown artifact capturing: what the task was, what was done, + key decisions + rationale, files touched, how it was verified, and any + follow-ups or known gaps. +- Location: `docs/<topic>.md` for shareable references that belong with the repo; + or scratch planning files (`task_plan.md`, `findings.md`, `progress.md`) in the + working directory for in-progress working memory. Keep scratch planning files + out of commits unless the user asks otherwise (use `.git/info/exclude`). +- Content discipline mirrors Rules 2-3 and 6: durable, useful-weeks-later content + only. No line-by-line diffs, commit hashes, or transient build noise (that + lives in git history). +- This complements the automatic session-end memory capture (Rules 1-8); it does + not replace it. The agent writes the human-readable artifact; the pipeline + writes the structured memory. +- Enforced via the agent system prompt (`crates/jcode-base/src/prompt/system_prompt.md`, + "Task artifacts" section), which every agent surface embeds through + `build_system_prompt_split`. + +## Enforcement model + +These rules are enforced at four layers: + +1. 
**Pipeline** - the existing `run_final_extraction` already runs on teardown + across TUI, server, comm, and desktop disconnect paths. The rules document + its contract so it is not silently weakened by future edits. +2. **Extraction prompt** - Rules 2-5 mirror the sidecar extraction system prompt + in `crates/jcode-base/src/sidecar.rs`. Any change to that prompt must keep + these guarantees. +3. **Ambient directive seed** - Rule 8 wires an on-exit directive so the ambient + task is explicit and auditable rather than implicit. +4. **Agent system prompt** - Rule 9 is instructed to every agent via the + "Task artifacts" section of `crates/jcode-base/src/prompt/system_prompt.md`, + which all agent surfaces embed through `build_system_prompt_split`. + +## Verification + +A change to the capture path is correct only if: +- `/exit` on a >= 4-message local session triggers `trigger_final_extraction*` + (logged via `memory_log::log_final_extraction`). +- The extractor receives the existing-memory list (dedup, Rule 4). +- New memories are stored via `manager.remember_project` with a category, trust, + and `with_source(session_id)`. +- Exit latency is unchanged (capture is async, Rule 1). +- Memory/ambient unit tests pass (`cargo test -p jcode-base memory`, + `cargo test -p jcode-app-core ambient`). 
diff --git a/docs/model-role-assignment.md b/docs/model-role-assignment.md new file mode 100644 index 000000000..bb518074a --- /dev/null +++ b/docs/model-role-assignment.md @@ -0,0 +1,201 @@ +# Pareto-Optimal Model Assignment for jcode Specialized Agent Roles + +| | | +|---|---| +| Status | Reference / recommended defaults | +| Last verified | 2026-05-30 | +| Method | Live provider catalogs queried via the wired OAuth credentials (not the hardcoded fallback lists) | +| Snapshot | `/tmp/jcode_models/live_catalog.json` (ephemeral; regenerate with the commands in section 6) | +| Code impact | `crates/jcode-base/src/sidecar.rs` (sidecar OAuth fallback), `crates/jcode-provider-core/src/models.rs` (`ALL_OPENAI_MODELS`) | + +These are recommended values, not enforced defaults: each role's model is left +`None` in config so the runtime picks the provider's strongest model unless the +user overrides it. Section 4 lists the values to set when you want the +Pareto-optimal pick for a role. + +## 1. Live model catalogs (verified via API) + +### OpenAI / Codex backend +Endpoint: `https://chatgpt.com/backend-api/codex/models?client_version=1.0.0` +(auth: `~/.codex/auth.json` `tokens.access_token`). 
+ +| slug | ctx | reasoning levels | priority | notes | +|---|---|---|---|---| +| `gpt-5.5` | 272k | low/medium/high/xhigh | 9 | frontier coding model | +| `gpt-5.4` | 272k | low/medium/high/xhigh | 16 | strong generalist | +| `gpt-5.4-mini` | 272k | low/medium/high/xhigh | 23 | cheap, large ctx (was missing from the hardcoded catalog; now added) | +| `gpt-5.3-codex` | 272k | low/medium/high/xhigh | 25 | codex-tuned | +| `gpt-5.3-codex-spark` | 128k | low/medium/high/xhigh | 26 | fast, default reasoning=high | +| `gpt-5.2` | 272k | low/medium/high/xhigh | 29 | older generalist | +| `codex-auto-review` | 272k | low/medium/high/xhigh | 43 | hidden; vendor's dedicated review model | + +### Antigravity / Gemini (cloudcode-pa) +Endpoint: `https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels` +(auth: agy account token at `~/.antigravity_tools/accounts/<account_id>.json`; the +Gemini-CLI token at `~/.gemini/oauth_creds.json` is `PERMISSION_DENIED` here). +Tier: **Google AI Ultra**. + +| name | display | max_tok | thinking | vendor role hint | +|---|---|---|---|---| +| `gemini-3.1-pro-high` | Gemini 3.1 Pro (High) | 1.05M | yes | tiered:pro (deprecated -> `gemini-pro-agent`) | +| `gemini-pro-agent` | Gemini 3.1 Pro (High) | 1.05M | yes | agent-grade pro | +| `gemini-3.1-pro-low` | Gemini 3.1 Pro (Low) | 1.05M | yes | tiered:pro | +| `gemini-3-flash-agent` | Gemini 3.5 Flash (High) | 1.05M | yes | tiered:flash | +| `gemini-3.5-flash-low` | Gemini 3.5 Flash (Medium) | 1.05M | yes | **defaultAgentModelId** | +| `gemini-3.5-flash-extra-low` | Gemini 3.5 Flash (Low) | 1.05M | yes | cheap agent | +| `gemini-3-flash` | Gemini 3 Flash | 1.05M | yes | commandModelIds | +| `gemini-3.1-flash-lite` | Gemini 3.1 Flash Lite | 1.05M | no | tiered:flashLite, webSearch/mquery | +| `gpt-oss-120b-medium` | GPT-OSS 120B (Medium) | 131k | yes | OSS option | +| `claude-sonnet-4-6` | Claude Sonnet 4.6 (Thinking) | 250k | yes | via antigravity proxy | +| `claude-opus-4-6-thinking` | Claude Opus 4.6 (Thinking) | 250k | 
yes | via antigravity proxy | +| `gemini-2.5-pro` | Gemini 2.5 Pro | 1.05M | yes | legacy | +| (+ tab/image/lite variants) | | | | non-chat | + +Vendor role hints from the same response: +`defaultAgentModelId=gemini-3.5-flash-low`, +`commandModelIds=[gemini-3-flash]`, +`webSearchModelIds/mqueryModelIds=[gemini-3.1-flash-lite]`, +`tieredModelIds={flashLite: gemini-3.1-flash-lite, flash: gemini-3-flash-agent, pro: gemini-3.1-pro-low}`. + +### xAI / Grok +Endpoint: `https://api.x.ai/v1/language-models` (auth: `~/.grok/auth.json` +OIDC `key`). Profile in repo: `XAI_PROFILE` (`api.x.ai/v1`, default +`grok-code-fast-1`). + +| id | in price | out price | +|---|---|---| +| `grok-4.3` | 12500 | 25000 | +| `grok-4.20-0309-reasoning` | 12500 | 25000 | +| `grok-4.20-0309-non-reasoning` | 12500 | 25000 | +| `grok-4.20-multi-agent-0309` | 12500 | 25000 | +| `grok-build-0.1` | 10000 | 20000 | + +`grok-build-0.1` and `grok-4.20-multi-agent-0309` remain first-class (per +standing preference). Prices are micro-units per the xAI API; relative scaling +only. + +## 2. jcode role -> config key mapping (verified) + +| Role | Config key | Current default | +|---|---|---| +| Primary coding | `provider.default_model` + `provider.default_provider` | none (provider strongest) | +| Swarm subagents | `agents.swarm_model` | none (inherits) | +| Memory sidecar / side panel | `agents.memory_model`; `sidecar.rs` consts | OpenAI `gpt-5.3-codex-spark` -> fallback `gpt-5.4-mini` -> Claude `claude-haiku-4-5` | +| Autoreview | `autoreview.model` | none | +| Autojudge | `autojudge.model` | none | +| Ambient / orchestrator | `ambient.model` + `ambient.provider` | none (provider strongest) | + +There is no separate "side panel model" role; the side panel is driven by the +memory sidecar. + +## 3. Pareto reasoning + +Each role is scored on capability (benchmark/agentic strength), latency +(time-to-first-token + throughput), and cost (token price / quota burn). 
A model +is Pareto-optimal for a role when no other available model is at least as good on +all three axes and strictly better on one, for that role's workload. + +Role workload profiles: +- Primary coding: high capability dominant, latency secondary, cost tertiary. +- Swarm subagents: parallel fan-out, so cost + latency dominate; capability + "good enough" since work is decomposed. +- Memory sidecar: very high frequency, tiny tasks (relevance/extraction); + latency + cost dominate, capability minimal. +- Autoreview: capability dominant (catching real bugs), latency irrelevant + (end-of-turn), cost secondary. +- Autojudge: structured verdicts; mid capability, low latency, low cost. +- Ambient: long-horizon autonomous; capability dominant, cost matters (runs + unattended), latency irrelevant. + +## 4. Assignments + +| Role | Primary (OpenAI-first) | Antigravity alt | Grok alt | Rationale | +|---|---|---|---|---| +| Primary coding | `gpt-5.5` (high) | `gemini-3.1-pro-high` | `grok-4.3` | Frontier coding; top priority slug 9. 272k ctx. | +| Swarm subagents | `gpt-5.4-mini` | `gemini-3.5-flash-low` (vendor default agent) | `grok-build-0.1` | Cheapest capable agent tier; large ctx; built for fan-out. | +| Memory sidecar | `gpt-5.3-codex-spark` (keep) -> `gpt-5.4-mini` | `gemini-3.1-flash-lite` | `grok-build-0.1` | High-frequency tiny tasks; spark is fast. flash-lite is vendor's mquery/search pick. | +| Autoreview | `gpt-5.3-codex` | `gemini-pro-agent` | `grok-4.20-0309-reasoning` | Codex-tuned for code review; `codex-auto-review` is hidden so use codex slug. | +| Autojudge | `gpt-5.4` | `gemini-3-flash-agent` | `grok-4.20-0309-reasoning` | Structured verdicts; balanced capability/latency. | +| Ambient/orchestrator | `gpt-5.5` (medium) | `gemini-3.1-pro-high` | `grok-4.20-multi-agent-0309` | Long-horizon autonomy; multi-agent grok is purpose-built. 
| + +Notes: +- Sidecar already prefers `gpt-5.3-codex-spark`; keep but add `gpt-5.4-mini` as a + cheaper/larger-ctx alternative now that it is live (it was missing from the + hardcoded catalog). This is now applied in `sidecar.rs`. +- `codex-auto-review` exists but has `visibility=hide`; do not surface it in the + picker. Use `gpt-5.3-codex` for the autoreview role instead. +- For Grok, autoreview/autojudge should use a reasoning variant + (`grok-4.20-0309-reasoning`), not the non-reasoning one. + +### Config example (OpenAI-first picks) + +Set these in the jcode config to pin the Pareto picks per role: + +```toml +[provider] +default_provider = "openai" +default_model = "gpt-5.5" + +[agents] +swarm_model = "gpt-5.4-mini" +memory_model = "gpt-5.3-codex-spark" + +[autoreview] +model = "gpt-5.3-codex" + +[autojudge] +model = "gpt-5.4" + +[ambient] +provider = "openai" +model = "gpt-5.5" +``` + +## 5. Catalog drift to fix in code + +The hardcoded fallback catalogs are stale relative to live: +- `crates/jcode-provider-core/src/models.rs` `ALL_OPENAI_MODELS` was missing + `gpt-5.4-mini` (now added). +- `crates/jcode-provider-gemini/src/lib.rs` `AVAILABLE_MODELS` lists + `gemini-3.1-pro-preview` / `gemini-3-pro-preview` / `gemini-3-flash-preview`, + but the live Ultra-tier Antigravity catalog exposes `gemini-3.1-pro-high`, + `gemini-pro-agent`, `gemini-3.5-flash-low`, `gemini-3-flash`, + `gemini-3.1-flash-lite`, etc. + +Recommend wiring the role defaults to read from the live catalog (already +fetched by `fetch_openai_model_catalog` / `fetchAvailableModels`) and only fall +back to the static lists when offline. + +## 6. Reproducing the live catalog + +The snapshot in the header is ephemeral. 
Regenerate it from the wired creds: + +```bash +# OpenAI / Codex backend +CODEX_TOKEN=$(python3 -c "import json;print(json.load(open('$HOME/.codex/auth.json'))['tokens']['access_token'])") +curl -s "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0" \ + -H "Authorization: Bearer $CODEX_TOKEN" + +# Antigravity / Gemini (uses the agy account token, NOT ~/.gemini) +ACC=$HOME/.antigravity_tools/accounts/$(python3 -c "import json;print(json.load(open('$HOME/.antigravity_tools/accounts.json'))['current_account_id'])").json +ATOKEN=$(python3 -c "import json;print(json.load(open('$ACC'))['token']['access_token'])") +APROJ=$(python3 -c "import json;print(json.load(open('$ACC'))['token']['project_id'])") +curl -s -X POST "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels" \ + -H "Authorization: Bearer $ATOKEN" -H "Content-Type: application/json" \ + -H "User-Agent: antigravity/1.18.3 darwin/arm64" \ + -H "x-goog-api-client: google-cloud-sdk vscode_cloudshelleditor/0.1" \ + -H 'client-metadata: {"ideType":"ANTIGRAVITY","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}' \ + -d "{\"project\":\"$APROJ\"}" + +# xAI / Grok +GKEY=$(python3 -c "import json;d=json.load(open('$HOME/.grok/auth.json'));print(list(d.values())[0]['key'])") +curl -s "https://api.x.ai/v1/language-models" -H "Authorization: Bearer $GKEY" +``` + +Notes: +- The `~/.gemini/oauth_creds.json` token is `PERMISSION_DENIED` on + `fetchAvailableModels`; that endpoint is gated to the Antigravity OAuth client, + so the agy account token must be used. +- Tokens expire (Codex/Gemini ~1h, Grok ~6h); refresh via the respective CLI if + a request returns 401/403 with an auth error. +