diff --git a/crates/jcode-app-core/src/ambient.rs b/crates/jcode-app-core/src/ambient.rs
index 20cbba8ff..ba7fc010d 100644
--- a/crates/jcode-app-core/src/ambient.rs
+++ b/crates/jcode-app-core/src/ambient.rs
@@ -12,7 +12,8 @@ pub mod runner;
 pub mod scheduler;
 
 pub use directives::{
-    UserDirective, add_directive, has_pending_directives, load_directives, take_pending_directives,
+    UserDirective, add_directive, has_pending_directives, load_directives,
+    record_session_end_directive, take_pending_directives,
 };
 pub use manager::AmbientManager;
 pub use persistence::{AmbientLock, ScheduledQueue};
diff --git a/crates/jcode-app-core/src/ambient/directives.rs b/crates/jcode-app-core/src/ambient/directives.rs
index b90dd6704..00bc42202 100644
--- a/crates/jcode-app-core/src/ambient/directives.rs
+++ b/crates/jcode-app-core/src/ambient/directives.rs
@@ -54,6 +54,32 @@ pub fn add_directive(text: String, in_reply_to: String) -> Result<()> {
     save_directives(&directives)
 }
 
+/// Record an auditable session-end marker for the ambient runner.
+///
+/// See `docs/SESSION_END_LEARNINGS_RULES.md` (Rule 8): when a session ends via
+/// `/exit`/`/quit` the session-end learnings capture runs as an ambient task,
+/// and this leaves a data-only trail in `~/.jcode/ambient/directives.json` so
+/// the ambient runner has an auditable record and can pick up any follow-up.
+///
+/// The directive text is data only; it is never executed as an instruction.
+/// Returns the result of `add_directive` unchanged. Callers are expected to
+/// ignore an `Err`: this bookkeeping must never block or fail session teardown.
+pub fn record_session_end_directive(session_id: &str) -> Result<()> {
+    let (text, reply_to) = session_end_directive_fields(session_id);
+    add_directive(text, reply_to)
+}
+
+/// Build the `(text, in_reply_to)` pair for a session-end directive.
+///
+/// `session_id` is interpolated verbatim into both strings. Pure helper, so the
+/// format is unit-testable without touching `~/.jcode/ambient/directives.json`.
+fn session_end_directive_fields(session_id: &str) -> (String, String) {
+    (
+        format!("session-end capture ran for session {session_id}"),
+        format!("session_end:{session_id}"),
+    )
+}
+
 /// Take all unconsumed directives, marking them as consumed.
 pub fn take_pending_directives() -> Vec<UserDirective> {
     let mut all = load_directives();
@@ -74,3 +100,15 @@ pub fn take_pending_directives() -> Vec<UserDirective> {
 pub fn has_pending_directives() -> bool {
     load_directives().iter().any(|d| !d.consumed)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // Pins the exact text and in_reply_to strings produced for a session id.
+    #[test]
+    fn session_end_directive_fields_format() {
+        let (text, reply_to) = session_end_directive_fields("sess-123");
+        assert_eq!(text, "session-end capture ran for session sess-123");
+        assert_eq!(reply_to, "session_end:sess-123");
+    }
+}
diff --git a/crates/jcode-base/src/prompt/system_prompt.md b/crates/jcode-base/src/prompt/system_prompt.md
index fdef6fd92..9ed4dc98d 100644
--- a/crates/jcode-base/src/prompt/system_prompt.md
+++ b/crates/jcode-base/src/prompt/system_prompt.md
@@ -37,6 +37,15 @@ When adding a new feature, think about how to best structure what you are about
 Commit as you go by default, unless asked otherwise. Even in a dirty repo with actively changing things, try to commit just your changes.
 Avoid doing irreversibly destructive actions.
 
+## Task artifacts (write files at the end of major tasks)
+
+At the end of any major task (multi-step work, research, a feature, a debugging session, or anything spanning many tool calls), write durable artifact files to disk, just like writing memex/memory files. Treat the filesystem as persistent memory so the next session (yours or another agent's) starts with context instead of cold.
+
+- Write a short markdown artifact capturing: what the task was, what you did, key decisions and rationale, files touched, how you verified it, and any follow-ups or known gaps.
+- Default location: `docs/` for shareable references that belong with the repo (e.g. `docs/<TOPIC>.md`), or scratch planning files (`task_plan.md`, `findings.md`, `progress.md`) in the working directory for in-progress working memory. Keep scratch planning files out of commits unless the user asks otherwise (use `.git/info/exclude`).
+- Record only durable, useful-weeks-later content: decisions, outcomes, learnings, gotchas. Do NOT dump line-by-line diffs, commit hashes, or transient build noise; that lives in git history.
+- This complements, not replaces, the automatic session-end memory capture. See `docs/SESSION_END_LEARNINGS_RULES.md` for the full ruleset (categories, dedup, trust, cost bounds).
+
 ## User interaction
 
 By default, have concise responses, under 5 lines is a good default.
diff --git a/crates/jcode-base/src/sidecar.rs b/crates/jcode-base/src/sidecar.rs
index 8ad4e1146..7c2571039 100644
--- a/crates/jcode-base/src/sidecar.rs
+++ b/crates/jcode-base/src/sidecar.rs
@@ -14,7 +14,11 @@ use serde::{Deserialize, Serialize};
 
 /// Fast/cheap OpenAI model used when Codex credentials are available.
 pub const SIDECAR_OPENAI_MODEL: &str = "gpt-5.3-codex-spark";
-const SIDECAR_OPENAI_OAUTH_FALLBACK_MODEL: &str = "gpt-5.4";
+/// Pareto-optimal sidecar fallback: `gpt-5.4-mini` (live in the Codex catalog as
+/// of 2026-05) keeps the full 272k context window of `gpt-5.4` while being
+/// cheaper and faster, which suits the sidecar's high-frequency
+/// relevance/extraction workload. See docs/model-role-assignment.md.
+const SIDECAR_OPENAI_OAUTH_FALLBACK_MODEL: &str = "gpt-5.4-mini";
 const SIDECAR_OPENAI_OAUTH_FALLBACK_REASONING: &str = "low";
 
 /// Fast/cheap Claude model used when only Claude credentials are available.
diff --git a/crates/jcode-provider-core/src/models.rs b/crates/jcode-provider-core/src/models.rs
index ece20b3a1..e21ec40cb 100644
--- a/crates/jcode-provider-core/src/models.rs
+++ b/crates/jcode-provider-core/src/models.rs
@@ -17,6 +17,7 @@ pub const ALL_OPENAI_MODELS: &[&str] = &[
     "gpt-5.5",
     "gpt-5.4",
     "gpt-5.4-pro",
+    "gpt-5.4-mini",
     "gpt-5.3-codex",
     "gpt-5.3-codex-spark",
     "gpt-5.2-chat-latest",
diff --git a/crates/jcode-tui/src/tui/app/conversation_state.rs b/crates/jcode-tui/src/tui/app/conversation_state.rs
index 523a41a44..1494639aa 100644
--- a/crates/jcode-tui/src/tui/app/conversation_state.rs
+++ b/crates/jcode-tui/src/tui/app/conversation_state.rs
@@ -446,6 +446,13 @@ impl App {
             self.session.id.clone(),
             self.session.working_dir.clone(),
         );
+
+        // Rule 8 (docs/SESSION_END_LEARNINGS_RULES.md): when the ambient
+        // subsystem is active, leave an auditable data-only marker that the
+        // session-end capture ran. Best-effort; never blocks or fails teardown.
+        if crate::config::config().ambient.enabled {
+            let _ = crate::ambient::record_session_end_directive(&self.session.id);
+        }
     }
 
     pub(super) fn memory_prompt_signature(prompt: &str) -> String {
diff --git a/docs/SESSION_END_LEARNINGS_RULES.md b/docs/SESSION_END_LEARNINGS_RULES.md
new file mode 100644
index 000000000..14d2c091f
--- /dev/null
+++ b/docs/SESSION_END_LEARNINGS_RULES.md
@@ -0,0 +1,150 @@
+# Session-End Learnings Capture Rules
+
+| | |
+|---|---|
+| Status | Enforced ruleset (ambient on-exit task) |
+| Scope | jcode TUI/CLI agent |
+| Trigger | `/exit` or `/quit` (session teardown), fired as an ambient task |
+| Pipeline | `trigger_save_memory_extraction` -> `trigger_final_extraction_with_dir` -> `run_final_extraction` -> `Sidecar::extract_memories_with_existing` -> `MemoryManager::remember_project` |
+| Storage | Project memory (`MemoryEntry`) + ambient on-exit directive (`~/.jcode/ambient/directives.json`) |
+
+## Purpose
+
+Every session must end by recording what was learned and what was done, so that
+future sessions start with durable context instead of cold. This is a free,
+local, OAuth-only operation that runs as an ambient task when the user exits.
+These rules formalize *what* gets captured and *how* it is enforced; they sit on
+top of the existing final-extraction pipeline rather than replacing it.
+
+## When the task fires
+
+The session-end capture fires on session teardown, which `/exit` and `/quit`
+trigger. It is a no-op (silently skipped) when any of the following hold, to
+keep exit fast and avoid noise:
+
+- The session is remote (`is_remote`).
+- Memory is disabled (`memory_enabled == false`).
+- The transcript has fewer than 4 provider messages (nothing substantive
+  happened).
+
+Otherwise the transcript is materialized and handed to the final extractor,
+which runs asynchronously so exit never blocks on the model call.
+
+## The Rules
+
+### Rule 1: Capture is mandatory at session end
+Every non-trivial session (>= 4 provider messages, memory enabled, local) MUST
+run the session-end capture on `/exit`/`/quit`. The capture is fire-and-forget:
+it must never block or delay the user's exit, and a failure must never prevent
+exit. Failures are logged via `memory_log`, not surfaced as errors.
+
+### Rule 2: Record durable learnings, not transient noise
+Capture only what a developer would benefit from recalling weeks later. Use
+exactly one category per item:
+
+- `fact` - objective technical info about the codebase, architecture, patterns,
+  dependencies, tools, environment.
+- `preference` - what the USER wants or how they like things (workflow, UX,
+  coding style, how the assistant should behave).
+- `correction` - a mistake corrected, bug found and fixed, wrong assumption, or
+  something the user explicitly corrected.
+- `entity` - named people, projects, services, repos, teams worth tracking.
+
+Categorization MUST follow:
+- User wants / likes => `preference` (never `fact`).
+- Bug fix / mistake => `correction` (never `fact`).
+- `fact` is reserved for objective system information, never user behavior.
+
+### Rule 3: Never record ephemera
+Do NOT capture:
+- Transient debugging details, compile errors, intermediate build steps.
+- Commit hashes, git operations, or "changes were committed/pushed" notes.
+- Line-by-line code edits ("X changed to Y in file Z") - that belongs in git
+  history, not memory.
+- Self-evident project context already in the system prompt (project name, repo
+  URL, language).
+- Redundant variations of already-known memories (check the "Already known"
+  list before emitting).
+
+### Rule 4: Deduplicate against existing memory
+Before storing, the extractor MUST be given the current active project memories
+and MUST NOT re-emit them or close paraphrases. The existing-memory list is
+capped (80 entries, 150 chars each) to bound cost; dedup is best-effort but
+required.
+
+### Rule 5: Attribute trust honestly
+Each item carries a trust level:
+- `high` - the user stated it explicitly.
+- `medium` - observed from the assistant's own actions/results.
+- `low` - inferred.
+Trust MUST reflect the actual evidence, not optimism.
+
+### Rule 6: Record work, not just facts
+The session-end capture covers both *learnings* (Rules 2-5) and *work done*. For
+work, record only durable outcomes worth recalling (e.g. "added gpt-5.4-mini to
+the OpenAI catalog and switched the sidecar OAuth fallback to it"), not the
+mechanical diff. Mechanical change detail lives in git history (Rule 3).
+
+### Rule 7: Bound cost and stay free
+The capture uses the sidecar (cheap/fast OAuth model; OpenAI fallback is
+`gpt-5.4-mini`). It MUST stay within the sidecar's existing context caps
+(extraction context: <= 40 messages / 24k chars) and MUST NOT spend API-key
+budget unless the user has explicitly enabled API keys for ambient work.
+
+### Rule 8: Persist an auditable on-exit directive
+When the ambient subsystem is active, the session-end task SHOULD append a
+machine-readable directive to `~/.jcode/ambient/directives.json` recording that
+the session ended and that capture ran, so the ambient runner has an auditable
+trail and can pick up any follow-up. The directive is data only and is never
+treated as executable instructions.
+
+### Rule 9: Write task artifact files at the end of major tasks
+Every jcode agent (primary, swarm subagents, ambient, server) MUST write durable
+artifact files to disk at the end of any major task, the same way memex/memory
+files are written. A "major task" is multi-step work, research, a feature, a
+debugging session, or anything spanning many tool calls.
+
+- Write a short markdown artifact capturing: what the task was, what was done,
+  key decisions + rationale, files touched, how it was verified, and any
+  follow-ups or known gaps.
+- Location: `docs/<TOPIC>.md` for shareable references that belong with the repo;
+  or scratch planning files (`task_plan.md`, `findings.md`, `progress.md`) in the
+  working directory for in-progress working memory. Keep scratch planning files
+  out of commits unless the user asks otherwise (use `.git/info/exclude`).
+- Content discipline mirrors Rules 2-3 and 6: durable, useful-weeks-later content
+  only. No line-by-line diffs, commit hashes, or transient build noise (that
+  lives in git history).
+- This complements the automatic session-end memory capture (Rules 1-8); it does
+  not replace it. The agent writes the human-readable artifact; the pipeline
+  writes the structured memory.
+- Enforced via the agent system prompt (`crates/jcode-base/src/prompt/system_prompt.md`,
+  "Task artifacts" section), which every agent surface embeds through
+  `build_system_prompt_split`.
+
+## Enforcement model
+
+These rules are enforced at four layers:
+
+1. **Pipeline** - the existing `run_final_extraction` already runs on teardown
+   across TUI, server, comm, and desktop disconnect paths. The rules document
+   its contract so it is not silently weakened by future edits.
+2. **Extraction prompt** - Rules 2-5 mirror the sidecar extraction system prompt
+   in `crates/jcode-base/src/sidecar.rs`. Any change to that prompt must keep
+   these guarantees.
+3. **Ambient directive seed** - Rule 8 wires an on-exit directive so the ambient
+   task is explicit and auditable rather than implicit.
+4. **Agent system prompt** - Rule 9 is conveyed to every agent via the
+   "Task artifacts" section of `crates/jcode-base/src/prompt/system_prompt.md`,
+   which all agent surfaces embed through `build_system_prompt_split`.
+
+## Verification
+
+A change to the capture path is correct only if:
+- `/exit` on a >= 4-message local session triggers `trigger_final_extraction*`
+  (logged via `memory_log::log_final_extraction`).
+- The extractor receives the existing-memory list (dedup, Rule 4).
+- New memories are stored via `manager.remember_project` with a category, trust,
+  and `with_source(session_id)`.
+- Exit latency is unchanged (capture is async, Rule 1).
+- Memory/ambient unit tests pass (`cargo test -p jcode-base memory`,
+  `cargo test -p jcode-app-core ambient`).
diff --git a/docs/model-role-assignment.md b/docs/model-role-assignment.md
new file mode 100644
index 000000000..bb518074a
--- /dev/null
+++ b/docs/model-role-assignment.md
@@ -0,0 +1,201 @@
+# Pareto-Optimal Model Assignment for jcode Specialized Agent Roles
+
+| | |
+|---|---|
+| Status | Reference / recommended defaults |
+| Last verified | 2026-05-30 |
+| Method | Live provider catalogs queried via the wired OAuth credentials (not the hardcoded fallback lists) |
+| Snapshot | `/tmp/jcode_models/live_catalog.json` (ephemeral; regenerate with the commands in section 6) |
+| Code impact | `crates/jcode-base/src/sidecar.rs` (sidecar OAuth fallback), `crates/jcode-provider-core/src/models.rs` (`ALL_OPENAI_MODELS`) |
+
+These are recommended values, not enforced defaults: each role's model is left
+`None` in config so the runtime picks the provider's strongest model unless the
+user overrides it. Section 4 lists the values to set when you want the
+Pareto-optimal pick for a role.
+
+## 1. Live model catalogs (verified via API)
+
+### OpenAI / Codex backend
+Endpoint: `https://chatgpt.com/backend-api/codex/models?client_version=1.0.0`
+(auth: `~/.codex/auth.json` `tokens.access_token`).
+
+| slug | ctx | reasoning levels | priority | notes |
+|---|---|---|---|---|
+| `gpt-5.5` | 272k | low/medium/high/xhigh | 9 | frontier coding model |
+| `gpt-5.4` | 272k | low/medium/high/xhigh | 16 | strong generalist |
+| `gpt-5.4-mini` | 272k | low/medium/high/xhigh | 23 | cheap, large ctx (was missing from the hardcoded catalog; now added) |
+| `gpt-5.3-codex` | 272k | low/medium/high/xhigh | 25 | codex-tuned |
+| `gpt-5.3-codex-spark` | 128k | low/medium/high/xhigh | 26 | fast, default reasoning=high |
+| `gpt-5.2` | 272k | low/medium/high/xhigh | 29 | older generalist |
+| `codex-auto-review` | 272k | low/medium/high/xhigh | 43 | hidden; vendor's dedicated review model |
+
+### Antigravity / Gemini (cloudcode-pa)
+Endpoint: `https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels`
+(auth: agy account token at `~/.antigravity_tools/accounts/<id>.json`; the
+Gemini-CLI token at `~/.gemini/oauth_creds.json` is `PERMISSION_DENIED` here).
+Tier: **Google AI Ultra**.
+
+| name | display | max_tok | thinking | vendor role hint |
+|---|---|---|---|---|
+| `gemini-3.1-pro-high` | Gemini 3.1 Pro (High) | 1.05M | yes | tiered:pro (deprecated -> `gemini-pro-agent`) |
+| `gemini-pro-agent` | Gemini 3.1 Pro (High) | 1.05M | yes | agent-grade pro |
+| `gemini-3.1-pro-low` | Gemini 3.1 Pro (Low) | 1.05M | yes | tiered:pro |
+| `gemini-3-flash-agent` | Gemini 3.5 Flash (High) | 1.05M | yes | tiered:flash |
+| `gemini-3.5-flash-low` | Gemini 3.5 Flash (Medium) | 1.05M | yes | **defaultAgentModelId** |
+| `gemini-3.5-flash-extra-low` | Gemini 3.5 Flash (Low) | 1.05M | yes | cheap agent |
+| `gemini-3-flash` | Gemini 3 Flash | 1.05M | yes | commandModelIds |
+| `gemini-3.1-flash-lite` | Gemini 3.1 Flash Lite | 1.05M | no | tiered:flashLite, webSearch/mquery |
+| `gpt-oss-120b-medium` | GPT-OSS 120B (Medium) | 131k | yes | OSS option |
+| `claude-sonnet-4-6` | Claude Sonnet 4.6 (Thinking) | 250k | yes | via antigravity proxy |
+| `claude-opus-4-6-thinking` | Claude Opus 4.6 (Thinking) | 250k | yes | via antigravity proxy |
+| `gemini-2.5-pro` | Gemini 2.5 Pro | 1.05M | yes | legacy |
+| (+ tab/image/lite variants) | | | | non-chat |
+
+Vendor role hints from the same response:
+`defaultAgentModelId=gemini-3.5-flash-low`,
+`commandModelIds=[gemini-3-flash]`,
+`webSearchModelIds/mqueryModelIds=[gemini-3.1-flash-lite]`,
+`tieredModelIds={flashLite: gemini-3.1-flash-lite, flash: gemini-3-flash-agent, pro: gemini-3.1-pro-low}`.
+
+### xAI / Grok
+Endpoint: `https://api.x.ai/v1/language-models` (auth: `~/.grok/auth.json`
+OIDC `key`). Profile in repo: `XAI_PROFILE` (`api.x.ai/v1`, default
+`grok-code-fast-1`).
+
+| id | in price | out price |
+|---|---|---|
+| `grok-4.3` | 12500 | 25000 |
+| `grok-4.20-0309-reasoning` | 12500 | 25000 |
+| `grok-4.20-0309-non-reasoning` | 12500 | 25000 |
+| `grok-4.20-multi-agent-0309` | 12500 | 25000 |
+| `grok-build-0.1` | 10000 | 20000 |
+
+`grok-build-0.1` and `grok-4.20-multi-agent-0309` remain first-class (per
+standing preference). Prices are micro-units per the xAI API; relative scaling
+only.
+
+## 2. jcode role -> config key mapping (verified)
+
+| Role | Config key | Current default |
+|---|---|---|
+| Primary coding | `provider.default_model` + `provider.default_provider` | none (provider strongest) |
+| Swarm subagents | `agents.swarm_model` | none (inherits) |
+| Memory sidecar / side panel | `agents.memory_model`; `sidecar.rs` consts | OpenAI `gpt-5.3-codex-spark` -> fallback `gpt-5.4-mini` -> Claude `claude-haiku-4-5` |
+| Autoreview | `autoreview.model` | none |
+| Autojudge | `autojudge.model` | none |
+| Ambient / orchestrator | `ambient.model` + `ambient.provider` | none (provider strongest) |
+
+There is no separate "side panel model" role; the side panel is driven by the
+memory sidecar.
+
+## 3. Pareto reasoning
+
+Each role is scored on capability (benchmark/agentic strength), latency
+(time-to-first-token + throughput), and cost (token price / quota burn). A model
+is Pareto-optimal for a role when no other available model is at least as good on
+all three axes and strictly better on at least one, for that role's workload.
+
+Role workload profiles:
+- Primary coding: high capability dominant, latency secondary, cost tertiary.
+- Swarm subagents: parallel fan-out, so cost + latency dominate; capability
+  "good enough" since work is decomposed.
+- Memory sidecar: very high frequency, tiny tasks (relevance/extraction);
+  latency + cost dominate, capability minimal.
+- Autoreview: capability dominant (catching real bugs), latency irrelevant
+  (end-of-turn), cost secondary.
+- Autojudge: structured verdicts; mid capability, low latency, low cost.
+- Ambient: long-horizon autonomous; capability dominant, cost matters (runs
+  unattended), latency irrelevant.
+
+## 4. Assignments
+
+| Role | Primary (OpenAI-first) | Antigravity alt | Grok alt | Rationale |
+|---|---|---|---|---|
+| Primary coding | `gpt-5.5` (high) | `gemini-3.1-pro-high` | `grok-4.3` | Frontier coding model; top-priority slug in the catalog (priority 9). 272k ctx. |
+| Swarm subagents | `gpt-5.4-mini` | `gemini-3.5-flash-low` (vendor default agent) | `grok-build-0.1` | Cheapest capable agent tier; large ctx; built for fan-out. |
+| Memory sidecar | `gpt-5.3-codex-spark` (keep) -> `gpt-5.4-mini` | `gemini-3.1-flash-lite` | `grok-build-0.1` | High-frequency tiny tasks; spark is fast. flash-lite is vendor's mquery/search pick. |
+| Autoreview | `gpt-5.3-codex` | `gemini-pro-agent` | `grok-4.20-0309-reasoning` | Codex-tuned for code review; `codex-auto-review` is hidden so use codex slug. |
+| Autojudge | `gpt-5.4` | `gemini-3-flash-agent` | `grok-4.20-0309-reasoning` | Structured verdicts; balanced capability/latency. |
+| Ambient/orchestrator | `gpt-5.5` (medium) | `gemini-3.1-pro-high` | `grok-4.20-multi-agent-0309` | Long-horizon autonomy; multi-agent grok is purpose-built. |
+
+Notes:
+- Sidecar already prefers `gpt-5.3-codex-spark`; keep it, with `gpt-5.4-mini` as
+  the OAuth fallback (cheaper than `gpt-5.4`, larger ctx than spark) now that it is
+  live (it was missing from the hardcoded catalog). Now applied in `sidecar.rs`.
+- `codex-auto-review` exists but has `visibility=hide`; do not surface it in the
+  picker. Use `gpt-5.3-codex` for the autoreview role instead.
+- For Grok, autoreview/autojudge should use a reasoning variant
+  (`grok-4.20-0309-reasoning`), not the non-reasoning one.
+
+### Config example (OpenAI-first picks)
+
+Set these in the jcode config to pin the Pareto picks per role:
+
+```toml
+[provider]
+default_provider = "openai"
+default_model = "gpt-5.5"
+
+[agents]
+swarm_model = "gpt-5.4-mini"
+memory_model = "gpt-5.3-codex-spark"
+
+[autoreview]
+model = "gpt-5.3-codex"
+
+[autojudge]
+model = "gpt-5.4"
+
+[ambient]
+provider = "openai"
+model = "gpt-5.5"
+```
+
+## 5. Catalog drift to fix in code
+
+The hardcoded fallback catalogs are stale relative to live:
+- `crates/jcode-provider-core/src/models.rs` `ALL_OPENAI_MODELS` was missing
+  `gpt-5.4-mini` (now added).
+- `crates/jcode-provider-gemini/src/lib.rs` `AVAILABLE_MODELS` lists
+  `gemini-3.1-pro-preview` / `gemini-3-pro-preview` / `gemini-3-flash-preview`,
+  but the live Ultra-tier Antigravity catalog exposes `gemini-3.1-pro-high`,
+  `gemini-pro-agent`, `gemini-3.5-flash-low`, `gemini-3-flash`,
+  `gemini-3.1-flash-lite`, etc.
+
+Recommend wiring the role defaults to read from the live catalog (already
+fetched by `fetch_openai_model_catalog` / `fetchAvailableModels`) and only fall
+back to the static lists when offline.
+
+## 6. Reproducing the live catalog
+
+The snapshot in the header is ephemeral. Regenerate it from the wired creds:
+
+```bash
+# OpenAI / Codex backend
+CODEX_TOKEN=$(python3 -c "import json;print(json.load(open('$HOME/.codex/auth.json'))['tokens']['access_token'])")
+curl -s "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0" \
+  -H "Authorization: Bearer $CODEX_TOKEN"
+
+# Antigravity / Gemini (uses the agy account token, NOT ~/.gemini)
+ACC=$HOME/.antigravity_tools/accounts/$(python3 -c "import json;print(json.load(open('$HOME/.antigravity_tools/accounts.json'))['current_account_id'])").json
+ATOKEN=$(python3 -c "import json;print(json.load(open('$ACC'))['token']['access_token'])")
+APROJ=$(python3 -c "import json;print(json.load(open('$ACC'))['token']['project_id'])")
+curl -s -X POST "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels" \
+  -H "Authorization: Bearer $ATOKEN" -H "Content-Type: application/json" \
+  -H "User-Agent: antigravity/1.18.3 darwin/arm64" \
+  -H "x-goog-api-client: google-cloud-sdk vscode_cloudshelleditor/0.1" \
+  -H 'client-metadata: {"ideType":"ANTIGRAVITY","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}' \
+  -d "{\"project\":\"$APROJ\"}"
+
+# xAI / Grok
+GKEY=$(python3 -c "import json;d=json.load(open('$HOME/.grok/auth.json'));print(list(d.values())[0]['key'])")
+curl -s "https://api.x.ai/v1/language-models" -H "Authorization: Bearer $GKEY"
+```
+
+Notes:
+- The `~/.gemini/oauth_creds.json` token is `PERMISSION_DENIED` on
+  `fetchAvailableModels`; that endpoint is gated to the Antigravity OAuth client,
+  so the agy account token must be used.
+- Tokens expire (Codex/Gemini ~1h, Grok ~6h); refresh via the respective CLI if
+  a request returns 401/403 with an auth error.
+