From 019cf8eb02563384b955b60de5fbcc261ec04baf Mon Sep 17 00:00:00 2001 From: zyang-dev <267119621+zyang-dev@users.noreply.github.com> Date: Wed, 20 May 2026 12:28:42 -0700 Subject: [PATCH 1/4] feat(onboard): show managed vLLM by default on DGX Spark and Station Signed-off-by: zyang-dev <267119621+zyang-dev@users.noreply.github.com> --- src/lib/onboard.ts | 4 +-- src/lib/onboard/vllm-menu.test.ts | 43 +++++++++++++++++++++++++++++++ src/lib/onboard/vllm-menu.ts | 18 ++++++++++--- 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 0a57c85460..9a56b1a30a 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6124,8 +6124,7 @@ async function setupNim( ["curl", "-sf", ...localProbeCurlArgs, `http://127.0.0.1:${VLLM_PORT}/v1/models`], { ignoreError: true }, ); - // Pick a vLLM install recipe for this host. Profiles live in inference/vllm.ts; - // null means "no supported platform" (vLLM stays behind EXPERIMENTAL). + // Pick a vLLM install recipe for this host. Profiles live in inference/vllm.ts. const vllmProfile = detectVllmProfile(gpu); // If the profile's image is already cached, the install path is really a // "start" — docker pull is a no-op and the container can come up in seconds. 
@@ -6237,6 +6236,7 @@ async function setupNim( vllmRunning, vllmProfile, experimental: EXPERIMENTAL, + platform: gpu?.platform, hasVllmImage, }), ); diff --git a/src/lib/onboard/vllm-menu.test.ts b/src/lib/onboard/vllm-menu.test.ts index 98fe45a006..84096b1584 100644 --- a/src/lib/onboard/vllm-menu.test.ts +++ b/src/lib/onboard/vllm-menu.test.ts @@ -49,6 +49,49 @@ describe("buildVllmMenuEntries", () => { assert.equal(entries[0].label, "Install vLLM (Linux NVIDIA)"); }); + it("returns the install entry by default for DGX Spark", () => { + const entries = buildVllmMenuEntries({ + vllmRunning: false, + vllmProfile: { name: "DGX Spark" }, + experimental: false, + platform: "spark", + hasVllmImage: false, + env: {}, + log: () => {}, + }); + assert.equal(entries.length, 1); + assert.equal(entries[0].key, "install-vllm"); + assert.equal(entries[0].label, "Install vLLM (DGX Spark)"); + }); + + it("returns the start entry by default for DGX Station when the image is already cached", () => { + const entries = buildVllmMenuEntries({ + vllmRunning: false, + vllmProfile: { name: "DGX Station" }, + experimental: false, + platform: "station", + hasVllmImage: true, + env: {}, + log: () => {}, + }); + assert.equal(entries.length, 1); + assert.equal(entries[0].key, "install-vllm"); + assert.equal(entries[0].label, "Start vLLM (DGX Station)"); + }); + + it("keeps generic Linux managed vLLM behind EXPERIMENTAL", () => { + const entries = buildVllmMenuEntries({ + vllmRunning: false, + vllmProfile: { name: "Linux NVIDIA" }, + experimental: false, + platform: "linux", + hasVllmImage: false, + env: {}, + log: () => {}, + }); + assert.deepEqual(entries, []); + }); + it("uses Start verb when the image is already cached", () => { const entries = buildVllmMenuEntries({ vllmRunning: false, diff --git a/src/lib/onboard/vllm-menu.ts b/src/lib/onboard/vllm-menu.ts index b72d5d45f9..c135e34ffb 100644 --- a/src/lib/onboard/vllm-menu.ts +++ b/src/lib/onboard/vllm-menu.ts @@ -18,16 +18,20 @@ * 
buildVllmMenuEntries always emits the install-vllm entry when the user * explicitly opts in via NEMOCLAW_PROVIDER=install-vllm, even when the profile * is null, so the dispatcher can emit the precise "No vLLM install profile - * available for this host." message. It also logs a note when running-vLLM - * takes precedence over the env-var opt-in. + * available for this host." message. It also lets the caller surface managed + * vLLM by default for known DGX platforms while generic Linux stays gated, and + * logs a note when running-vLLM takes precedence over the env-var opt-in. */ import { VLLM_PORT } from "../core/ports"; +import type { NvidiaPlatform } from "../inference/nim"; interface VllmProfileShape { name: string; } +const MANAGED_VLLM_DEFAULT_PLATFORMS = new Set(["spark", "station"]); + export interface VllmMenuEntry { key: "vllm" | "install-vllm"; label: string; @@ -37,6 +41,7 @@ export interface BuildVllmMenuOptions { vllmRunning: boolean; vllmProfile: VllmProfileShape | null | undefined; experimental: boolean; + platform?: NvidiaPlatform; hasVllmImage: boolean; /** Defaults to process.env so tests can inject a clean environment. */ env?: NodeJS.ProcessEnv; @@ -49,7 +54,8 @@ export function buildVllmMenuEntries(opts: BuildVllmMenuOptions): VllmMenuEntry[ // env-var opt-in surface the menu entry too — the non-interactive provider // hint is null outside non-interactive mode. const env = opts.env ?? 
process.env; - const userChoseManagedVllm = (env.NEMOCLAW_PROVIDER || "").trim().toLowerCase() === "install-vllm"; + const userChoseManagedVllm = + (env.NEMOCLAW_PROVIDER || "").trim().toLowerCase() === "install-vllm"; if (opts.vllmRunning) { if (userChoseManagedVllm) { log( @@ -63,7 +69,11 @@ export function buildVllmMenuEntries(opts: BuildVllmMenuOptions): VllmMenuEntry[ }, ]; } - if (userChoseManagedVllm || (opts.vllmProfile && opts.experimental)) { + if ( + userChoseManagedVllm || + (opts.vllmProfile && + (opts.experimental || MANAGED_VLLM_DEFAULT_PLATFORMS.has(opts.platform as NvidiaPlatform))) + ) { const verb = opts.hasVllmImage ? "Start" : "Install"; const profileLabel = opts.vllmProfile?.name ?? "no profile detected"; return [{ key: "install-vllm", label: `${verb} vLLM (${profileLabel})` }]; From c035129afdaf7142491413da987b49e4ed25fa15 Mon Sep 17 00:00:00 2001 From: zyang-dev <267119621+zyang-dev@users.noreply.github.com> Date: Wed, 20 May 2026 13:27:56 -0700 Subject: [PATCH 2/4] Remove the unnecessary platform type assertion by checking opts.platform before testing the managed-vLLM default platform set. Signed-off-by: zyang-dev <267119621+zyang-dev@users.noreply.github.com> --- src/lib/onboard/vllm-menu.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib/onboard/vllm-menu.ts b/src/lib/onboard/vllm-menu.ts index c135e34ffb..d3e340d8f4 100644 --- a/src/lib/onboard/vllm-menu.ts +++ b/src/lib/onboard/vllm-menu.ts @@ -72,7 +72,8 @@ export function buildVllmMenuEntries(opts: BuildVllmMenuOptions): VllmMenuEntry[ if ( userChoseManagedVllm || (opts.vllmProfile && - (opts.experimental || MANAGED_VLLM_DEFAULT_PLATFORMS.has(opts.platform as NvidiaPlatform))) + (opts.experimental || + (opts.platform && MANAGED_VLLM_DEFAULT_PLATFORMS.has(opts.platform)))) ) { const verb = opts.hasVllmImage ? "Start" : "Install"; const profileLabel = opts.vllmProfile?.name ?? 
"no profile detected"; From 3d5965fc1ddddd8819ecb3b820debbc75cff8275 Mon Sep 17 00:00:00 2001 From: zyang-dev <267119621+zyang-dev@users.noreply.github.com> Date: Wed, 20 May 2026 13:42:56 -0700 Subject: [PATCH 3/4] Update the vLLM menu unit test to import the source helper directly instead of relying on generated dist declarations. Signed-off-by: zyang-dev <267119621+zyang-dev@users.noreply.github.com> --- src/lib/onboard/vllm-menu.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/onboard/vllm-menu.test.ts b/src/lib/onboard/vllm-menu.test.ts index 84096b1584..8ceabdc434 100644 --- a/src/lib/onboard/vllm-menu.test.ts +++ b/src/lib/onboard/vllm-menu.test.ts @@ -5,7 +5,7 @@ import assert from "node:assert/strict"; import { describe, it } from "vitest"; -import { buildVllmMenuEntries } from "../../../dist/lib/onboard/vllm-menu"; +import { buildVllmMenuEntries } from "./vllm-menu"; describe("buildVllmMenuEntries", () => { it("returns no entries when nothing is running, no profile, and no opt-in", () => { From b1d36761d276c403d6f918d38d7fe737eaf55b57 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 20 May 2026 15:11:42 -0700 Subject: [PATCH 4/4] docs: clarify managed vLLM DGX defaults --- .../nemoclaw-user-configure-inference/SKILL.md | 14 +++++++------- .../references/inference-options.md | 16 +++++++++------- .../references/best-practices.md | 8 ++++---- .../skills/nemoclaw-user-get-started/SKILL.md | 9 +++++---- .../references/release-notes.md | 2 +- ci/platform-matrix.json | 4 ++-- docs/about/release-notes.md | 2 +- docs/about/release-notes.mdx | 2 +- docs/get-started/quickstart.md | 4 ++-- docs/get-started/quickstart.mdx | 4 ++-- docs/inference/inference-options.md | 15 ++++++++------- docs/inference/inference-options.mdx | 15 ++++++++------- docs/inference/use-local-inference.md | 14 +++++++------- docs/inference/use-local-inference.mdx | 14 +++++++------- docs/security/best-practices.md | 8 ++++---- 
docs/security/best-practices.mdx | 8 ++++---- 16 files changed, 72 insertions(+), 67 deletions(-) diff --git a/.agents/skills/nemoclaw-user-configure-inference/SKILL.md b/.agents/skills/nemoclaw-user-configure-inference/SKILL.md index 625cc0c8a1..9936cc3da1 100644 --- a/.agents/skills/nemoclaw-user-configure-inference/SKILL.md +++ b/.agents/skills/nemoclaw-user-configure-inference/SKILL.md @@ -242,7 +242,7 @@ $ NEMOCLAW_PROVIDER=anthropicCompatible \ nemoclaw onboard --non-interactive ``` -## Step 4: vLLM (Experimental) +## Step 4: vLLM When vLLM is already running on `localhost:8000`, NemoClaw can detect it automatically and query the `/v1/models` endpoint to determine the loaded model. On supported Linux hosts with NVIDIA GPUs, the onboard wizard can also install or start a managed vLLM container for you. @@ -254,7 +254,8 @@ $ nemoclaw onboard ``` If vLLM is already running, NemoClaw detects the running model and validates the endpoint. -If vLLM is not running and your host matches a managed profile, set `NEMOCLAW_EXPERIMENTAL=1`, rerun `nemoclaw onboard`, and select the **Install vLLM** or **Start vLLM** entry. +If vLLM is not running and your host matches a DGX Spark or DGX Station managed profile, NemoClaw shows the **Install vLLM** or **Start vLLM** entry by default. +Generic Linux NVIDIA GPU hosts still require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm` before the managed entry appears. NemoClaw pulls the vLLM image, downloads model weights into `~/.cache/huggingface`, starts the `nemoclaw-vllm` container on `localhost:8000`, and prints progress markers while the model loads. The first run can take 10 to 30 minutes. Later runs reuse the cached image and model weights. @@ -281,11 +282,11 @@ $ NEMOCLAW_PROVIDER=vllm \ nemoclaw onboard --non-interactive ``` -Install or start managed vLLM when a supported profile is detected: +Install or start managed vLLM when a supported profile is detected. 
+On DGX Spark and DGX Station, `NEMOCLAW_PROVIDER=install-vllm` is enough for non-interactive runs; add `NEMOCLAW_EXPERIMENTAL=1` on generic Linux NVIDIA GPU hosts. ```console -$ NEMOCLAW_EXPERIMENTAL=1 \ - NEMOCLAW_PROVIDER=install-vllm \ +$ NEMOCLAW_PROVIDER=install-vllm \ nemoclaw onboard --non-interactive ``` @@ -312,8 +313,7 @@ Gated models require a Hugging Face token; export it before onboarding so NemoCl ```console $ export HF_TOKEN= -$ NEMOCLAW_EXPERIMENTAL=1 \ - NEMOCLAW_PROVIDER=install-vllm \ +$ NEMOCLAW_PROVIDER=install-vllm \ NEMOCLAW_VLLM_MODEL=deepseek-r1-distill-70b \ nemoclaw onboard --non-interactive ``` diff --git a/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md b/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md index a4fb1b8058..c0ac259514 100644 --- a/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md +++ b/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md @@ -30,15 +30,16 @@ NemoClaw uses provider-specific local tokens for those routes, and rebuilds of l | Hermes Provider | Hermes only | OpenAI-compatible route | Available when onboarding Hermes Agent through `nemohermes` | | Local Ollama | Caveated | Local Ollama API | Available when Ollama is installed or running on the host | | Local NVIDIA NIM | Experimental | Local OpenAI-compatible | Requires `NEMOCLAW_EXPERIMENTAL=1` and a NIM-capable GPU | -| Local vLLM | Experimental | Local OpenAI-compatible | Requires `NEMOCLAW_EXPERIMENTAL=1` and a server already running on `localhost:8000` | +| Local vLLM (already running) | Caveated | Local OpenAI-compatible | Appears in the onboarding menu when NemoClaw detects a server already on `localhost:8000`. No flag required. | +| Local vLLM (managed install/start) | Caveated | Local OpenAI-compatible | Appears by default on DGX Spark and DGX Station. 
Generic Linux NVIDIA GPU hosts require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host. | ## Provider Options The onboard wizard presents the following provider options by default. The first six are always available. Ollama appears when it is installed or running on the host. -Experimental local vLLM appears when NemoClaw detects a running vLLM server. -The managed install/start vLLM entry appears when you opt in and NemoClaw detects a supported NVIDIA GPU host profile. +Local vLLM appears when NemoClaw detects a running vLLM server. +The managed install/start vLLM entry appears by default on DGX Spark and DGX Station, and appears on generic Linux NVIDIA GPU hosts after opt-in. | Option | Description | Curated models | |--------|-------------|----------------| @@ -80,15 +81,16 @@ To use the router in scripted setup, set: $ NEMOCLAW_PROVIDER=routed NVIDIA_API_KEY= nemoclaw onboard --non-interactive ``` -## Experimental Options +## Caveated Local Options -The following local inference options are experimental. -Local NIM and managed vLLM install/start require `NEMOCLAW_EXPERIMENTAL=1`; an already-running vLLM server appears directly in the onboarding selection list. +The following local inference options are caveated. +Local NIM and generic Linux managed vLLM install/start require `NEMOCLAW_EXPERIMENTAL=1`; DGX Spark and DGX Station managed vLLM entries appear by default. +An already-running vLLM server appears directly in the onboarding selection list. | Option | Condition | Notes | |--------|-----------|-------| | Local NVIDIA NIM | NIM-capable GPU detected | Pulls and manages a NIM container. | -| Local vLLM | vLLM running on `localhost:8000`, or a supported DGX Spark, DGX Station, or Linux NVIDIA GPU profile | Auto-detects the loaded model when vLLM is already running. Can install or start a managed vLLM container for supported profiles after experimental opt-in. 
| +| Local vLLM | vLLM running on `localhost:8000`, or a supported DGX Spark, DGX Station, or Linux NVIDIA GPU profile | Auto-detects the loaded model when vLLM is already running. Can install or start a managed vLLM container by default on DGX Spark/Station and after opt-in on generic Linux NVIDIA GPU hosts. | For setup instructions, refer to Use a Local Inference Server (use the `nemoclaw-user-configure-inference` skill). diff --git a/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md b/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md index 643d2af012..3f07323ef1 100644 --- a/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md +++ b/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md @@ -438,13 +438,13 @@ Different inference providers have different trust and cost profiles. ### Experimental Providers -The `NEMOCLAW_EXPERIMENTAL=1` environment variable gates local NVIDIA NIM and local vLLM. +The `NEMOCLAW_EXPERIMENTAL=1` environment variable gates local NVIDIA NIM and generic Linux managed vLLM install/start. DGX Spark and DGX Station managed vLLM entries are offered by default, and an already-running vLLM server on `localhost:8000` is offered in the menu without a flag, because selecting either is an explicit user action. | Aspect | Detail | |---|---| -| Default | Disabled. The onboarding wizard does not show these providers. | -| What you can change | Set `NEMOCLAW_EXPERIMENTAL=1` before running `nemoclaw onboard`. | -| Risk if relaxed | NemoClaw has not fully validated these providers. NIM requires a NIM-capable GPU. vLLM must already be running on `localhost:8000`. Misconfiguration can cause failed inference or unexpected behavior. | +| Default | Local NVIDIA NIM and generic Linux managed vLLM install/start are hidden. DGX Spark and DGX Station managed vLLM entries, plus already-running vLLM on `localhost:8000`, are offered when detected. 
| +| What you can change | Set `NEMOCLAW_EXPERIMENTAL=1` before running `nemoclaw onboard` to surface Local NIM and generic Linux managed vLLM. To request only the managed vLLM path non-interactively, set `NEMOCLAW_PROVIDER=install-vllm`. | +| Risk if selected | NemoClaw has not fully validated these providers. NIM requires a NIM-capable GPU. The managed vLLM path pulls a container image and starts it on a supported NVIDIA GPU host. Misconfiguration can cause failed inference or unexpected behavior. | | Recommendation | Use experimental providers only for evaluation. Do not rely on them for always-on assistants. | ## Posture Profiles diff --git a/.agents/skills/nemoclaw-user-get-started/SKILL.md b/.agents/skills/nemoclaw-user-get-started/SKILL.md index ebc7fc6b13..ba64594645 100644 --- a/.agents/skills/nemoclaw-user-get-started/SKILL.md +++ b/.agents/skills/nemoclaw-user-get-started/SKILL.md @@ -218,12 +218,13 @@ $ NEMOCLAW_PROVIDER=routed NVIDIA_API_KEY= nemoclaw onboard --non-inte The router listens on the host at port `4000`. The sandbox still calls `https://inference.local/v1`, so do not point in-sandbox tools at the host router port directly. -**Experimental: Local NIM and Local vLLM:** +**Local NIM and Local vLLM:** -These options appear when `NEMOCLAW_EXPERIMENTAL=1` is set and the prerequisites are met. +Local NVIDIA NIM and generic Linux managed vLLM require `NEMOCLAW_EXPERIMENTAL=1`. +DGX Spark and DGX Station managed vLLM entries, plus already-running vLLM on `localhost:8000`, appear when detected. -- **Local NVIDIA NIM** requires a NIM-capable GPU. NemoClaw pulls and manages a NIM container. -- **Local vLLM** uses a vLLM server already running on `localhost:8000`, or installs and starts a managed vLLM container on supported DGX Spark, DGX Station, and Linux NVIDIA GPU hosts. NemoClaw auto-detects the loaded model. +- **Local NVIDIA NIM** requires a NIM-capable GPU and `NEMOCLAW_EXPERIMENTAL=1`. NemoClaw pulls and manages a NIM container. 
+- **Local vLLM** uses a vLLM server already running on `localhost:8000`, or installs and starts a managed vLLM container on supported DGX Spark, DGX Station, and Linux NVIDIA GPU hosts. Generic Linux managed install/start requires `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw auto-detects the loaded model. For setup, refer to Use a Local Inference Server (use the `nemoclaw-user-configure-inference` skill). diff --git a/.agents/skills/nemoclaw-user-overview/references/release-notes.md b/.agents/skills/nemoclaw-user-overview/references/release-notes.md index 6e8a44cb10..ac53ea6b0d 100644 --- a/.agents/skills/nemoclaw-user-overview/references/release-notes.md +++ b/.agents/skills/nemoclaw-user-overview/references/release-notes.md @@ -68,7 +68,7 @@ NemoClaw v0.0.40 improves onboarding reliability, local inference setup, and san - The Docker-driver gateway startup check waits for the gateway port to accept TCP connections before it reports the gateway as healthy, and startup failures now include child process exit details. - Local Ollama setup requires the authenticated reverse proxy token on every native Ollama API route, including `GET /api/tags`. - The Linux Ollama install path preflights `zstd` before running the official installer and explains why each sudo-backed setup step needs elevated privileges. -- The onboarding provider menu offers an already-running local vLLM server directly when `localhost:8000` responds, while managed vLLM install and start options remain behind the experimental opt-in. +- The onboarding provider menu offers an already-running local vLLM server directly when `localhost:8000` responds. Managed vLLM install and start options now appear by default on DGX Spark and DGX Station, while generic Linux NVIDIA GPU hosts remain behind the experimental opt-in. - Policy tier defaults are filtered by active agent support, so presets such as Brave Search are not reapplied to agents that do not support that integration. 
- `nemoclaw connect` checks dashboard forward reachability with a TCP probe before it reports a forward as stale. - Sandbox startup captures a known-good OpenClaw config baseline and restores it on restart if `/sandbox/.openclaw/openclaw.json` becomes empty. diff --git a/ci/platform-matrix.json b/ci/platform-matrix.json index c47470e7a9..6d9c78e577 100644 --- a/ci/platform-matrix.json +++ b/ci/platform-matrix.json @@ -141,9 +141,9 @@ }, { "name": "Local vLLM (managed install/start)", - "status": "experimental", + "status": "caveated", "endpoint_type": "Local OpenAI-compatible", - "notes": "Requires `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host." + "notes": "Appears by default on DGX Spark and DGX Station. Generic Linux NVIDIA GPU hosts require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host." } ] } diff --git a/docs/about/release-notes.md b/docs/about/release-notes.md index 8a0818abdc..fa54edb88b 100644 --- a/docs/about/release-notes.md +++ b/docs/about/release-notes.md @@ -88,7 +88,7 @@ NemoClaw v0.0.40 improves onboarding reliability, local inference setup, and san - The Docker-driver gateway startup check waits for the gateway port to accept TCP connections before it reports the gateway as healthy, and startup failures now include child process exit details. - Local Ollama setup requires the authenticated reverse proxy token on every native Ollama API route, including `GET /api/tags`. - The Linux Ollama install path preflights `zstd` before running the official installer and explains why each sudo-backed setup step needs elevated privileges. -- The onboarding provider menu offers an already-running local vLLM server directly when `localhost:8000` responds, while managed vLLM install and start options remain behind the experimental opt-in. 
+- The onboarding provider menu offers an already-running local vLLM server directly when `localhost:8000` responds. Managed vLLM install and start options now appear by default on DGX Spark and DGX Station, while generic Linux NVIDIA GPU hosts remain behind the experimental opt-in. - Policy tier defaults are filtered by active agent support, so presets such as Brave Search are not reapplied to agents that do not support that integration. - `nemoclaw connect` checks dashboard forward reachability with a TCP probe before it reports a forward as stale. - Sandbox startup captures a known-good OpenClaw config baseline and restores it on restart if `/sandbox/.openclaw/openclaw.json` becomes empty. diff --git a/docs/about/release-notes.mdx b/docs/about/release-notes.mdx index 2269c73a26..4cf9ae8108 100644 --- a/docs/about/release-notes.mdx +++ b/docs/about/release-notes.mdx @@ -75,7 +75,7 @@ NemoClaw v0.0.40 improves onboarding reliability, local inference setup, and san - The Docker-driver gateway startup check waits for the gateway port to accept TCP connections before it reports the gateway as healthy, and startup failures now include child process exit details. - Local Ollama setup requires the authenticated reverse proxy token on every native Ollama API route, including `GET /api/tags`. - The Linux Ollama install path preflights `zstd` before running the official installer and explains why each sudo-backed setup step needs elevated privileges. -- The onboarding provider menu offers an already-running local vLLM server directly when `localhost:8000` responds, while managed vLLM install and start options remain behind the experimental opt-in. +- The onboarding provider menu offers an already-running local vLLM server directly when `localhost:8000` responds. Managed vLLM install and start options now appear by default on DGX Spark and DGX Station, while generic Linux NVIDIA GPU hosts remain behind the experimental opt-in. 
- Policy tier defaults are filtered by active agent support, so presets such as Brave Search are not reapplied to agents that do not support that integration. - `nemoclaw connect` checks dashboard forward reachability with a TCP probe before it reports a forward as stale. - Sandbox startup captures a known-good OpenClaw config baseline and restores it on restart if `/sandbox/.openclaw/openclaw.json` becomes empty. diff --git a/docs/get-started/quickstart.md b/docs/get-started/quickstart.md index bb91af7153..fa5c3acfb4 100644 --- a/docs/get-started/quickstart.md +++ b/docs/get-started/quickstart.md @@ -253,12 +253,12 @@ The sandbox still calls `https://inference.local/v1`, so do not point in-sandbox ::: -:::{dropdown} Experimental: Local NIM and Local vLLM +:::{dropdown} Local NIM and Local vLLM :icon: beaker - **Local NVIDIA NIM** appears when `NEMOCLAW_EXPERIMENTAL=1` is set and the host has a NIM-capable GPU. NemoClaw pulls and manages a NIM container. - **Local vLLM (already running)** appears whenever NemoClaw detects a vLLM server on `localhost:8000`. No flag is required for the menu entry. NemoClaw auto-detects the loaded model. -- **Local vLLM (managed install/start)** requires `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls and starts a vLLM container on supported DGX Spark, DGX Station, and Linux NVIDIA GPU hosts. +- **Local vLLM (managed install/start)** appears by default on DGX Spark and DGX Station. Generic Linux NVIDIA GPU hosts require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls and starts a vLLM container on supported hosts. For setup, refer to [Use a Local Inference Server](../inference/use-local-inference.md). 
::: diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx index 3d7171ff03..0e4441267a 100644 --- a/docs/get-started/quickstart.mdx +++ b/docs/get-started/quickstart.mdx @@ -232,11 +232,11 @@ The sandbox still calls `https://inference.local/v1`, so do not point in-sandbox - + - **Local NVIDIA NIM** appears when `NEMOCLAW_EXPERIMENTAL=1` is set and the host has a NIM-capable GPU. NemoClaw pulls and manages a NIM container. - **Local vLLM (already running)** appears whenever NemoClaw detects a vLLM server on `localhost:8000`. No flag is required for the menu entry. NemoClaw auto-detects the loaded model. -- **Local vLLM (managed install/start)** requires `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls and starts a vLLM container on supported DGX Spark, DGX Station, and Linux NVIDIA GPU hosts. +- **Local vLLM (managed install/start)** appears by default on DGX Spark and DGX Station. Generic Linux NVIDIA GPU hosts require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls and starts a vLLM container on supported hosts. For setup, refer to [Use a Local Inference Server](/inference/use-local-inference). diff --git a/docs/inference/inference-options.md b/docs/inference/inference-options.md index 26816f471a..497a4116cf 100644 --- a/docs/inference/inference-options.md +++ b/docs/inference/inference-options.md @@ -52,7 +52,7 @@ NemoClaw uses provider-specific local tokens for those routes, and rebuilds of l | Local Ollama | Caveated | Local Ollama API | Available when Ollama is installed or running on the host | | Local NVIDIA NIM | Experimental | Local OpenAI-compatible | Requires `NEMOCLAW_EXPERIMENTAL=1` and a NIM-capable GPU | | Local vLLM (already running) | Caveated | Local OpenAI-compatible | Appears in the onboarding menu when NemoClaw detects a server already on `localhost:8000`. No flag required. 
| -| Local vLLM (managed install/start) | Experimental | Local OpenAI-compatible | Requires `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host. | +| Local vLLM (managed install/start) | Caveated | Local OpenAI-compatible | Appears by default on DGX Spark and DGX Station. Generic Linux NVIDIA GPU hosts require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host. | ## Provider Options @@ -60,8 +60,8 @@ NemoClaw uses provider-specific local tokens for those routes, and rebuilds of l The onboard wizard presents the following provider options by default. The first six are always available. Ollama appears when it is installed or running on the host. -Experimental local vLLM appears when NemoClaw detects a running vLLM server. -The managed install/start vLLM entry appears when you opt in and NemoClaw detects a supported NVIDIA GPU host profile. +Local vLLM appears when NemoClaw detects a running vLLM server. +The managed install/start vLLM entry appears by default on DGX Spark and DGX Station, and appears on generic Linux NVIDIA GPU hosts after opt-in. | Option | Description | Curated models | |--------|-------------|----------------| @@ -125,15 +125,16 @@ NemoClaw probes only that interpreter and aborts with the failure reason if it d Relative command names such as `python3.12` are rejected; use `command -v python3.12` to find the absolute path. If `python -m venv` itself fails for a probe-clean interpreter (for example, a corrupt ensurepip seed), NemoClaw retries with the next healthy candidate when no pin is set; with a pin set, the failure stops onboarding so you can fix or repoint the pinned python. -## Experimental Options +## Caveated Local Options -The following local inference options are experimental. 
-Local NIM and managed vLLM install/start require `NEMOCLAW_EXPERIMENTAL=1`; an already-running vLLM server appears directly in the onboarding selection list. +The following local inference options are caveated. +Local NIM and generic Linux managed vLLM install/start require `NEMOCLAW_EXPERIMENTAL=1`; DGX Spark and DGX Station managed vLLM entries appear by default. +An already-running vLLM server appears directly in the onboarding selection list. | Option | Condition | Notes | |--------|-----------|-------| | Local NVIDIA NIM | NIM-capable GPU detected | Pulls and manages a NIM container. | -| Local vLLM | vLLM running on `localhost:8000`, or a supported DGX Spark, DGX Station, or Linux NVIDIA GPU profile | Auto-detects the loaded model when vLLM is already running. Can install or start a managed vLLM container for supported profiles after experimental opt-in. | +| Local vLLM | vLLM running on `localhost:8000`, or a supported DGX Spark, DGX Station, or Linux NVIDIA GPU profile | Auto-detects the loaded model when vLLM is already running. Can install or start a managed vLLM container by default on DGX Spark/Station and after opt-in on generic Linux NVIDIA GPU hosts. | For setup instructions, refer to [Use a Local Inference Server](use-local-inference.md). diff --git a/docs/inference/inference-options.mdx b/docs/inference/inference-options.mdx index 65a0256fe2..9fa6aea8eb 100644 --- a/docs/inference/inference-options.mdx +++ b/docs/inference/inference-options.mdx @@ -39,7 +39,7 @@ NemoClaw uses provider-specific local tokens for those routes, and rebuilds of l | Local Ollama | Caveated | Local Ollama API | Available when Ollama is installed or running on the host | | Local NVIDIA NIM | Experimental | Local OpenAI-compatible | Requires `NEMOCLAW_EXPERIMENTAL=1` and a NIM-capable GPU | | Local vLLM (already running) | Caveated | Local OpenAI-compatible | Appears in the onboarding menu when NemoClaw detects a server already on `localhost:8000`. No flag required. 
| -| Local vLLM (managed install/start) | Experimental | Local OpenAI-compatible | Requires `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host. | +| Local vLLM (managed install/start) | Caveated | Local OpenAI-compatible | Appears by default on DGX Spark and DGX Station. Generic Linux NVIDIA GPU hosts require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm`. NemoClaw pulls/starts a vLLM container on a supported NVIDIA GPU host. | {/* provider-status:end */} ## Provider Options @@ -47,8 +47,8 @@ NemoClaw uses provider-specific local tokens for those routes, and rebuilds of l The onboard wizard presents the following provider options by default. The first six are always available. Ollama appears when it is installed or running on the host. -Experimental local vLLM appears when NemoClaw detects a running vLLM server. -The managed install/start vLLM entry appears when you opt in and NemoClaw detects a supported NVIDIA GPU host profile. +Local vLLM appears when NemoClaw detects a running vLLM server. +The managed install/start vLLM entry appears by default on DGX Spark and DGX Station, and appears on generic Linux NVIDIA GPU hosts after opt-in. | Option | Description | Curated models | |--------|-------------|----------------| @@ -112,15 +112,16 @@ NemoClaw probes only that interpreter and aborts with the failure reason if it d Relative command names such as `python3.12` are rejected; use `command -v python3.12` to find the absolute path. If `python -m venv` itself fails for a probe-clean interpreter (for example, a corrupt ensurepip seed), NemoClaw retries with the next healthy candidate when no pin is set; with a pin set, the failure stops onboarding so you can fix or repoint the pinned python. -## Experimental Options +## Caveated Local Options -The following local inference options are experimental. 
-Local NIM and managed vLLM install/start require `NEMOCLAW_EXPERIMENTAL=1`; an already-running vLLM server appears directly in the onboarding selection list. +The following local inference options are caveated. +Local NIM and generic Linux managed vLLM install/start require `NEMOCLAW_EXPERIMENTAL=1`; DGX Spark and DGX Station managed vLLM entries appear by default. +An already-running vLLM server appears directly in the onboarding selection list. | Option | Condition | Notes | |--------|-----------|-------| | Local NVIDIA NIM | NIM-capable GPU detected | Pulls and manages a NIM container. | -| Local vLLM | vLLM running on `localhost:8000`, or a supported DGX Spark, DGX Station, or Linux NVIDIA GPU profile | Auto-detects the loaded model when vLLM is already running. Can install or start a managed vLLM container for supported profiles after experimental opt-in. | +| Local vLLM | vLLM running on `localhost:8000`, or a supported DGX Spark, DGX Station, or Linux NVIDIA GPU profile | Auto-detects the loaded model when vLLM is already running. Can install or start a managed vLLM container by default on DGX Spark/Station and after opt-in on generic Linux NVIDIA GPU hosts. | For setup instructions, refer to [Use a Local Inference Server](/inference/use-local-inference). diff --git a/docs/inference/use-local-inference.md b/docs/inference/use-local-inference.md index 6d9bd822d5..16ebf208cc 100644 --- a/docs/inference/use-local-inference.md +++ b/docs/inference/use-local-inference.md @@ -256,7 +256,7 @@ $ NEMOCLAW_PROVIDER=anthropicCompatible \ nemoclaw onboard --non-interactive ``` -## vLLM (Experimental) +## vLLM When vLLM is already running on `localhost:8000`, NemoClaw can detect it automatically and query the `/v1/models` endpoint to determine the loaded model. On supported Linux hosts with NVIDIA GPUs, the onboard wizard can also install or start a managed vLLM container for you. 
@@ -268,7 +268,8 @@ $ nemoclaw onboard ``` If vLLM is already running, NemoClaw detects the running model and validates the endpoint. -If vLLM is not running and your host matches a managed profile, set `NEMOCLAW_EXPERIMENTAL=1`, rerun `nemoclaw onboard`, and select the **Install vLLM** or **Start vLLM** entry. +If vLLM is not running and your host matches a DGX Spark or DGX Station managed profile, NemoClaw shows the **Install vLLM** or **Start vLLM** entry by default. +Generic Linux NVIDIA GPU hosts still require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm` before the managed entry appears. NemoClaw pulls the vLLM image, downloads model weights into `~/.cache/huggingface`, starts the `nemoclaw-vllm` container on `localhost:8000`, and prints progress markers while the model loads. The first run can take 10 to 30 minutes. Later runs reuse the cached image and model weights. @@ -295,11 +296,11 @@ $ NEMOCLAW_PROVIDER=vllm \ nemoclaw onboard --non-interactive ``` -Install or start managed vLLM when a supported profile is detected: +Install or start managed vLLM when a supported profile is detected. +`NEMOCLAW_PROVIDER=install-vllm` alone is enough for non-interactive runs on DGX Spark, DGX Station, and generic Linux NVIDIA GPU hosts; on generic Linux hosts it also serves as the opt-in, so `NEMOCLAW_EXPERIMENTAL=1` is not needed in addition. 
```console -$ NEMOCLAW_EXPERIMENTAL=1 \ - NEMOCLAW_PROVIDER=install-vllm \ +$ NEMOCLAW_PROVIDER=install-vllm \ nemoclaw onboard --non-interactive ``` @@ -326,8 +327,7 @@ Gated models require a Hugging Face token; export it before onboarding so NemoCl ```console $ export HF_TOKEN= -$ NEMOCLAW_EXPERIMENTAL=1 \ - NEMOCLAW_PROVIDER=install-vllm \ +$ NEMOCLAW_PROVIDER=install-vllm \ NEMOCLAW_VLLM_MODEL=deepseek-r1-distill-70b \ nemoclaw onboard --non-interactive ``` diff --git a/docs/inference/use-local-inference.mdx b/docs/inference/use-local-inference.mdx index b27c3eae2b..30387a9006 100644 --- a/docs/inference/use-local-inference.mdx +++ b/docs/inference/use-local-inference.mdx @@ -242,7 +242,7 @@ $ NEMOCLAW_PROVIDER=anthropicCompatible \ nemoclaw onboard --non-interactive ``` -## vLLM (Experimental) +## vLLM When vLLM is already running on `localhost:8000`, NemoClaw can detect it automatically and query the `/v1/models` endpoint to determine the loaded model. On supported Linux hosts with NVIDIA GPUs, the onboard wizard can also install or start a managed vLLM container for you. @@ -254,7 +254,8 @@ $ nemoclaw onboard ``` If vLLM is already running, NemoClaw detects the running model and validates the endpoint. -If vLLM is not running and your host matches a managed profile, set `NEMOCLAW_EXPERIMENTAL=1`, rerun `nemoclaw onboard`, and select the **Install vLLM** or **Start vLLM** entry. +If vLLM is not running and your host matches a DGX Spark or DGX Station managed profile, NemoClaw shows the **Install vLLM** or **Start vLLM** entry by default. +Generic Linux NVIDIA GPU hosts still require `NEMOCLAW_EXPERIMENTAL=1` or `NEMOCLAW_PROVIDER=install-vllm` before the managed entry appears. NemoClaw pulls the vLLM image, downloads model weights into `~/.cache/huggingface`, starts the `nemoclaw-vllm` container on `localhost:8000`, and prints progress markers while the model loads. The first run can take 10 to 30 minutes. Later runs reuse the cached image and model weights. 
@@ -281,11 +282,11 @@ $ NEMOCLAW_PROVIDER=vllm \ nemoclaw onboard --non-interactive ``` -Install or start managed vLLM when a supported profile is detected: +Install or start managed vLLM when a supported profile is detected. +`NEMOCLAW_PROVIDER=install-vllm` alone is enough for non-interactive runs on DGX Spark, DGX Station, and generic Linux NVIDIA GPU hosts; on generic Linux hosts it also serves as the opt-in, so `NEMOCLAW_EXPERIMENTAL=1` is not needed in addition. ```console -$ NEMOCLAW_EXPERIMENTAL=1 \ - NEMOCLAW_PROVIDER=install-vllm \ +$ NEMOCLAW_PROVIDER=install-vllm \ nemoclaw onboard --non-interactive ``` @@ -312,8 +313,7 @@ Gated models require a Hugging Face token; export it before onboarding so NemoCl ```console $ export HF_TOKEN= -$ NEMOCLAW_EXPERIMENTAL=1 \ - NEMOCLAW_PROVIDER=install-vllm \ +$ NEMOCLAW_PROVIDER=install-vllm \ NEMOCLAW_VLLM_MODEL=deepseek-r1-distill-70b \ nemoclaw onboard --non-interactive ``` diff --git a/docs/security/best-practices.md b/docs/security/best-practices.md index bd5eb9dd78..d8a9e6f2c5 100644 --- a/docs/security/best-practices.md +++ b/docs/security/best-practices.md @@ -498,13 +498,13 @@ Different inference providers have different trust and cost profiles. ### Experimental Providers -The `NEMOCLAW_EXPERIMENTAL=1` environment variable gates local NVIDIA NIM and the managed vLLM install/start path. An already-running vLLM server on `localhost:8000` is offered in the menu without a flag, because selecting it is an explicit user action. +The `NEMOCLAW_EXPERIMENTAL=1` environment variable gates local NVIDIA NIM and generic Linux managed vLLM install/start. DGX Spark and DGX Station managed vLLM entries are offered by default, and an already-running vLLM server on `localhost:8000` is offered in the menu without a flag, because selecting either is an explicit user action. | Aspect | Detail | |---|---| -| Default | Local NVIDIA NIM and managed vLLM install/start are hidden. Already-running vLLM on `localhost:8000` is offered when detected. 
| -| What you can change | Set `NEMOCLAW_EXPERIMENTAL=1` before running `nemoclaw onboard` to surface Local NIM and managed vLLM. To request only the managed vLLM path non-interactively, set `NEMOCLAW_PROVIDER=install-vllm`. | -| Risk if relaxed | NemoClaw has not fully validated these providers. NIM requires a NIM-capable GPU. The managed vLLM path pulls a container image and starts it on a supported NVIDIA GPU host. Misconfiguration can cause failed inference or unexpected behavior. | +| Default | Local NVIDIA NIM and generic Linux managed vLLM install/start are hidden. DGX Spark and DGX Station managed vLLM entries, plus already-running vLLM on `localhost:8000`, are offered when detected. | +| What you can change | Set `NEMOCLAW_EXPERIMENTAL=1` before running `nemoclaw onboard` to surface Local NIM and generic Linux managed vLLM. To request only the managed vLLM path non-interactively, set `NEMOCLAW_PROVIDER=install-vllm`. | +| Risk if selected | NemoClaw has not fully validated these providers. NIM requires a NIM-capable GPU. The managed vLLM path pulls a container image and starts it on a supported NVIDIA GPU host. Misconfiguration can cause failed inference or unexpected behavior. | | Recommendation | Use experimental providers only for evaluation. Do not rely on them for always-on assistants. | ## Posture Profiles diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 208c806e42..ec087a2159 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -461,13 +461,13 @@ Different inference providers have different trust and cost profiles. ### Experimental Providers -The `NEMOCLAW_EXPERIMENTAL=1` environment variable gates local NVIDIA NIM and the managed vLLM install/start path. An already-running vLLM server on `localhost:8000` is offered in the menu without a flag, because selecting it is an explicit user action. 
+The `NEMOCLAW_EXPERIMENTAL=1` environment variable gates local NVIDIA NIM and generic Linux managed vLLM install/start. DGX Spark and DGX Station managed vLLM entries are offered by default, and an already-running vLLM server on `localhost:8000` is offered in the menu without a flag, because selecting either is an explicit user action. | Aspect | Detail | |---|---| -| Default | Local NVIDIA NIM and managed vLLM install/start are hidden. Already-running vLLM on `localhost:8000` is offered when detected. | -| What you can change | Set `NEMOCLAW_EXPERIMENTAL=1` before running `nemoclaw onboard` to surface Local NIM and managed vLLM. To request only the managed vLLM path non-interactively, set `NEMOCLAW_PROVIDER=install-vllm`. | -| Risk if relaxed | NemoClaw has not fully validated these providers. NIM requires a NIM-capable GPU. The managed vLLM path pulls a container image and starts it on a supported NVIDIA GPU host. Misconfiguration can cause failed inference or unexpected behavior. | +| Default | Local NVIDIA NIM and generic Linux managed vLLM install/start are hidden. DGX Spark and DGX Station managed vLLM entries, plus already-running vLLM on `localhost:8000`, are offered when detected. | +| What you can change | Set `NEMOCLAW_EXPERIMENTAL=1` before running `nemoclaw onboard` to surface Local NIM and generic Linux managed vLLM. To request only the managed vLLM path non-interactively, set `NEMOCLAW_PROVIDER=install-vllm`. | +| Risk if selected | NemoClaw has not fully validated these providers. NIM requires a NIM-capable GPU. The managed vLLM path pulls a container image and starts it on a supported NVIDIA GPU host. Misconfiguration can cause failed inference or unexpected behavior. | | Recommendation | Use experimental providers only for evaluation. Do not rely on them for always-on assistants. | ## Posture Profiles