diff --git a/README.md b/README.md
index f9cdc39..c1826e7 100644
--- a/README.md
+++ b/README.md
@@ -148,6 +148,17 @@ benchloop run --model your-model --provider openai_compat --endpoint http://your
 
 The CLI flag takes precedence over the env var. For Ollama and local providers without auth, neither is needed.
 
+When you use multiple OpenAI-compatible endpoints, set per-endpoint keys with
+`BENCHLOOP_OPENAI_KEYS`. Entries are comma-separated `endpoint=key` pairs; each
+endpoint must match the `--endpoint` base URL exactly (trailing slashes are ignored):
+
+```bash
+export BENCHLOOP_OPENAI_KEYS="http://127.0.0.1:8000=sk-local,https://openrouter.ai/api=sk-or-..."
+```
+
+BenchLoop uses the matching endpoint-specific key first and falls back to
+`OPENAI_API_KEY` when no entry matches the endpoint.
+
 ### Launch the local dashboard
 
 v0.2.0+ ships the full FastAPI + React dashboard inside the wheel. After `pipx install benchloop-cli`:
diff --git a/bench_loop/dashboard/api/routes/benchmark.py b/bench_loop/dashboard/api/routes/benchmark.py
index 28385db..95c270e 100644
--- a/bench_loop/dashboard/api/routes/benchmark.py
+++ b/bench_loop/dashboard/api/routes/benchmark.py
@@ -352,7 +352,9 @@ async def get_run(run_id: str):
     """Get detailed run result."""
     # Check active runs first
     if run_id in _active_runs:
-        return _active_runs[run_id]
+        state = dict(_active_runs[run_id])
+        state.pop("task", None)
+        return state
 
     # Check disk
     run_dir = RUNS_DIR / run_id
diff --git a/bench_loop/dashboard/api/routes/chat.py b/bench_loop/dashboard/api/routes/chat.py
index cb8645e..afa53dd 100644
--- a/bench_loop/dashboard/api/routes/chat.py
+++ b/bench_loop/dashboard/api/routes/chat.py
@@ -8,6 +8,8 @@
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 
+from bench_loop.providers.openai_compat import _auth_headers as _openai_headers
+
 router = APIRouter()
 
 
@@ -61,6 +63,7 @@ async def chat_generate(req: ChatRequest):
                 resp = await client.post(
                     f"{req.endpoint.rstrip('/')}/v1/chat/completions",
                     json=payload,
+                    headers=_openai_headers(req.endpoint),
                 )
                 resp.raise_for_status()
                 data = resp.json()
diff --git a/bench_loop/dashboard/api/routes/models.py b/bench_loop/dashboard/api/routes/models.py
index ab32081..e09a6d2 100644
--- a/bench_loop/dashboard/api/routes/models.py
+++ b/bench_loop/dashboard/api/routes/models.py
@@ -11,6 +11,8 @@
 from pydantic import BaseModel
 import httpx
 
+from bench_loop.providers.openai_compat import _auth_headers as _openai_headers
+
 try:
     from sse_starlette.sse import EventSourceResponse
 except Exception:  # pragma: no cover
@@ -195,7 +197,7 @@ async def _fetch_ollama_models(endpoint: str) -> list[dict]:
 async def _fetch_openai_models(endpoint: str) -> list[dict]:
     """Fetch models from an OpenAI-compatible endpoint."""
     async with httpx.AsyncClient(timeout=5) as client:
-        resp = await client.get(f"{endpoint.rstrip('/')}/v1/models")
+        resp = await client.get(f"{endpoint.rstrip('/')}/v1/models", headers=_openai_headers(endpoint))
         resp.raise_for_status()
     raw = resp.json().get("data", [])
     return [
@@ -232,7 +234,19 @@ async def _probe_provider(provider: dict) -> dict | None:
         return None
 
 
-_OPENAI_HINT_PORTS = {1234, 1337, 5001, 8000, 8080, 8081}
+_OPENAI_HINT_PORTS = {1234, 1337, 5001, 8000, 8080, 8081, 8088, 10531, 11451}
+_OPENAI_HINT_HOSTS = {"api.openai.com", "openrouter.ai"}
+
+
+def _is_openai_compat_endpoint(endpoint: str) -> bool:
+    """Return True when the endpoint's port or hostname is a known OpenAI-compatible hint."""
+    from urllib.parse import urlparse
+    try:
+        parsed = urlparse(endpoint if "://" in endpoint else f"http://{endpoint}")
+        port = parsed.port  # raises ValueError for a non-numeric or out-of-range port
+    except ValueError:
+        return False
+    return port in _OPENAI_HINT_PORTS or (parsed.hostname or "").lower() in _OPENAI_HINT_HOSTS
 
 
 @router.get("/models")
@@ -240,12 +254,7 @@ async def list_models(endpoint: str = Query(default="")):
     """List models. If endpoint specified, query that. Otherwise auto-detect local providers."""
     if endpoint:
         # If the port is well-known for OpenAI-compatible servers, try that first.
-        from urllib.parse import urlparse
-        try:
-            port = urlparse(endpoint).port
-        except Exception:
-            port = None
-        order: list[str] = ["openai", "ollama"] if port in _OPENAI_HINT_PORTS else ["ollama", "openai"]
+        order: list[str] = ["openai", "ollama"] if _is_openai_compat_endpoint(endpoint) else ["ollama", "openai"]
         last_error: str | None = None
         for kind in order:
             try:
@@ -307,39 +316,68 @@ async def preflight_model(
     """
     ep = endpoint.rstrip("/")
 
-    # Version check against our support table (fast path, no model load needed)
-    version = await _fetch_ollama_version(ep)
-    installed_tuple = _version_tuple(version) if version else (0,)
-    # We don't have the details dict for an arbitrary model name — do a best
-    # effort classification from the name alone.
-    support = _classify_model_support(model, {"family": "", "quantization_level": ""})
-    required = support["required_version"]
-    if required and installed_tuple < _version_tuple(required):
+    is_openai_compat = _is_openai_compat_endpoint(ep)
+
+    # Ollama version checks only apply to Ollama endpoints. OpenAI-compatible
+    # servers such as llama.cpp/vLLM do not expose /api/version and should not
+    # be blocked by Ollama architecture/version rules.
+    version = None
+    if not is_openai_compat:
+        version = await _fetch_ollama_version(ep)
+        installed_tuple = _version_tuple(version) if version else (0,)
+        # We don't have the details dict for an arbitrary model name — do a best
+        # effort classification from the name alone.
+        support = _classify_model_support(model, {"family": "", "quantization_level": ""})
+        required = support["required_version"]
+        if required and installed_tuple < _version_tuple(required):
+            return {
+                "ok": False,
+                "reason": "version_mismatch",
+                "message": (
+                    f"Model `{model}` needs Ollama {required}+ ({support['reason']}). "
+                    f"Installed: {version or 'unknown'}. Upgrade with "
+                    f"`brew upgrade ollama` then restart `ollama serve`."
+                ),
+                "required_version": required,
+                "provider_version": version,
+                "raw": None,
+            }
+
+    # Actual load test: minimum-cost chat round-trip
+    try:
+        async with httpx.AsyncClient(timeout=15) as client:
+            if is_openai_compat:
+                resp = await client.post(
+                    f"{ep}/v1/chat/completions",
+                    json={
+                        "model": model,
+                        "messages": [{"role": "user", "content": "ok"}],
+                        "stream": False,
+                        "max_tokens": 16,
+                    },
+                    headers=_openai_headers(ep),
+                )
+            else:
+                resp = await client.post(
+                    f"{ep}/api/chat",
+                    json={
+                        "model": model,
+                        "messages": [{"role": "user", "content": "ok"}],
+                        "stream": False,
+                        "options": {"num_predict": 1},
+                    },
+                )
+    except httpx.ReadTimeout as exc:
         return {
-            "ok": False,
-            "reason": "version_mismatch",
+            "ok": True,
+            "reason": "load_timeout",
             "message": (
-                f"Model `{model}` needs Ollama {required}+ ({support['reason']}). "
-                f"Installed: {version or 'unknown'}. Upgrade with "
-                f"`brew upgrade ollama` then restart `ollama serve`."
+                f"Model `{model}` did not answer within the quick preflight window. "
+                "Continuing with the benchmark so large models can finish loading normally."
             ),
-            "required_version": required,
+            "raw": str(exc),
             "provider_version": version,
-            "raw": None,
         }
-
-    # Actual load test: minimum-cost chat round-trip
-    try:
-        async with httpx.AsyncClient(timeout=15) as client:
-            resp = await client.post(
-                f"{ep}/api/chat",
-                json={
-                    "model": model,
-                    "messages": [{"role": "user", "content": "ok"}],
-                    "stream": False,
-                    "options": {"num_predict": 1},
-                },
-            )
     except httpx.HTTPError as exc:
         return {
             "ok": False,
@@ -386,6 +424,14 @@ async def preflight_model(
         }
 
     if "model" in lower and "not found" in lower:
+        if is_openai_compat:
+            return {
+                "ok": False,
+                "reason": "not_found",
+                "message": f"`{model}` is not available at {ep}. Check the model id from `/v1/models`.",
+                "raw": raw,
+                "provider_version": version,
+            }
         return {
             "ok": False,
             "reason": "not_found",
diff --git a/bench_loop/hardware.py b/bench_loop/hardware.py
index 26c05ec..e308e4f 100644
--- a/bench_loop/hardware.py
+++ b/bench_loop/hardware.py
@@ -92,6 +92,16 @@ def _env_hardware_overrides() -> dict[str, object]:
     return overrides
 
 
+def _parse_nvidia_float(value: str) -> float | None:
+    """Parse an nvidia-smi numeric field; blank, "[N/A]" or non-numeric text gives None."""
+    text = value.strip()
+    try:
+        # float() rejects "" and "[N/A]" with ValueError, so no separate sentinel check is needed.
+        return float(text)
+    except ValueError:
+        return None
+
+
 def _detect_gpu() -> dict[str, object]:
     overrides = _env_hardware_overrides()
     if overrides.get("gpu") or overrides.get("hardware_label"):
@@ -133,8 +143,9 @@ def _detect_gpu() -> dict[str, object]:
         if len(parts) < 3:
             continue
         name, memory_mb, temp_c = parts[0], parts[1], parts[2]
-        memory_gb = float(memory_mb) / 1024 if memory_mb else 0.0
-        temperature = float(temp_c) if temp_c else None
+        memory_value = _parse_nvidia_float(memory_mb)
+        temperature = _parse_nvidia_float(temp_c)
+        memory_gb = memory_value / 1024 if memory_value is not None else 0.0
         details.append({"name": name, "memory_gb": memory_gb, "temperature_c": temperature})
         names.append(name)
         total_memory_gb += memory_gb
diff --git a/bench_loop/providers/openai_compat.py b/bench_loop/providers/openai_compat.py
index 1fab53c..edab9b6 100644
--- a/bench_loop/providers/openai_compat.py
+++ b/bench_loop/providers/openai_compat.py
@@ -15,9 +15,20 @@
 _STREAM_TIMEOUT = httpx.Timeout(connect=15.0, read=600.0, write=60.0, pool=60.0)
 
 
-def _auth_headers() -> dict[str, str]:
+def _api_key_for_endpoint(endpoint: str) -> str:
+    """Return the BENCHLOOP_OPENAI_KEYS entry for *endpoint*, else OPENAI_API_KEY (or "")."""
+    target = endpoint.rstrip("/")
+    # Entries look like "url=key,url=key"; trailing slashes are ignored on both sides.
+    for entry in os.getenv("BENCHLOOP_OPENAI_KEYS", "").strip().split(","):
+        base, found, token = entry.partition("=")
+        if found and base.strip().rstrip("/") == target:
+            return token.strip()
+    return os.getenv("OPENAI_API_KEY", "").strip()
+
+
+def _auth_headers(endpoint: str) -> dict[str, str]:
     headers = {"Content-Type": "application/json"}
-    api_key = os.getenv("OPENAI_API_KEY", "")
+    api_key = _api_key_for_endpoint(endpoint)  # endpoint-specific key, else OPENAI_API_KEY
     if api_key:
         headers["Authorization"] = f"Bearer {api_key}"
     return headers
@@ -27,7 +38,7 @@ async def list_models(endpoint: str) -> list[str]:
     base_url = endpoint.rstrip("/")
     try:
         async with httpx.AsyncClient(timeout=_HTTP_TIMEOUT) as client:
-            response = await client.get(f"{base_url}/v1/models", headers=_auth_headers())
+            response = await client.get(f"{base_url}/v1/models", headers=_auth_headers(endpoint))
             response.raise_for_status()
     except Exception:
         return []
@@ -63,7 +74,7 @@ async def chat(endpoint: str, model: str, messages: list[dict[str, Any]], **kwar
             response = await client.post(
                 f"{base_url}/v1/chat/completions",
                 json=payload,
-                headers=_auth_headers(),
+                headers=_auth_headers(endpoint),
             )
             response.raise_for_status()
     except Exception as exc:
@@ -168,7 +179,7 @@ async def chat_streaming(
                 "POST",
                 f"{base_url}/v1/chat/completions",
                 json=payload,
-                headers=_auth_headers(),
+                headers=_auth_headers(endpoint),
             ) as response:
                 response.raise_for_status()
                 async for line in response.aiter_lines():
diff --git a/tests/test_openai_compat_endpoints.py b/tests/test_openai_compat_endpoints.py
new file mode 100644
index 0000000..29bb768
--- /dev/null
+++ b/tests/test_openai_compat_endpoints.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+import pytest
+
+from bench_loop.dashboard.api.routes import chat as chat_routes
+from bench_loop.dashboard.api.routes import models
+from bench_loop.providers import openai_compat
+
+
+def test_openai_endpoint_detection_by_port_and_host():
+    assert models._is_openai_compat_endpoint("http://127.0.0.1:8088")  # hinted port
+    assert models._is_openai_compat_endpoint("http://localhost:11451")  # hinted port
+    assert models._is_openai_compat_endpoint("https://openrouter.ai/api")  # hinted hostname
+    assert models._is_openai_compat_endpoint("https://api.openai.com/v1")  # hinted hostname
+    assert not models._is_openai_compat_endpoint("http://localhost:11434")  # neither port nor host is hinted
+
+
+def test_endpoint_specific_openai_key_takes_precedence(monkeypatch):
+    monkeypatch.setenv(
+        "BENCHLOOP_OPENAI_KEYS",
+        "http://127.0.0.1:8000=sk-local,https://openrouter.ai/api=sk-openrouter",
+    )
+    monkeypatch.setenv("OPENAI_API_KEY", "sk-global")
+
+    assert openai_compat._api_key_for_endpoint("http://127.0.0.1:8000/") == "sk-local"  # trailing slash is ignored
+    assert openai_compat._api_key_for_endpoint("https://openrouter.ai/api") == "sk-openrouter"
+    assert openai_compat._api_key_for_endpoint("http://127.0.0.1:9000") == "sk-global"  # no entry matches: global key
+
+
+@pytest.mark.asyncio
+async def test_preflight_uses_openai_chat_completions_for_openai_endpoint(monkeypatch):
+    calls: list[dict] = []  # one record per fake POST, inspected by the assertions below
+
+    async def fail_ollama_version(endpoint: str):  # pragma: no cover - should not be called
+        raise AssertionError("OpenAI-compatible preflight must not check Ollama version")
+
+    class Response:  # minimal stand-in for the HTTP response handed back by the fake client
+        status_code = 200
+        text = ""
+
+    class Client:  # replaces httpx.AsyncClient; records POSTs instead of sending them
+        def __init__(self, timeout):
+            self.timeout = timeout
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return False
+
+        async def post(self, url, json=None, headers=None):
+            calls.append({"url": url, "json": json, "headers": headers})
+            return Response()
+
+    monkeypatch.setattr(models, "_fetch_ollama_version", fail_ollama_version)
+    monkeypatch.setattr(models.httpx, "AsyncClient", Client)
+
+    result = await models.preflight_model(
+        endpoint="http://127.0.0.1:8088",  # port 8088 is in the OpenAI-compatible hint set
+        model="stepfun-ai/Step-3.7-Flash",
+    )
+
+    assert result["ok"] is True
+    assert calls[0]["url"] == "http://127.0.0.1:8088/v1/chat/completions"
+    assert calls[0]["json"]["max_tokens"] == 16
+    assert "options" not in calls[0]["json"]  # "options" belongs to the Ollama /api/chat payload only
+
+
+@pytest.mark.asyncio
+async def test_chat_route_forwards_openai_endpoint_auth_headers(monkeypatch):
+    monkeypatch.setenv("BENCHLOOP_OPENAI_KEYS", "http://127.0.0.1:8088=sk-local")  # entry matches the request endpoint
+    calls: list[dict] = []  # one record per fake POST, inspected by the assertions below
+
+    class Response:  # minimal stand-in for a successful chat-completions response
+        def raise_for_status(self):
+            return None
+
+        def json(self):
+            return {
+                "choices": [{"message": {"content": "ok"}}],
+                "usage": {"prompt_tokens": 1, "completion_tokens": 1},
+            }
+
+    class Client:  # replaces httpx.AsyncClient; records POSTs instead of sending them
+        def __init__(self, timeout):
+            self.timeout = timeout
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return False
+
+        async def post(self, url, json=None, headers=None):
+            calls.append({"url": url, "json": json, "headers": headers})
+            return Response()
+
+    monkeypatch.setattr(chat_routes.httpx, "AsyncClient", Client)
+
+    result = await chat_routes.chat_generate(
+        chat_routes.ChatRequest(
+            model="local-model",
+            endpoint="http://127.0.0.1:8088",
+            provider="openai_compat",
+            prompt="Say ok",
+        )
+    )
+
+    assert result["message"]["content"] == "ok"
+    assert calls[0]["url"] == "http://127.0.0.1:8088/v1/chat/completions"
+    assert calls[0]["headers"]["Authorization"] == "Bearer sk-local"  # per-endpoint key sent as a Bearer token
diff --git a/tests/test_runtime_robustness.py b/tests/test_runtime_robustness.py
new file mode 100644
index 0000000..839a1b0
--- /dev/null
+++ b/tests/test_runtime_robustness.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import pytest
+
+from bench_loop import hardware
+from bench_loop.dashboard.api.routes import benchmark
+
+
+def test_nvidia_smi_na_values_do_not_crash_gpu_detection(monkeypatch):
+    monkeypatch.delenv("BENCHLOOP_GPU", raising=False)  # assumes these are the override env vars — TODO confirm
+    monkeypatch.delenv("BENCHLOOP_HARDWARE_LABEL", raising=False)
+    monkeypatch.setattr(hardware.shutil, "which", lambda name: "/usr/bin/nvidia-smi")
+    monkeypatch.setattr(
+        hardware,
+        "_run_command",
+        lambda command: "NVIDIA GB10 Grace Blackwell, [N/A], [N/A]",  # memory and temperature both "[N/A]"
+    )
+
+    result = hardware._detect_gpu()
+
+    assert result["gpu"] == "NVIDIA GB10 Grace Blackwell"
+    assert result["gpu_memory_gb"] == 0.0  # unparseable memory is counted as 0.0 GB
+    assert result["gpu_temperature_c"] is None
+    assert result["gpu_details"] == [
+        {"name": "NVIDIA GB10 Grace Blackwell", "memory_gb": 0.0, "temperature_c": None}
+    ]
+
+
+@pytest.mark.asyncio
+async def test_active_run_response_omits_internal_task(monkeypatch):
+    internal_task = object()  # placeholder for the value stored under "task"
+    monkeypatch.setitem(
+        benchmark._active_runs,
+        "active123",
+        {
+            "run_id": "active123",
+            "status": "running",
+            "task": internal_task,
+            "events": [],
+        },
+    )
+
+    result = await benchmark.get_run("active123")
+
+    assert result["status"] == "running"
+    assert "task" not in result  # get_run returns a copy with "task" popped
+    assert benchmark._active_runs["active123"]["task"] is internal_task  # stored state is left untouched