Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 87 additions & 10 deletions scripts/stack_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ def _scrub_invisible(obj):
COMPOSE = STACK_ROOT / "docker-compose.yml"
MONITOR = STACK_ROOT / "data" / "hermes" / "scripts" / "github_monitor.py"
HERMES_DOCKERFILE = STACK_ROOT / "hermes" / "Dockerfile"
# ComfyUI ships no version in docker-compose.yml (it runs from a 3rd-party boot
# image); the real installed version is build-stamped in this file.
COMFYUI_VERSION_FILE = STACK_ROOT / "data" / "comfyui-storage" / "ComfyUI" / "comfyui_version.py"
# LiteLLM has no version pin anywhere (model-gateway is FROM
# ghcr.io/berriai/litellm:main-stable, a rolling tag) — read it live from the
# running container instead.
MODEL_GATEWAY_CONTAINER = os.environ.get("MODEL_GATEWAY_CONTAINER", "ordo-ai-stack-model-gateway-1")

# All services to monitor (sources of truth).
#
Expand All @@ -78,24 +85,32 @@ def _scrub_invisible(obj):
"pin_source": "dockerfile"},
}

# Current pinned versions (synced from docker-compose.yml)
# Last-resort fallbacks if a version can't be read from its real source.
# NOTE: ComfyUI and LiteLLM are intentionally absent — they are resolved live
# (see resolve_current_version). Do NOT add stale hardcodes for them; a wrong
# value here silently produces a misleading audit (the old "v0.20.1" ComfyUI pin
# was compared against upstream while the box actually ran 0.17.0).
PINNED = {
"n8n": "2.20.0",
"Open WebUI": "v0.9.2",
"Qdrant": "v1.17.1",
"Caddy": "2.11.2",
"llama.cpp": "server-cuda",
"LiteLLM": "latest",
"ComfyUI": "v0.20.1",
"llama.cpp": "server-cuda", # rolling tag — classifies as ROLLING (manual review)
"oauth2-proxy":"latest-alpine",
}


def run_cmd(cmd, timeout=30):
"""Run a command and return (stdout, stderr, returncode)."""
"""Run a command and return (stdout, stderr, returncode).

Force UTF-8 decoding with replacement: GitHub release bodies routinely carry
non-ASCII bytes, and on a non-UTF-8 locale (e.g. a Windows host's cp1252)
the default decode raises mid-read, leaving stdout=None and crashing callers.
"""
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
return result.stdout, result.stderr, result.returncode
result = subprocess.run(cmd, capture_output=True, text=True,
encoding="utf-8", errors="replace", timeout=timeout)
return (result.stdout or ""), (result.stderr or ""), result.returncode
except subprocess.TimeoutExpired:
return "", "timeout", 1

Expand All @@ -109,6 +124,56 @@ def read_hermes_pin():
return m.group(1) if m else None


def read_comfyui_version():
"""Installed ComfyUI version, build-stamped in comfyui_version.py (e.g. 0.17.0).

ComfyUI has no pin in docker-compose.yml, so without this the monitor used a
hardcoded guess that drifted from reality. Returns None if the file is
missing/unreadable (caller falls back to ROLLING/manual).
"""
if not COMFYUI_VERSION_FILE.exists():
return None
try:
m = re.search(r'__version__\s*=\s*["\']([\d.]+)["\']',
COMFYUI_VERSION_FILE.read_text())
except OSError:
return None
return m.group(1) if m else None


def read_litellm_version():
"""Live LiteLLM version from the running model-gateway container (e.g. 1.82.3).

LiteLLM is pinned only by the rolling `main-stable` image tag, so the
installed package is the single source of truth. Returns None if the
container is down or docker is unavailable (caller falls back to ROLLING).
"""
cmd = ["docker", "exec", MODEL_GATEWAY_CONTAINER, "python", "-c",
"import importlib.metadata as m; print(m.version('litellm'))"]
stdout, _, rc = run_cmd(cmd, timeout=20)
if rc != 0 or not stdout.strip():
return None
version = stdout.strip().splitlines()[-1].strip()
return version if re.match(r"^\d", version) else None


def resolve_current_version(name, compose_versions):
"""Best source of truth for a service's currently-deployed version.

Most services read from docker-compose.yml. ComfyUI and LiteLLM have no
usable pin there and are read from their live/build-stamped source instead.
"""
if name == "ComfyUI":
live = read_comfyui_version()
if live:
return live
if name == "LiteLLM":
live = read_litellm_version()
if live:
return live
return compose_versions.get(name, PINNED.get(name, "unknown"))


def fetch_tag_sha(repo, tag):
"""Resolve a tag name to its commit SHA via the GitHub API.

Expand Down Expand Up @@ -271,7 +336,11 @@ def classify_severity(current, latest, body=""):
l_parts = [int(x) for x in re.findall(r'\d+', clean_latest)]

if not p_parts or not l_parts:
return "MEDIUM", f"Version format unknown ({clean_current} → {clean_latest})"
# No comparable semver — the current pin is a rolling tag or a
# source-built image (e.g. llama.cpp 'server-cuda'). Don't pretend
# it's a minor update; flag it for manual review instead.
return "ROLLING", (f"Pinned by rolling tag/built image ('{clean_current}') — "
f"rebuild to pull latest ({clean_latest}); review release notes")

max_len = max(len(p_parts), len(l_parts))
p_parts.extend([0] * (max_len - len(p_parts)))
Expand Down Expand Up @@ -484,8 +553,9 @@ def main():
all_updates[name] = latest_tag
continue

# Compose-pinned services (the original path).
current = compose_versions.get(name, PINNED.get(name, "unknown"))
# Compose-pinned services (the original path), plus live-resolved
# current versions for ComfyUI/LiteLLM (no usable compose pin).
current = resolve_current_version(name, compose_versions)

if latest_tag is None:
results["services"][name] = {
Expand Down Expand Up @@ -552,6 +622,7 @@ def main():
high = []
medium = []
low = []
rolling = []
safe = []

for name, info in results["services"].items():
Expand All @@ -572,6 +643,8 @@ def main():
medium.append(entry)
elif sev == "LOW":
low.append(entry)
elif sev == "ROLLING":
rolling.append(entry)
else:
safe.append(entry)

Expand All @@ -591,6 +664,10 @@ def main():
print("## 🟢 LOW (Patch update)\n")
for entry in low:
print(entry)
if rolling:
print("## 🔁 ROLLING / MANUAL (rebuild to update)\n")
for entry in rolling:
print(entry)
if safe:
print("## ✅ SAFE (Up to date)\n")
for s in safe:
Expand Down
56 changes: 56 additions & 0 deletions tests/test_stack_monitor_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""stack_monitor version-resolution + rolling-tag classification.

Regression coverage for the audit fix: services pinned by a rolling tag or built
from source (llama.cpp 'server-cuda', litellm 'main-stable') have no comparable
semver and must be flagged ROLLING for manual review — NOT silently reported as a
MEDIUM "version format unknown" update. ComfyUI/LiteLLM current versions are read
from their real source instead of a stale hardcode.
"""
from __future__ import annotations

import importlib.util
from pathlib import Path

_PATH = Path(__file__).resolve().parent.parent / "scripts" / "stack_monitor.py"
_spec = importlib.util.spec_from_file_location("stack_monitor_versions_under_test", _PATH)
sm = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(sm)


def test_rolling_tag_is_rolling_not_medium():
sev, msg = sm.classify_severity("server-cuda", "b4567", "ordinary release notes")
assert sev == "ROLLING"
assert "rebuild" in msg.lower()


def test_built_image_tag_is_rolling():
sev, _ = sm.classify_severity("main-stable", "v1.89.2", "ordinary release notes")
assert sev == "ROLLING"


def test_security_beats_rolling():
# A CVE in the latest notes must win even when current is a rolling tag.
sev, _ = sm.classify_severity("server-cuda", "b1", "fixes CVE-2026-1234 buffer overflow")
assert sev == "CRITICAL"


def test_real_semver_minor_update():
# ComfyUI 0.17.0 -> 0.25.1 is a genuine, comparable update.
sev, _ = sm.classify_severity("0.17.0", "v0.25.1", "minor changes")
assert sev == "MEDIUM"


def test_real_semver_already_current():
sev, _ = sm.classify_severity("1.89.2", "v1.89.2", "no change")
assert sev == "SAFE"


def test_comfyui_resolves_from_version_file_when_present():
# Only assert when the file exists in this checkout (it does in the live repo).
if sm.COMFYUI_VERSION_FILE.exists():
assert sm.read_comfyui_version() == sm.resolve_current_version("ComfyUI", {})


def test_unknown_current_falls_back_to_rolling_not_crash():
sev, _ = sm.classify_severity("unknown", "v1.2.3", "notes")
assert sev == "ROLLING"
Loading