Commit a215311

committed
Adapt runtime for newly released Qwen 3.5 family models
This update centers the pipeline on Qwen 3.5 local inference with VRAM-aware single-model selection, scene-aware frame processing, and a guided Windows setup/diagnostics UX with CI and tests.
1 parent 48c5aab commit a215311

19 files changed

Lines changed: 3154 additions & 486 deletions

.github/workflows/ci.yml

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+name: CI
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+
+jobs:
+  lint-and-test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest]
+        python-version: ["3.11"]
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install project and dev dependencies
+        run: python -m pip install -e ".[dev]"
+
+      - name: Lint with Ruff
+        run: python -m ruff check .
+
+      - name: Run tests with pytest
+        run: python -m pytest

PROJECT_STRUCTURE.md

Lines changed: 2 additions & 0 deletions
@@ -11,9 +11,11 @@ All source files are in the workspace root. Layering is preserved by filename pr
 /cli.py
 /core_config.py
 /service_media.py
+/service_ollama.py
 /service_transcribe.py
 /service_summarize.py
 /service_pipeline.py
+/adapter_gui.py
 /adapter_api.py
 /adapter_rss.py
 /adapter_storage.py

README.md

Lines changed: 96 additions & 55 deletions
@@ -1,83 +1,124 @@
-# Video RSS Aggregator
+# Video RSS Aggregator (Qwen 3.5 Vision, 4-bit)
 
-Intelligent video summarization and RSS feed generation powered by Qwen3 models on NVIDIA CUDA.
+This project has been rebuilt around Qwen 3.5 multimodal models and a strict local VRAM budget.
 
-- **ASR**: Qwen/Qwen3-ASR-1.7B (via `qwen-asr`)
-- **Summarization**: Qwen/Qwen3-8B-AWQ (via vLLM)
-- **Storage**: PostgreSQL
-- **API**: FastAPI
+- Inference backend: Ollama (Windows-native, no WSL required)
+- Default models: Qwen 3.5 4-bit (`q4_K_M`) tiers
+- Storage: SQLite (`.data/vra.db`)
+- API: FastAPI
+
+## Design Goals
+
+- Use Qwen 3.5 vision-capable small models for summarization quality.
+- Keep total app VRAM use within `8GB` by default.
+- Prefer 4-bit model variants for quality/efficiency balance.
+- Keep setup simple for Windows users (no WSL).
+- Use scene-aware frame extraction with timeline coverage for better visual context.
 
 ## Requirements
 
 - Python 3.11+
-- NVIDIA GPU with CUDA (Windows / Linux)
-- PostgreSQL 15+
-- ffmpeg on PATH
+- Windows 10/11
+- NVIDIA GPU with at least 8GB VRAM
+- Ollama installed on Windows: https://ollama.com/download/windows
+- `ffmpeg` and `ffprobe` on `PATH`
 
-## Quick Start
+## Quick Start (Windows)
 
 ```bash
-# Create environment
 python -m venv .venv
-.venv\Scripts\activate  # Windows
-# source .venv/bin/activate  # Linux
-
-# Install
+.venv\Scripts\activate
 pip install -e .
+```
 
-# Configure
-set DATABASE_URL=postgresql://user:pass@localhost:5432/video_rss
+Run bootstrap (auto-pulls configured models if missing):
 
-# Run
-python -m vra serve --bind 0.0.0.0:8080
+```bash
+python -m vra bootstrap
 ```
 
-Models are downloaded from Hugging Face automatically on first run.
+Start server:
+
+```bash
+python -m vra serve --bind 127.0.0.1:8080
+```
+
+Then open `http://127.0.0.1:8080/` for the guided installation + configuration GUI.
+The setup page includes one-click diagnostics for Python, FFmpeg/FFprobe, yt-dlp, and Ollama reachability.
+
+## 4-bit Model Defaults
+
+Default model priority:
 
-## Project Layout
+1. `qwen3.5:4b-q4_K_M`
+2. `qwen3.5:2b-q4_K_M`
+3. `qwen3.5:0.8b-q8_0` (safety floor when smaller than 2B is needed)
 
-The codebase is organized by layer and naming prefix in the project root:
+Each processing job selects one model up front based on configured VRAM budget,
+current runtime VRAM usage, and workload size (transcript + frames).
+The selected model is pinned for that job; there is no mid-processing model fallback.
 
-- `core_*.py`: core runtime config (`core_config.py`)
-- `service_*.py`: media preparation, transcription, summarization, and orchestration
-- `adapter_*.py`: FastAPI interface, RSS rendering, and database adapter
-- `cli.py`: CLI commands (`serve`, `verify`)
-- `vra.py`: module entry for `python -m vra`
+## Video Processing Intelligence
+
+- Scene-aware frame candidate extraction (`ffmpeg` scene score) to catch shot changes.
+- Uniform timeline sampling fallback/fill to keep temporal coverage when scene cuts are sparse.
+- Deduplication by frame content hash before final frame set is sent to the model.
+- Model preselection per job using VRAM headroom and estimated per-request overhead.
+- SQLite runs in WAL mode with tuned pragmas for better concurrent read/write stability.
+
+## Runtime Commands
+
+```bash
+python -m vra bootstrap
+python -m vra status
+python -m vra verify --source "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+python -m vra benchmark --source "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+```
+
+`benchmark` compares `scene_aware` vs `uniform_only` extraction on the same source,
+reports frame uniqueness metrics, and (by default) runs both through summarization to
+show latency and output-shape differences.
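The scene-aware selection with uniform fill and hash dedup described in this README could be sketched roughly as follows. This is an illustrative outline, not the project's actual implementation: the helper names, the blending heuristic, and the in-memory frame representation are all assumptions.

```python
import hashlib

def choose_frames(scene_ts, duration, max_frames, min_scene_frames=2):
    """Blend scene-cut timestamps with uniform timeline samples.

    scene_ts: timestamps (seconds) where ffmpeg's scene score fired;
    uniform samples fill in whenever scene cuts are sparse.
    """
    picks = list(scene_ts[:max_frames])
    if len(picks) < max(min_scene_frames, max_frames):
        step = duration / (max_frames + 1)
        uniform = [round(step * i, 2) for i in range(1, max_frames + 1)]
        for ts in uniform:
            if len(picks) >= max_frames:
                break
            # Skip uniform samples that land too close to a scene cut.
            if all(abs(ts - p) > step / 2 for p in picks):
                picks.append(ts)
    return sorted(picks)[:max_frames]

def dedup_by_hash(frames):
    """Drop frames whose raw bytes hash to content already kept."""
    seen, kept = set(), []
    for ts in sorted(frames):
        digest = hashlib.sha256(frames[ts]).hexdigest()
        if digest not in seen:
            seen.add(digest)
            kept.append(ts)
    return kept
```

With one scene cut in a 100 s video and `max_frames=5`, `choose_frames([12.0], 100.0, 5)` keeps the cut and pads the rest of the timeline with uniform samples, preserving temporal coverage.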
 
 ## Environment Variables
 
 | Variable | Default | Description |
 |---|---|---|
-| `DATABASE_URL` | *(required)* | PostgreSQL connection string |
-| `BIND_ADDRESS` | `0.0.0.0:8080` | HTTP server bind address |
-| `API_KEY` | *(none)* | Optional bearer token for auth |
-| `VRA_STORAGE_DIR` | `.data` | Local storage for downloads and audio |
-| `VRA_ASR_MODEL` | `Qwen/Qwen3-ASR-1.7B` | ASR model name or path |
-| `VRA_LLM_MODEL` | `Qwen/Qwen3-8B-AWQ` | Summarization model name or path |
-| `VRA_GPU_MEMORY_UTILIZATION` | `0.8` | vLLM GPU memory fraction |
-| `VRA_ASR_DEVICE` | `cuda:0` | PyTorch device for ASR |
-| `VRA_ASR_MAX_TOKENS` | `4096` | Max tokens for ASR output |
-| `VRA_LLM_MAX_TOKENS` | `2048` | Max tokens for summarization |
-| `VRA_RSS_TITLE` | `Video RSS Aggregator` | RSS feed title |
-| `VRA_RSS_LINK` | `http://localhost:8080/rss` | RSS feed self-link |
-| `VRA_RSS_DESCRIPTION` | `Video summaries` | RSS feed description |
+| `BIND_ADDRESS` | `127.0.0.1:8080` | API bind address |
+| `API_KEY` | *(none)* | Optional bearer/API-key auth |
+| `VRA_STORAGE_DIR` | `.data` | Download/frame/subtitle storage |
+| `VRA_DATABASE_PATH` | `.data/vra.db` | SQLite database path |
+| `VRA_OLLAMA_BASE_URL` | `http://127.0.0.1:11434` | Ollama API base URL |
+| `VRA_MODEL_PRIMARY` | `qwen3.5:4b-q4_K_M` | First-choice model |
+| `VRA_MODEL_FALLBACK` | `qwen3.5:2b-q4_K_M` | Second-priority model |
+| `VRA_MODEL_MIN` | `qwen3.5:0.8b-q8_0` | Lowest-priority model |
+| `VRA_AUTO_PULL_MODELS` | `true` | Pull missing models automatically |
+| `VRA_VRAM_BUDGET_MB` | `8192` | Max VRAM budget across the app |
+| `VRA_MODEL_SIZE_BUDGET_RATIO` | `0.75` | Share of budget for base model weight |
+| `VRA_MODEL_SELECTION_RESERVE_MB` | `768` | VRAM safety reserve kept free during model selection |
+| `VRA_CONTEXT_TOKENS` | `3072` | Context window per request |
+| `VRA_MAX_OUTPUT_TOKENS` | `768` | Summary output cap |
+| `VRA_MAX_FRAMES` | `5` | Max sampled frames per source |
+| `VRA_FRAME_SCENE_DETECTION` | `true` | Enable scene-aware frame selection |
+| `VRA_FRAME_SCENE_THRESHOLD` | `0.28` | Scene change sensitivity (`ffmpeg` scene score threshold) |
+| `VRA_FRAME_SCENE_MIN_FRAMES` | `2` | Minimum detected scene frames before blending with uniform sampling |
+| `VRA_MAX_TRANSCRIPT_CHARS` | `16000` | Subtitle transcript cap |
+| `VRA_RSS_TITLE` | `Video RSS Aggregator` | RSS title |
+| `VRA_RSS_LINK` | `http://127.0.0.1:8080/rss` | RSS self-link |
+| `VRA_RSS_DESCRIPTION` | `Video summaries` | RSS description |
 
 ## API
 
-### `GET /health`
-Returns health status.
+- `GET /` (GUI setup + configuration workspace)
+- `GET /health`
+- `GET /setup/config`
+- `GET /setup/diagnostics`
+- `POST /setup/bootstrap`
+- `GET /runtime`
+- `POST /ingest`
+- `POST /process`
+- `GET /rss?limit=20`
 
-### `POST /ingest`
-Ingest an RSS/Atom feed. Body: `{"feed_url": "...", "process": true, "max_items": 5}`
+## Notes
 
-### `POST /process`
-Process a single video/audio source. Body: `{"source_url": "...", "title": "..."}`
-
-### `GET /rss?limit=20`
-Returns summarized content as RSS 2.0 XML.
-
-## Verification
-
-```bash
-python -m vra verify --feed-url "https://example.com/feed.xml" --source "/path/to/audio.wav"
-```
+- GUI setup/configuration workspace is available at `/` when the server is running.
+- This version is optimized for local, Windows-native operation first.
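The VRAM-aware, pinned-per-job model selection the README describes might look like this in outline. The model footprints, helper name, and exact arithmetic are hypothetical; only the priority order, the `8192` MB budget, and the `768` MB reserve come from the documented defaults.

```python
# Illustrative per-model VRAM footprints (MB); not measured project values.
MODEL_VRAM_MB = {
    "qwen3.5:4b-q4_K_M": 3300,
    "qwen3.5:2b-q4_K_M": 1800,
    "qwen3.5:0.8b-q8_0": 1100,
}

PRIORITY = ["qwen3.5:4b-q4_K_M", "qwen3.5:2b-q4_K_M", "qwen3.5:0.8b-q8_0"]

def select_model(budget_mb, used_mb, reserve_mb=768, priority=PRIORITY):
    """Pick the highest-priority model that fits the free VRAM budget.

    The choice happens once, up front; the selected model is pinned for
    the whole job, with no mid-processing fallback.
    """
    free_mb = budget_mb - used_mb - reserve_mb
    for name in priority:
        if MODEL_VRAM_MB[name] <= free_mb:
            return name
    # Nothing fits: the caller should fail the job rather than swap models.
    return None

select_model(8192, 0)     # ample headroom -> "qwen3.5:4b-q4_K_M"
select_model(8192, 5500)  # tight headroom -> "qwen3.5:2b-q4_K_M"
```

Pinning the result for the job trades peak quality under pressure for predictable latency and output shape within a single summarization run.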

adapter_api.py

Lines changed: 108 additions & 16 deletions
@@ -1,13 +1,18 @@
 from __future__ import annotations
 
-import os
-from dataclasses import asdict
+import platform
+import shutil
+import sys
 from datetime import datetime, timezone
+from importlib.util import find_spec
 
 from fastapi import Depends, FastAPI, Header, HTTPException, Query
-from fastapi.responses import Response
+from fastapi.responses import HTMLResponse, Response
 from pydantic import BaseModel
 
+from adapter_gui import render_setup_page
+from core_config import Config
+from service_media import runtime_dependency_report
 from service_pipeline import Pipeline
 
 
@@ -22,17 +27,13 @@ class ProcessRequest(BaseModel):
     title: str | None = None
 
 
-def create_app(pipeline: Pipeline, api_key: str | None = None) -> FastAPI:
+def create_app(pipeline: Pipeline, config: Config) -> FastAPI:
     app = FastAPI(title="Video RSS Aggregator", version="0.1.0")
 
-    rss_title = os.environ.get("VRA_RSS_TITLE", "Video RSS Aggregator")
-    rss_link = os.environ.get("VRA_RSS_LINK", "http://localhost:8080/rss")
-    rss_desc = os.environ.get("VRA_RSS_DESCRIPTION", "Video summaries")
-
     def _check_auth(
         authorization: str | None = Header(None), x_api_key: str | None = Header(None)
     ):
-        if api_key is None:
+        if config.api_key is None:
             return
         token = None
         if authorization:
@@ -41,33 +42,124 @@ def _check_auth(
                 token = parts[1]
         if token is None:
             token = x_api_key
-        if token != api_key:
+        if token != config.api_key:
             raise HTTPException(status_code=401, detail="unauthorized")
 
     @app.get("/health")
     async def health():
         return {"status": "ok", "timestamp": datetime.now(timezone.utc).isoformat()}
 
+    @app.get("/", response_class=HTMLResponse)
+    async def setup_home():
+        return render_setup_page(config)
+
+    @app.get("/setup/config")
+    async def setup_config():
+        return {
+            "bind_address": f"{config.bind_host}:{config.bind_port}",
+            "storage_dir": config.storage_dir,
+            "database_path": config.database_path,
+            "ollama_base_url": config.ollama_base_url,
+            "model_priority": list(config.model_priority),
+            "vram_budget_mb": config.vram_budget_mb,
+            "model_selection_reserve_mb": config.model_selection_reserve_mb,
+            "max_frames": config.max_frames,
+            "frame_scene_detection": config.frame_scene_detection,
+            "frame_scene_threshold": config.frame_scene_threshold,
+            "frame_scene_min_frames": config.frame_scene_min_frames,
+            "api_key_required": config.api_key is not None,
+            "quick_commands": {
+                "bootstrap": "python -m vra bootstrap",
+                "status": "python -m vra status",
+                "serve": "python -m vra serve --bind 127.0.0.1:8080",
+            },
+        }
+
+    @app.get("/setup/diagnostics")
+    async def setup_diagnostics():
+        media_tools = runtime_dependency_report()
+        yt_dlp_cmd = shutil.which("yt-dlp")
+        ytdlp = {
+            "command": yt_dlp_cmd,
+            "module_available": find_spec("yt_dlp") is not None,
+        }
+        ytdlp["available"] = bool(ytdlp["command"] or ytdlp["module_available"])
+
+        ollama: dict[str, object] = {
+            "base_url": config.ollama_base_url,
+            "reachable": False,
+            "version": None,
+            "models_found": 0,
+            "error": None,
+        }
+        try:
+            runtime = await pipeline.runtime_status()
+            ollama["reachable"] = True
+            ollama["version"] = runtime.get("ollama_version")
+            local_models = runtime.get("local_models", {})
+            ollama["models_found"] = len(local_models)
+        except Exception as exc:
+            ollama["error"] = str(exc)
+
+        ffmpeg_ok = bool(media_tools["ffmpeg"].get("available"))
+        ffprobe_ok = bool(media_tools["ffprobe"].get("available"))
+        ytdlp_ok = bool(ytdlp["available"])
+        ollama_ok = bool(ollama["reachable"])
+
+        return {
+            "platform": {
+                "system": platform.system(),
+                "release": platform.release(),
+                "python_version": sys.version.split()[0],
+                "python_executable": sys.executable,
+            },
+            "dependencies": {
+                "ffmpeg": media_tools["ffmpeg"],
+                "ffprobe": media_tools["ffprobe"],
+                "yt_dlp": ytdlp,
+                "ollama": ollama,
+            },
+            "ready": ffmpeg_ok and ffprobe_ok and ytdlp_ok and ollama_ok,
+        }
+
+    @app.post("/setup/bootstrap")
+    async def setup_bootstrap(_=Depends(_check_auth)):
+        return await pipeline.bootstrap_models()
+
     @app.post("/ingest")
     async def ingest(req: IngestRequest, _=Depends(_check_auth)):
         report = await pipeline.ingest_feed(req.feed_url, req.process, req.max_items)
-        return asdict(report)
+        return {
+            "feed_title": report.feed_title,
+            "item_count": report.item_count,
+            "processed_count": report.processed_count,
+        }
 
     @app.post("/process")
     async def process(req: ProcessRequest, _=Depends(_check_auth)):
         report = await pipeline.process_source(req.source_url, req.title)
         return {
             "source_url": report.source_url,
             "title": report.title,
-            "transcription": asdict(report.transcription)
-            if report.transcription
-            else None,
-            "summary": asdict(report.summary) if report.summary else None,
+            "transcript_chars": report.transcript_chars,
+            "frame_count": report.frame_count,
+            "summary": {
+                "summary": report.summary.summary,
+                "key_points": report.summary.key_points,
+                "visual_highlights": report.summary.visual_highlights,
+                "model_used": report.summary.model_used,
+                "vram_mb": report.summary.vram_mb,
+                "error": report.summary.error,
+            },
         }
 
     @app.get("/rss")
     async def rss_feed(limit: int = Query(20, ge=1, le=200)):
-        xml = await pipeline.rss_feed(rss_title, rss_link, rss_desc, limit)
+        xml = await pipeline.rss_feed(limit)
         return Response(content=xml, media_type="application/rss+xml")
 
+    @app.get("/runtime")
+    async def runtime(_=Depends(_check_auth)):
+        return await pipeline.runtime_status()
+
     return app
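Outside of FastAPI, the header convention `_check_auth` enforces — a bearer token in `Authorization`, with `X-API-Key` as a fallback — can be sketched as a pure function. The `Bearer` scheme check is an assumption, since the hunk above elides the `parts` parsing, and the helper name is illustrative:

```python
def extract_token(authorization, x_api_key):
    """Return the client credential from either auth header.

    Mirrors the convention in _check_auth: prefer a well-formed
    "Bearer <token>" Authorization header, fall back to X-API-Key.
    """
    token = None
    if authorization:
        parts = authorization.split()
        # Assumed scheme check; the diff only shows `token = parts[1]`.
        if len(parts) == 2 and parts[0].lower() == "bearer":
            token = parts[1]
    if token is None:
        token = x_api_key
    return token
```

A malformed `Authorization` header (wrong scheme or extra whitespace-separated parts) falls through to `X-API-Key` rather than being rejected outright, which matches how `_check_auth` only compares the final extracted token.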
