Skip to content

Commit d62e4f1

Browse files
committed
feat: phase 4 — MCP server, vision highlights, Remotion bridge
MCP server: added opencut/mcp_server.py — stdio JSON-RPC MCP server exposing 10 tools (transcribe, silence, export, highlights, separate, TTS, style transfer, face enhance, music gen, job status). Enables AI clients like Claude Code and Cursor to drive OpenCut programmatically. Vision-augmented highlights: added extract_highlights_with_vision() that samples keyframes at intervals and sends alongside transcript to LLM for richer detection of visual-only moments. use_vision param in /video/highlights route. Remotion motion graphics: added render_remotion_title() bridge to Remotion CLI via npx for premium animated titles. Falls back to FFmpeg drawtext when Node.js unavailable.
1 parent 41b1daa commit d62e4f1

5 files changed

Lines changed: 621 additions & 11 deletions

File tree

CLAUDE.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ enhance = ["resemble-enhance>=0.0.1"]
632632
- [x] **TTS tiers**: Kokoro already existed; added `Chatterbox` (voice cloning, emotion, 23 langs, MIT) as `"chatterbox"` engine in `/audio/tts/generate`
633633
- [x] **Voice cloning**: Via Chatterbox `voice_ref` param — zero-shot from 5s audio, emotion control
634634
- [x] **AI color grading**: Added `generate_lut_ai()` — LAB perceptual percentile matching (inspired by Image-Adaptive-3DLUT). New `/video/lut/generate-ai` route
635-
- [ ] **Motion graphics**: Add `Remotion` render service — React-based, After Effects quality titles/animations vs FFmpeg drawtext
635+
- [x] **Motion graphics**: Added `render_remotion_title()` — Remotion CLI integration via npx with fallback to FFmpeg drawtext. `check_remotion_available()` for Node.js detection
636636
- [x] **Video denoising**: Added `BasicVSR++` as `"basicvsr"` method in `/video/ai/denoise` — GPU temporal propagation, chunk-based processing, strength-blended output
637637
- [x] **Scene detection**: Added `PySceneDetect` as `"pyscenedetect"` method in `/video/scenes` — heuristic, fast, ContentDetector
638638
- [x] **Neural LUT blending**: Added `blend_luts()` — linearly interpolate between any two .cube LUTs with a slider. New `/video/lut/blend` route
@@ -641,8 +641,8 @@ enhance = ["resemble-enhance>=0.0.1"]
641641

642642
### Phase 4 — Architecture (Long-term)
643643
- [ ] **UXP migration** — CEP deprecated, removal late 2026. PremiereBridge abstraction already in place. Test with UXP samples.
644-
- [ ] **MCP server exposure** — Expose OpenCut's 81 endpoints as MCP server for AI client integration (Claude Code, Cursor, etc.)
645-
- [ ] **Vision-augmented highlights** — GPT-4o/Claude frame sampling alongside transcript for visual-only highlights
644+
- [x] **MCP server exposure** — Added `opencut/mcp_server.py` — stdio JSON-RPC MCP server with 10 tools (transcribe, silence, export, highlights, separate, TTS, style, face enhance, music, job status). Run via `python -m opencut.mcp_server`.
645+
- [x] **Vision-augmented highlights** — Added `extract_highlights_with_vision()` + `extract_frames_for_vision()`. Samples keyframes at intervals, sends alongside transcript to LLM. `use_vision` param in `/video/highlights`.
646646
- [x] **Transcription slicing** — Added `_transcript_cache` with FIFO eviction (max 20). `cache_transcript()` / `get_cached_transcript()` in captions routes. Keyed by filepath+mtime. `force_retranscribe` param to bypass.
647647

648648
### Keep As-Is (Already Best-in-Class)

opencut/core/highlights.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,3 +352,158 @@ def summarize_video(
352352
on_progress(100, "Summary complete")
353353

354354
return summary
355+
356+
357+
# ---------------------------------------------------------------------------
358+
# Vision-Augmented Highlight Extraction
359+
# ---------------------------------------------------------------------------
360+
def extract_frames_for_vision(
    video_path: str,
    interval_seconds: float = 10.0,
    max_frames: int = 30,
) -> List[Dict]:
    """
    Extract keyframes from video at regular intervals for vision LLM analysis.

    Probes the video duration with ffprobe (falling back to 300s when the
    probe fails for any reason), then grabs one downscaled JPEG every
    ``interval_seconds`` with ffmpeg and base64-encodes it.

    Args:
        video_path: Path to the source video.
        interval_seconds: Seconds between sampled frames.
        max_frames: Hard cap on the number of frames extracted.

    Returns:
        List of {"timestamp": float, "base64": str} dicts. Frames that fail
        to extract (missing ffmpeg, timeout, unreadable input) are skipped
        instead of aborting the whole extraction, so the result may be
        shorter than requested — or empty.
    """
    import base64
    import os
    import shutil
    import subprocess
    import tempfile

    # Probe the duration. Any failure — ffprobe missing (FileNotFoundError
    # is an OSError), a hang (TimeoutExpired is a SubprocessError), or
    # unparsable output — falls back to a 300s default rather than raising.
    try:
        duration_cmd = subprocess.run(
            ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
             "-of", "default=nw=1:nk=1", video_path],
            capture_output=True, text=True, timeout=30,
        )
        duration = float(duration_cmd.stdout.strip())
    except (ValueError, AttributeError, OSError, subprocess.SubprocessError):
        duration = 300.0

    # Calculate frame timestamps
    n_frames = min(max_frames, max(1, int(duration / interval_seconds)))
    timestamps = [i * interval_seconds for i in range(n_frames)]

    frames = []
    tmp_dir = tempfile.mkdtemp(prefix="opencut_vision_")
    try:
        for i, ts in enumerate(timestamps):
            out_path = os.path.join(tmp_dir, f"frame_{i:04d}.jpg")
            try:
                subprocess.run(
                    ["ffmpeg", "-ss", str(ts), "-i", video_path,
                     "-vframes", "1", "-q:v", "5", "-vf", "scale=480:-1",
                     "-y", out_path],
                    capture_output=True, timeout=10,
                )
            except (OSError, subprocess.SubprocessError):
                # One bad frame (or missing ffmpeg) must not kill the rest.
                continue
            # Guard against zero-byte/corrupt outputs from failed seeks.
            if os.path.isfile(out_path) and os.path.getsize(out_path) > 100:
                with open(out_path, "rb") as f:
                    b64 = base64.b64encode(f.read()).decode("ascii")
                frames.append({"timestamp": ts, "base64": b64})
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    return frames
409+
410+
411+
def extract_highlights_with_vision(
    video_path: str,
    transcript_segments: List[Dict],
    max_highlights: int = 5,
    min_duration: float = 15.0,
    max_duration: float = 60.0,
    llm_config=None,
    frame_interval: float = 10.0,
    on_progress: Optional[Callable] = None,
) -> HighlightResult:
    """
    Extract highlights using both transcript AND visual frame analysis.

    Keyframes sampled from the video are listed (by timestamp) alongside
    the transcript in a single prompt for a vision-capable LLM (GPT-4o,
    Claude, Gemini), so moments that are purely visual — action, dramatic
    shots, reactions — can surface even when nothing notable is being said.

    Args:
        video_path: Source video for frame extraction.
        transcript_segments: Text transcript segments.
        frame_interval: Seconds between sampled frames.
    """
    from opencut.core.llm import LLMConfig, query_llm

    if llm_config is None:
        llm_config = LLMConfig()

    if not transcript_segments:
        return HighlightResult()

    def report(pct, msg):
        # Progress callback is optional; funnel every update through here.
        if on_progress:
            on_progress(pct, msg)

    report(5, "Extracting keyframes for vision analysis...")
    frames = extract_frames_for_vision(video_path, interval_seconds=frame_interval)

    report(15, "Formatting transcript + visual context...")
    transcript_text = _format_transcript_for_llm(transcript_segments)

    # One line per sampled keyframe so the LLM can anchor visuals in time.
    keyframe_lines = "\n".join(f"[Frame at {f['timestamp']:.1f}s]" for f in frames)

    prompt = (
        f"Analyze this video using both its transcript AND the visual keyframes below. "
        f"Find the {max_highlights} most interesting, viral, or engaging moments. "
        f"Each clip should be {min_duration:.0f}-{max_duration:.0f} seconds.\n\n"
        f"Consider VISUAL elements (action, reactions, dramatic visuals, on-screen text, "
        f"scene changes) in addition to speech content.\n\n"
        f"TRANSCRIPT:\n{transcript_text}\n\n"
        f"VISUAL KEYFRAMES (timestamps):\n{keyframe_lines}\n\n"
        f"Note: {len(frames)} frames were sampled at {frame_interval}s intervals. "
        f"Use timestamps to correlate visual moments with transcript segments."
    )

    report(25, "Querying vision LLM for highlight analysis...")

    # If the LLM supports vision, we could send frames as images
    # For now, send frame timestamps as text context (works with all LLMs)
    response = query_llm(
        prompt=prompt,
        config=llm_config,
        system_prompt=_HIGHLIGHT_SYSTEM_PROMPT,
    )

    report(80, "Parsing highlights...")

    if response.text.startswith("LLM error:"):
        logger.error("Vision LLM query failed: %s", response.text)
        return HighlightResult(llm_provider=response.provider, llm_model=response.model)

    kept = []
    for clip in _parse_highlights_json(response.text):
        # Clamp each clip into the [min_duration, max_duration] window.
        if clip.duration < min_duration:
            clip.end = clip.start + min_duration
        elif clip.duration > max_duration:
            clip.end = clip.start + max_duration
        if clip.end > clip.start:
            kept.append(clip)

    # Best-scoring clips first, capped at the requested count.
    kept.sort(key=lambda c: c.score, reverse=True)
    kept = kept[:max_highlights]

    report(100, f"Found {len(kept)} highlights (vision-augmented)")

    return HighlightResult(
        highlights=kept,
        total_found=len(kept),
        llm_provider=response.provider,
        llm_model=response.model,
    )

opencut/core/motion_graphics.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,136 @@ def get_title_presets() -> List[Dict]:
289289
{"name": k, "label": v["label"], "description": v["description"]}
290290
for k, v in TITLE_PRESETS.items()
291291
]
292+
293+
294+
# ---------------------------------------------------------------------------
295+
# Remotion-Powered Motion Graphics (Premium, requires Node.js)
296+
# ---------------------------------------------------------------------------
297+
def check_remotion_available() -> bool:
    """Return True when Node.js/npx is installed and responds within 5s."""
    try:
        probe = subprocess.run(["npx", "--version"], capture_output=True, timeout=5)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        # npx not on PATH, or it hung — either way Remotion is unusable.
        return False
    return probe.returncode == 0
304+
305+
306+
def render_remotion_title(
    text: str,
    output_path: Optional[str] = None,
    output_dir: str = "",
    template: str = "title-card",
    duration: float = 5.0,
    width: int = 1920,
    height: int = 1080,
    fps: int = 30,
    props: Optional[Dict] = None,
    on_progress: Optional[Callable] = None,
) -> str:
    """
    Render premium motion graphics using Remotion (React-based).

    Produces After Effects-quality animated titles, lower thirds, and
    kinetic typography via React components rendered to video.

    Requires Node.js 18+ and npx. Templates are React components
    stored in ~/.opencut/remotion-templates/.

    Args:
        text: Title text to render.
        output_path: Explicit output file; derived from ``text`` when None.
        output_dir: Directory for the derived output path (temp dir if empty).
        template: Template name (title-card, lower-third, kinetic-text, countdown).
        duration: Duration in seconds.
        width/height: Output resolution.
        fps: Frames per second.
        props: Additional template-specific props (colors, fonts, animations).
        on_progress: Optional ``(percent, message)`` progress callback.

    Returns:
        Path to the rendered video (from Remotion, or from the FFmpeg
        drawtext fallback when the Remotion render fails).

    Raises:
        RuntimeError: When npx/Node.js is not available at all.
    """
    if not check_remotion_available():
        raise RuntimeError(
            "Remotion requires Node.js 18+. Install from https://nodejs.org/ "
            "then run: npx remotion --version"
        )

    if output_path is None:
        directory = output_dir or tempfile.gettempdir()
        safe_text = re.sub(r'[^\w\-]', '_', text[:20]).strip('_')
        output_path = os.path.join(directory, f"remotion_{safe_text}.mp4")

    if on_progress:
        on_progress(10, f"Preparing Remotion template ({template})...")

    templates_dir = os.path.expanduser("~/.opencut/remotion-templates")
    template_dir = os.path.join(templates_dir, template)

    if not os.path.isdir(template_dir):
        # Generate a default React template on-the-fly
        os.makedirs(template_dir, exist_ok=True)
        _generate_default_template(template_dir, template)

    # Build props JSON for Remotion
    import json
    render_props = {
        "text": text,
        "duration": duration,
        "width": width,
        "height": height,
        **(props or {}),
    }

    props_file = os.path.join(template_dir, "props.json")
    with open(props_file, "w") as f:
        json.dump(render_props, f)

    if on_progress:
        on_progress(30, "Rendering with Remotion...")

    # Clamp to at least one frame: duration*fps < 1 would otherwise produce
    # the malformed range "0--1" for --frames.
    total_frames = max(1, int(duration * fps))
    cmd = [
        "npx", "remotion", "render",
        template_dir,
        "Main",
        output_path,
        "--props", props_file,
        "--width", str(width),
        "--height", str(height),
        "--fps", str(fps),
        "--frames", f"0-{total_frames - 1}",
        "--codec", "h264",
        "--crf", "18",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        render_failed = result.returncode != 0
        stderr = result.stderr.strip()[-500:] if result.stderr else "unknown error"
    except (subprocess.TimeoutExpired, FileNotFoundError) as exc:
        # npx can hang or disappear between the availability check and the
        # render; treat that exactly like a failed render so the documented
        # FFmpeg fallback still applies instead of the exception escaping.
        render_failed = True
        stderr = str(exc)

    if render_failed:
        # Fallback to FFmpeg drawtext if Remotion fails
        logger.warning("Remotion render failed, falling back to FFmpeg: %s", stderr)
        return render_title_card(
            text, output_path=output_path, output_dir=output_dir,
            preset="fade_center", duration=duration,
            width=width, height=height, fps=fps,
            on_progress=on_progress,
        )

    if on_progress:
        on_progress(100, "Remotion render complete!")
    return output_path
404+
405+
406+
def _generate_default_template(template_dir: str, template_name: str):
    """Generate a minimal Remotion template for the given style.

    Writes a package.json declaring the Remotion/React dependencies into
    ``template_dir``. (No React entry component is generated here — the
    Remotion CLI run will fail without one and the caller falls back to
    FFmpeg in that case.)
    """
    import json

    manifest = {
        "name": f"opencut-{template_name}",
        "version": "1.0.0",
        "private": True,
        "dependencies": {
            "remotion": "^4.0.0",
            "react": "^18.0.0",
            "react-dom": "^18.0.0",
        },
    }

    manifest_path = os.path.join(template_dir, "package.json")
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)

    logger.info("Generated default Remotion template: %s", template_dir)

0 commit comments

Comments
 (0)