def _validate_media(path: str):
    """Reject paths whose extension is not a supported media type.

    The check is purely lexical: only the final suffix of ``path`` is
    inspected, case-insensitively. The file itself is never opened.

    Raises:
        ValueError: if the extension is not mp4, mov, mp3, wav or m4a.
    """
    supported = ("mp4", "mov", "mp3", "wav", "m4a")
    suffix = Path(path).suffix.lower().lstrip(".")
    if suffix in supported:
        return
    listing = ", ".join(supported)
    raise ValueError(f"Unsupported file type .{suffix}. Use {listing}.")
def _transcript_to_json(transcript: NormalizedTranscript) -> str:
    """Serialize a transcript to the JSON text stored in ``transcripts.raw_json``.

    ``json.dumps(obj, default=str)`` would store the dataclass *repr* as one
    JSON string, which can never be loaded back into a transcript. ``asdict``
    produces real nested objects instead.
    """
    from dataclasses import asdict

    return json.dumps(asdict(transcript))


def _transcript_from_json(raw_json: str) -> NormalizedTranscript:
    """Rebuild a ``NormalizedTranscript`` with typed words/segments from stored JSON.

    Raises:
        ValueError: if the payload is not valid JSON or not a JSON object
            (for example a row written before the serialization fix).
        TypeError: if a word or segment carries unexpected keys.
    """
    from .models import TranscriptSegment, TranscriptWord

    data = json.loads(raw_json)
    if not isinstance(data, dict):
        raise ValueError("stored transcript is not a JSON object")
    return NormalizedTranscript(
        language=data.get("language") or "en",
        duration=data.get("duration") or 0.0,
        speakers=list(data.get("speakers") or []),
        words=[TranscriptWord(**w) for w in data.get("words") or []],
        segments=[TranscriptSegment(**s) for s in data.get("segments") or []],
    )


# NOTE(review): async views need Flask's "async" extra (asgiref), while
# requirements.txt lists plain ``flask>=3.0`` -- confirm the install.
@app.route("/api/projects/<project_id>/transcribe", methods=["POST"])
async def transcribe_project(project_id: str):
    """Transcribe a project's audio with Deepgram and persist the result.

    Request JSON (all optional): ``provider`` (only "deepgram" is supported)
    and ``apiKey`` (falls back to the ``DEEPGRAM_API_KEY`` environment
    variable). Returns the saved transcript row, or a JSON error.
    """
    p = db.get_project(project_id)
    if not p:
        return _error("Project not found", 404)
    body = request.get_json(silent=True) or {}
    provider = body.get("provider", "deepgram")
    api_key = body.get("apiKey")
    # Validate before touching the status so a bad request cannot leave the
    # project stuck in "transcribing".
    if provider != "deepgram":
        return _error(f"Unsupported provider: {provider}")
    key = api_key or os.environ.get("DEEPGRAM_API_KEY")
    if not key:
        return _error("Set DEEPGRAM_API_KEY or supply an API key")
    db.update_project_status(project_id, "transcribing", None)
    try:
        audio = m.extract_audio(p.source_path, _project_dir(project_id))
        transcript = await tx.transcribe_deepgram(str(audio), key)
    except Exception:
        # Put the previous status back, then let the error surface as before.
        db.update_project_status(project_id, p.status, None)
        raise
    raw_json = _transcript_to_json(transcript)
    saved = db.save_transcript(project_id, provider, raw_json, transcript.language)
    db.update_project_status(project_id, "analyzing", transcript.duration)
    return _ok(json.loads(json.dumps(vars(saved), default=str)))


@app.route("/api/projects/<project_id>/demo-transcript", methods=["POST"])
def save_demo_transcript(project_id: str):
    """Store a small synthetic transcript so the pipeline can run without an API key.

    Each token gets a 0.32 s duration with a 0.08 s gap, plus a 0.75 s pause
    after every sentence. The whole text forms a single segment for speaker "A".
    """
    from .models import TranscriptSegment, TranscriptWord

    if not db.get_project(project_id):
        return _error("Project not found", 404)
    lines = [
        "The surprising thing about short-form clips is that the best moment is rarely the loudest moment.",
        "It is usually the point where someone finally says the quiet part plainly and the listener can feel the stakes.",
        "That is why the system needs to understand the transcript as a story, not just search for keywords.",
    ]
    words = []
    cursor = 0.0
    for line in lines:
        for token in line.split():
            end = cursor + 0.32
            words.append(TranscriptWord(text=token, start=cursor, end=end, speaker="A"))
            cursor = end + 0.08
        cursor += 0.75
    transcript = NormalizedTranscript(
        language="en", duration=cursor, speakers=["A"],
        words=words,
        segments=[TranscriptSegment(start=0.0, end=cursor, speaker="A", text=" ".join(lines))],
    )
    raw = _transcript_to_json(transcript)
    saved = db.save_transcript(project_id, "demo", raw, "en")
    db.update_project_status(project_id, "analyzing", cursor)
    return _ok(json.loads(json.dumps(vars(saved), default=str)))


@app.route("/api/projects/<project_id>/candidates", methods=["POST"])
async def generate_candidates(project_id: str):
    """Ask the configured LLM for clip candidates and replace the stored ones.

    The provider comes from ``LLM_PROVIDER`` ("claude", otherwise DeepSeek).
    The key comes from the request's ``apiKey`` or the matching environment
    variable. Returns the new candidate list, or a JSON error.
    """
    p = db.get_project(project_id)
    if not p:
        return _error("Project not found", 404)
    t = db.latest_transcript(project_id)
    if not t:
        return _error("Transcribe the project first")
    try:
        normalized = _transcript_from_json(t.raw_json)
    except (ValueError, TypeError):
        return _error("Stored transcript is unreadable. Transcribe the project again")
    body = request.get_json(silent=True) or {}
    api_key = body.get("apiKey")
    provider = os.environ.get("LLM_PROVIDER", "deepseek").lower()
    if provider == "claude":
        key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        if not key:
            return _error("Set ANTHROPIC_API_KEY or supply a Claude API key")
        drafts = await llm.detect_candidates_with_claude(normalized, key)
    else:
        key = api_key or os.environ.get("DEEPSEEK_API_KEY")
        if not key:
            return _error("Set DEEPSEEK_API_KEY or supply a DeepSeek API key")
        drafts = await llm.detect_candidates_with_deepseek(normalized, key)
    if not drafts:
        return _error("No viable clip candidates returned")
    candidates = db.replace_candidates(project_id, drafts)
    db.update_project_status(project_id, "ready", None)
    return _ok(json.loads(json.dumps([vars(c) for c in candidates], default=str)))
@app.route("/api/import-file", methods=["POST"])
def import_file():
    """Create a project from a local media file chosen in the desktop dialog.

    Request JSON: ``{"path": "<absolute file path>"}``. Responds 404 when the
    path is missing or is not an existing file, and 400 when the extension is
    unsupported. Otherwise returns the new project as a JSON object.
    """
    # A missing or non-JSON body is treated like an empty one rather than
    # crashing on ``None.get``.
    body = request.get_json(silent=True) or {}
    path = body.get("path", "")
    if not isinstance(path, str) or not path or not Path(path).is_file():
        return _error("File not found", 404)
    try:
        _validate_media(path)
    except ValueError as e:
        return _error(str(e))
    try:
        probe = m.probe_media(path)
    except Exception:
        # Probing is best effort: the project is still created, just without
        # a known duration (e.g. when ffprobe is not installed).
        probe = None
    p = db.create_project(path, "deepgram", probe.duration_sec if probe else None)
    # ``vars(p)`` matches the other project handlers. Dumping the dataclass
    # itself with ``default=str`` would return its repr as one JSON string.
    return _ok(json.loads(json.dumps(vars(p), default=str)))
def __init__(self, db_path: Path):
    """Open (creating if needed) the SQLite database at ``db_path`` and migrate it.

    Args:
        db_path: location of the database file; missing parent directories
            are created.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    # The connection is created once at import time but used from Flask's
    # request threads. Without check_same_thread=False, sqlite3 raises
    # ProgrammingError on the first query from another thread.
    # NOTE(review): writes from concurrent requests share this one connection;
    # add a lock around write paths if parallel writes become possible.
    self.conn = sqlite3.connect(str(db_path), check_same_thread=False)
    self.conn.row_factory = sqlite3.Row
    self._migrate()
def create_project(self, source_path: str,
                   transcription_mode: str,
                   source_duration: float | None = None) -> Project:
    """Insert a new project row and return it.

    Args:
        source_path: path of the source media file.
        transcription_mode: transcription engine selected for the project.
        source_duration: media duration in seconds, if already known.

    Returns:
        The created ``Project`` with a fresh UUID and creation timestamps.
    """
    now = self._now()
    p = Project(
        id=str(uuid.uuid4()), source_path=source_path,
        transcription_mode=transcription_mode,
        source_duration=source_duration, created_at=now, updated_at=now,
    )
    # Columns are named explicitly: on databases where ``name`` was added by
    # the ALTER TABLE fallback it is the LAST column, so a positional
    # ``VALUES (...)`` insert would write every field into the wrong column.
    self.conn.execute(
        "INSERT INTO projects (id, name, source_path, source_duration, status, "
        "transcription_mode, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?)",
        (p.id, p.name, p.source_path, p.source_duration, p.status,
         p.transcription_mode, p.created_at, p.updated_at),
    )
    self.conn.commit()
    return p
def replace_candidates(self, project_id: str,
                       drafts: list[CandidateDraft]) -> list[Candidate]:
    """Replace a project's candidates (and their pending clips) with ``drafts``.

    Drafts are ranked in the order given, starting at 1, and each new
    candidate gets a matching ``clips`` row in "pending" state. The first
    ``min(max(len(drafts), 3), 6)`` ranks are marked selected.

    The delete and all inserts run in one transaction: if any insert fails,
    the previous candidates are kept instead of being half-replaced.

    Returns:
        The newly created candidates in rank order.
    """
    selected_cutoff = min(max(len(drafts), 3), 6)
    candidates = []
    # ``with conn`` commits on success and rolls back on any exception.
    with self.conn:
        self.conn.execute("DELETE FROM candidates WHERE project_id=?", (project_id,))
        for rank, d in enumerate(drafts, start=1):
            c = Candidate(
                id=str(uuid.uuid4()), project_id=project_id,
                start_sec=d.start, end_sec=d.end, score=d.score,
                hook=d.hook, rationale=d.rationale,
                rank=rank, selected=rank <= selected_cutoff,
            )
            self.conn.execute(
                "INSERT INTO candidates (id, project_id, start_sec, end_sec, score, "
                "hook, rationale, rank, selected) VALUES (?,?,?,?,?,?,?,?,?)",
                (c.id, c.project_id, c.start_sec, c.end_sec, c.score,
                 c.hook, c.rationale, c.rank, 1 if c.selected else 0),
            )
            self.conn.execute(
                "INSERT INTO clips (id, candidate_id, status) VALUES (?,?,?)",
                (str(uuid.uuid4()), c.id, "pending"),
            )
            candidates.append(c)
    return candidates
* FROM candidates WHERE project_id=? ORDER BY rank ASC", + (project_id,), + ).fetchall() + return [Candidate( + id=r["id"], project_id=r["project_id"], + start_sec=r["start_sec"], end_sec=r["end_sec"], + score=r["score"], hook=r["hook"], rationale=r["rationale"], + rank=r["rank"], selected=bool(r["selected"]), + ) for r in rows] + + def get_candidate_with_project(self, candidate_id: str + ) -> tuple[Candidate, Project] | None: + r = self.conn.execute(""" + SELECT candidates.*, projects.id as pid, projects.name, + projects.source_path, projects.source_duration, + projects.status, projects.transcription_mode, + projects.created_at, projects.updated_at + FROM candidates INNER JOIN projects ON projects.id = candidates.project_id + WHERE candidates.id=? + """, (candidate_id,)).fetchone() + if not r: + return None + c = Candidate(id=r["id"], project_id=r["project_id"], + start_sec=r["start_sec"], end_sec=r["end_sec"], + score=r["score"], hook=r["hook"], rationale=r["rationale"], + rank=r["rank"], selected=bool(r["selected"])) + p = Project(id=r["pid"], name=r["name"], source_path=r["source_path"], + source_duration=r["source_duration"], status=r["status"], + transcription_mode=r["transcription_mode"], + created_at=r["created_at"], updated_at=r["updated_at"]) + return c, p + + def update_clip_for_candidate(self, candidate_id: str, status: str, + output_path: str | None = None, + render_log: str | None = None): + self.conn.execute( + "UPDATE clips SET status=?, output_path=COALESCE(?, output_path), render_log=COALESCE(?, render_log) WHERE candidate_id=?", + (status, output_path, render_log, candidate_id), + ) + self.conn.commit() + + def set_selected_clip_count(self, project_id: str, count: int) -> list[Candidate]: + self.conn.execute( + "UPDATE candidates SET selected = CASE WHEN rank <= ? 
THEN 1 ELSE 0 END WHERE project_id=?", + (count, project_id), + ) + self.conn.commit() + return self.list_candidates(project_id) + + def project_detail(self, project_id: str) -> ProjectDetail | None: + project = self.get_project(project_id) + if not project: + return None + transcript = self.latest_transcript(project_id) + candidates = self.list_candidates(project_id) + clips = self.conn.execute(""" + SELECT clips.* FROM clips + INNER JOIN candidates ON candidates.id = clips.candidate_id + WHERE candidates.project_id=? ORDER BY candidates.rank ASC + """, (project_id,)).fetchall() + copy = self.conn.execute(""" + SELECT clip_copy.* FROM clip_copy + INNER JOIN clips ON clips.id = clip_copy.clip_id + INNER JOIN candidates ON candidates.id = clips.candidate_id + WHERE candidates.project_id=? + """, (project_id,)).fetchall() + return ProjectDetail( + project=project, transcript=transcript, + candidates=candidates, + clips=[Clip( + id=r["id"], candidate_id=r["candidate_id"], + status=r["status"], output_path=r["output_path"], + face_track_json=r["face_track_json"], + caption_ass_path=r["caption_ass_path"], + render_log=r["render_log"], + ) for r in clips], + copy=[ClipCopy( + id=r["id"], clip_id=r["clip_id"], platform=r["platform"], + hook_text=r["hook_text"], caption_text=r["caption_text"], + hashtags=r["hashtags"], + ) for r in copy], + ) + + def delete_project(self, project_id: str): + self.conn.execute("DELETE FROM projects WHERE id=?", (project_id,)) + self.conn.commit() + + def rename_project(self, project_id: str, name: str): + self.conn.execute( + "UPDATE projects SET name=?, updated_at=? WHERE id=?", + (name, self._now(), project_id), + ) + self.conn.commit() diff --git a/backend/llm.py b/backend/llm.py new file mode 100644 index 0000000..8fbc1c9 --- /dev/null +++ b/backend/llm.py @@ -0,0 +1,68 @@ +import json +import os +import httpx +from .models import NormalizedTranscript, CandidateDraft + +SYSTEM_PROMPT = """You are a viral clip analyst. 
async def detect_candidates_with_deepseek(
    transcript: NormalizedTranscript, api_key: str
) -> list[CandidateDraft]:
    """Ask DeepSeek for clip candidates and parse them into ``CandidateDraft``s.

    Args:
        transcript: transcript whose segments are sent as the user message.
        api_key: DeepSeek API key, sent as a bearer token.

    Returns:
        The parsed candidate drafts, possibly empty.

    Raises:
        httpx.HTTPStatusError: if the API answers with a 4xx/5xx status.
        json.JSONDecodeError: if the model's reply is not valid JSON.
    """
    text = _format_transcript(transcript)
    async with httpx.AsyncClient(timeout=120) as client:
        r = await client.post(
            "https://api.deepseek.com/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": text},
                ],
                "response_format": {"type": "json_object"},
            },
        )
    # Fail with the real HTTP error (bad key, rate limit) instead of a
    # confusing KeyError on the missing "choices" field.
    r.raise_for_status()
    content = r.json()["choices"][0]["message"]["content"]
    parsed = json.loads(content)
    if isinstance(parsed, dict):
        # JSON-object mode returns an object even though the prompt asks for
        # an array. Accept either a single candidate object or an object that
        # wraps the array under some key.
        if "start" in parsed and "end" in parsed:
            parsed = [parsed]
        else:
            parsed = next((v for v in parsed.values() if isinstance(v, list)), [])
    if not isinstance(parsed, list):
        return []
    # Drop keys the model invented so ``CandidateDraft(**...)`` cannot fail
    # on an unexpected keyword.
    known = CandidateDraft.__dataclass_fields__
    return [
        CandidateDraft(**{k: v for k, v in item.items() if k in known})
        for item in parsed
        if isinstance(item, dict)
    ]
json.loads(content)] diff --git a/backend/media.py b/backend/media.py new file mode 100644 index 0000000..082510f --- /dev/null +++ b/backend/media.py @@ -0,0 +1,73 @@ +import subprocess +import shutil +import json +from pathlib import Path +from .models import MediaProbe + + +def command_exists(name: str) -> bool: + return shutil.which(name) is not None + + +def probe_media(path: str) -> MediaProbe: + if not command_exists("ffprobe"): + raise RuntimeError("ffprobe not found on PATH") + r = subprocess.run( + ["ffprobe", "-v", "error", "-print_format", "json", + "-show_format", "-show_streams", path], + capture_output=True, text=True + ) + if r.returncode != 0: + raise RuntimeError(f"ffprobe failed: {r.stderr.strip()}") + data = json.loads(r.stdout) + streams = data.get("streams", []) + video = next((s for s in streams if s.get("codec_type") == "video"), None) + audio = next((s for s in streams if s.get("codec_type") == "audio"), None) + duration = data.get("format", {}).get("duration") + return MediaProbe( + duration_sec=float(duration) if duration else None, + has_video=video is not None, + width=video.get("width") if video else None, + height=video.get("height") if video else None, + video_codec=video.get("codec_name") if video else None, + audio_codec=audio.get("codec_name") if audio else None, + ) + + +def extract_audio(source_path: str, project_dir: Path) -> Path: + if not command_exists("ffmpeg"): + raise RuntimeError("ffmpeg not found on PATH") + project_dir.mkdir(parents=True, exist_ok=True) + output = project_dir / "transcription_audio.wav" + r = subprocess.run( + ["ffmpeg", "-y", "-i", source_path, + "-vn", "-ac", "1", "-ar", "16000", str(output)], + capture_output=True, text=True + ) + if r.returncode != 0: + raise RuntimeError(f"audio extraction failed: {r.stderr.strip()}") + return output + + +def render_flat_clip(source_path: str, start_sec: float, + end_sec: float, output_path: Path) -> Path: + if not command_exists("ffmpeg"): + raise 
def _camel(s: str) -> str:
    """Convert a ``snake_case`` identifier to ``camelCase``."""
    first, *rest = s.split("_")
    return first + "".join(w.capitalize() for w in rest)


def dumps(obj):
    """Serialize ``obj`` to JSON text, emitting dataclasses with camelCase keys.

    Dataclass instances at any nesting depth (fields, list items, dict
    values) are converted by ``_serialize``. Any other non-JSON value is
    stringified. Non-ASCII characters are written as-is.
    """
    return json.dumps(obj, default=_serialize, ensure_ascii=False)


def _serialize(o):
    """``json.dumps`` ``default`` hook: one level of dataclass -> camelCase dict.

    The conversion is deliberately shallow. The JSON encoder calls this hook
    again for every nested dataclass it meets, so nested keys are camelCased
    too. ``asdict`` would flatten nested dataclasses into snake_case dicts
    first, leaving only the top-level keys converted.
    """
    if hasattr(o, "__dataclass_fields__") and not isinstance(o, type):
        from dataclasses import fields

        return {_camel(f.name): getattr(o, f.name) for f in fields(o)}
    return str(o)
created_at: str = "" + updated_at: str = "" + + +@dataclass +class Transcript: + id: str = "" + project_id: str = "" + engine: str = "" + raw_json: str = "" + language: Optional[str] = None + created_at: str = "" + + +@dataclass +class Candidate: + id: str = "" + project_id: str = "" + start_sec: float = 0.0 + end_sec: float = 0.0 + score: float = 0.0 + hook: str = "" + rationale: str = "" + rank: int = 0 + selected: bool = False + + +@dataclass +class Clip: + id: str = "" + candidate_id: str = "" + status: str = "pending" + output_path: Optional[str] = None + face_track_json: Optional[str] = None + caption_ass_path: Optional[str] = None + render_log: Optional[str] = None + + +@dataclass +class ClipCopy: + id: str = "" + clip_id: str = "" + platform: str = "" + hook_text: Optional[str] = None + caption_text: Optional[str] = None + hashtags: Optional[str] = None + + +@dataclass +class ProjectDetail: + project: Optional[Project] = None + transcript: Optional[Transcript] = None + candidates: list[Candidate] = field(default_factory=list) + clips: list[Clip] = field(default_factory=list) + copy: list[ClipCopy] = field(default_factory=list) + + +@dataclass +class TranscriptWord: + text: str = "" + start: float = 0.0 + end: float = 0.0 + speaker: Optional[str] = None + + +@dataclass +class TranscriptSegment: + start: float = 0.0 + end: float = 0.0 + speaker: Optional[str] = None + text: str = "" + + +@dataclass +class NormalizedTranscript: + language: str = "en" + duration: float = 0.0 + speakers: list[str] = field(default_factory=lambda: ["A"]) + words: list[TranscriptWord] = field(default_factory=list) + segments: list[TranscriptSegment] = field(default_factory=list) + + +@dataclass +class CandidateDraft: + start: float = 0.0 + end: float = 0.0 + score: float = 0.0 + hook: str = "" + rationale: str = "" diff --git a/backend/transcription.py b/backend/transcription.py new file mode 100644 index 0000000..09dd46c --- /dev/null +++ b/backend/transcription.py @@ -0,0 +1,59 
async def transcribe_deepgram(audio_path: str, api_key: str) -> NormalizedTranscript:
    """Transcribe a WAV file with Deepgram and normalize the response.

    Args:
        audio_path: path of the WAV audio to upload.
        api_key: Deepgram API key, sent as ``Authorization: Token ...``.

    Returns:
        A ``NormalizedTranscript`` built from the first alternative of the
        first channel, with words grouped into segments by ``build_segments``.

    Raises:
        httpx.HTTPStatusError: if Deepgram answers with a 4xx/5xx status.
    """
    with open(audio_path, "rb") as f:
        audio_data = f.read()
    async with httpx.AsyncClient(timeout=300) as client:
        r = await client.post(
            "https://api.deepgram.com/v1/listen?model=nova-2&punctuate=true&utterances=true&language=en",
            headers={
                "Authorization": f"Token {api_key}",
                "Content-Type": "audio/wav",
            },
            content=audio_data,
        )
    # Surface auth/quota failures as HTTP errors rather than a KeyError on
    # the missing "results" field of an error payload.
    r.raise_for_status()
    data = r.json()
    channel = data["results"]["channels"][0]
    alternatives = channel["alternatives"][0]
    words = []
    for w in alternatives.get("words", []):
        raw_speaker = w.get("speaker")
        words.append(TranscriptWord(
            # punctuate=true is requested, so prefer the punctuated form
            # when the response provides it.
            text=w.get("punctuated_word") or w["word"],
            start=w["start"],
            end=w["end"],
            # Speaker ids arrive as integers starting at 0. Store them as
            # strings (the declared type) so speaker 0 is not lost to
            # truthiness checks.
            speaker=None if raw_speaker is None else str(raw_speaker),
        ))
    segments = build_segments(words)
    # NOTE(review): the duration is expected under top-level "metadata";
    # the per-alternative lookup is kept as a fallback -- confirm against
    # the Deepgram response schema.
    duration = (
        (data.get("metadata") or {}).get("duration")
        or alternatives.get("duration")
        or (words[-1].end if words else 0.0)
    )
    return NormalizedTranscript(
        language=channel.get("detected_language") or alternatives.get("language", "en"),
        duration=duration,
        # Sorted so the speaker list is deterministic across runs.
        speakers=sorted({s.speaker for s in segments if s.speaker is not None}),
        words=words,
        segments=segments,
    )
+""" +import os +import sys +import threading +import webview + +# Ensure backend is importable from project root +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +FLASK_PORT = 17999 + + +class Api: + """Bridge exposed to JS via pywebview.""" + + def open_file_dialog(self): + import webview as wv + result = wv.windows[0].create_file_dialog( + wv.OPEN_DIALOG, + allow_multiple=False, + file_types=("Media Files (*.mp4;*.mov;*.mp3;*.wav;*.m4a)",), + ) + if result: + return result[0] + return None + + +def start_flask(): + from backend.app import app + app.run(host="127.0.0.1", port=FLASK_PORT, debug=False, use_reloader=False) + + +if __name__ == "__main__": + t = threading.Thread(target=start_flask, daemon=True) + t.start() + + # Serve from localhost — assumes `npm run build` has populated dist/ + dist_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dist") + index_url = f"http://127.0.0.1:{FLASK_PORT}/api/environment-status" + + # Wait for Flask to be ready + import urllib.request + import time + for _ in range(30): + try: + urllib.request.urlopen(index_url, timeout=1) + break + except Exception: + time.sleep(0.5) + + window = webview.create_window( + "AutoShorts", + f"http://127.0.0.1:{FLASK_PORT}", + width=1280, + height=840, + min_size=(1040, 700), + js_api=Api(), + ) + webview.start() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..555a0b8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +flask>=3.0 +pywebview>=5.0 +httpx>=0.27 diff --git a/src/api.ts b/src/api.ts new file mode 100644 index 0000000..77cbb88 --- /dev/null +++ b/src/api.ts @@ -0,0 +1,135 @@ +const BASE = "http://127.0.0.1:17999/api"; + +async function request(method: string, path: string, body?: unknown): Promise { + const res = await fetch(`${BASE}${path}`, { + method, + headers: body ? { "Content-Type": "application/json" } : undefined, + body: body ? 
JSON.stringify(body) : undefined, + }); + const data = await res.json(); + if (!res.ok) throw new Error(data.error || String(res.status)); + return data as T; +} + +export async function getEnvironmentStatus() { + return request<{ + dataDir: string; + hasFfmpeg: boolean; + hasFfprobe: boolean; + hasDeepgramKey: boolean; + hasAnthropicKey: boolean; + hasDeepseekKey: boolean; + llmProvider: string; + }>("GET", "/environment-status"); +} + +export async function listProjects() { + return request<{ + id: string; + name: string | null; + sourcePath: string; + sourceDuration: number | null; + status: string; + transcriptionMode: string; + createdAt: string; + updatedAt: string; + }[]>("GET", "/projects"); +} + +export async function createProject(path: string) { + return request<{ + id: string; + name: string | null; + sourcePath: string; + sourceDuration: number | null; + status: string; + transcriptionMode: string; + createdAt: string; + updatedAt: string; + }>("POST", "/projects", { path, transcriptionMode: "deepgram" }); +} + +export async function importFileDialog(path: string) { + return request<{ + id: string; + name: string | null; + sourcePath: string; + sourceDuration: number | null; + status: string; + transcriptionMode: string; + createdAt: string; + updatedAt: string; + }>("POST", "/import-file", { path }); +} + +export async function getProjectDetail(projectId: string) { + return request<{ + project: unknown; + transcript: unknown | null; + candidates: unknown[]; + clips: unknown[]; + }>("GET", `/projects/${projectId}/detail`); +} + +export async function transcribeProject( + projectId: string, + apiKey?: string | null +) { + return request("POST", `/projects/${projectId}/transcribe`, { + provider: "deepgram", + apiKey: apiKey || null, + }); +} + +export async function generateCandidates( + projectId: string, + apiKey?: string | null +) { + return request("POST", `/projects/${projectId}/candidates`, { + apiKey: apiKey || null, + }); +} + +export async 
function setSelectedClipCount( + projectId: string, + count: number +) { + return request("POST", `/projects/${projectId}/select`, { count }); +} + +export async function renderClip(candidateId: string) { + return request<{ outputPath: string }>( + "POST", + `/candidates/${candidateId}/render` + ); +} + +export async function deleteProject(projectId: string) { + return request("DELETE", `/projects/${projectId}`); +} + +export async function renameProject(projectId: string, name: string) { + return request("POST", `/projects/${projectId}/rename`, { name }); +} + +export async function openFileDialog(): Promise { + if ((window as any).pywebview?.api?.open_file_dialog) { + return (window as any).pywebview.api.open_file_dialog(); + } + return new Promise((resolve) => { + const input = document.createElement("input"); + input.type = "file"; + input.accept = ".mp4,.mov,.mp3,.wav,.m4a"; + input.onchange = () => { + const file = input.files?.[0]; + if (file) { + const reader = new FileReader(); + reader.onload = () => resolve(file.name); + reader.readAsDataURL(file); + } else { + resolve(null); + } + }; + input.click(); + }); +} diff --git a/src/main.tsx b/src/main.tsx index b0e1eea..64ce7a1 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -1,7 +1,6 @@ import React, { useEffect, useMemo, useState } from "react"; import { createRoot } from "react-dom/client"; -import { invoke } from "@tauri-apps/api/core"; -import { open } from "@tauri-apps/plugin-dialog"; +import * as api from "./api"; import { AudioLines, BadgeCheck, @@ -144,15 +143,15 @@ function App() { async function refresh(nextProjectId?: string) { setError(null); const [env, projectList] = await Promise.all([ - invoke("environment_status"), - invoke("list_projects"), + api.getEnvironmentStatus(), + api.listProjects(), ]); setEnvironment(env); setProjects(projectList); if (nextProjectId) { - const nextDetail = await invoke("get_project_detail", { projectId: nextProjectId }); - setDetail(nextDetail); + const nextDetail 
= await api.getProjectDetail(nextProjectId); + setDetail(nextDetail as any); } else { setDetail(null); } @@ -173,20 +172,9 @@ function App() { async function importMedia() { let newProjectId: string | null = null; await run("import", async () => { - const selected = await open({ - multiple: false, - filters: [ - { - name: "Media", - extensions: ["mp4", "mov", "mp3", "wav", "m4a"], - }, - ], - }); - if (typeof selected !== "string") return; - const project = await invoke("create_project_from_path", { - path: selected, - transcriptionMode: "cloud", - }); + const selected = await api.openFileDialog(); + if (!selected) return; + const project = await api.createProject(selected); newProjectId = project.id; await refresh(project.id); }); @@ -198,7 +186,7 @@ function App() { async function runAutoPipeline(projectId: string) { setError(null); - const env = await invoke("environment_status"); + const env = await api.getEnvironmentStatus(); const hasDG = env.hasDeepgramKey || deepgramKey.trim().length > 0; const activeLlm = env.llmProvider || "deepseek"; const hasActiveLlm = activeLlm === "claude" @@ -213,11 +201,7 @@ function App() { // 1. Transcription try { setBusy("transcribe"); - await invoke("transcribe_project", { - projectId, - provider: "deepgram", - apiKey: deepgramKey.trim() || null, - }); + await api.transcribeProject(projectId, deepgramKey.trim() || null); await refresh(projectId); } catch (err) { setError(err instanceof Error ? err.message : String(err)); @@ -235,11 +219,7 @@ function App() { try { setBusy("moments"); const activeKey = activeLlm === "claude" ? anthropicKey.trim() : deepseekKey.trim(); - await invoke("generate_candidates", { - projectId, - apiKey: activeKey || null, - allowDemo: false, - }); + await api.generateCandidates(projectId, activeKey || null); await refresh(projectId); } catch (err) { setError(err instanceof Error ? 
err.message : String(err)); @@ -258,7 +238,7 @@ function App() { if (!trimmed) return; try { - await invoke("rename_project", { projectId, name: trimmed }); + await api.renameProject(projectId, trimmed); await refresh(detail?.project.id); } catch (err) { setError(err instanceof Error ? err.message : String(err)); @@ -272,7 +252,7 @@ function App() { if (!window.confirm(`Are you sure you want to delete the project "${name}"?`)) return; try { - await invoke("delete_project", { projectId }); + await api.deleteProject(projectId); const nextActiveId = detail?.project.id === projectId ? null : detail?.project.id; await refresh(nextActiveId ?? undefined); } catch (err) { @@ -282,19 +262,15 @@ function App() { async function selectProject(projectId: string) { await run("idle", async () => { - const nextDetail = await invoke("get_project_detail", { projectId }); - setDetail(nextDetail); + const nextDetail = await api.getProjectDetail(projectId); + setDetail(nextDetail as any); }); } async function transcribe() { if (!detail) return; await run("transcribe", async () => { - await invoke("transcribe_project", { - projectId: detail.project.id, - provider: "deepgram", - apiKey: deepgramKey.trim() || null, - }); + await api.transcribeProject(detail.project.id, deepgramKey.trim() || null); await refresh(detail.project.id); }); } @@ -303,11 +279,7 @@ function App() { if (!detail) return; await run("moments", async () => { const activeKey = activeLlmProvider === "claude" ? 
anthropicKey.trim() : deepseekKey.trim(); - await invoke("generate_candidates", { - projectId: detail.project.id, - apiKey: activeKey || null, - allowDemo, - }); + await api.generateCandidates(detail.project.id, activeKey || null); await refresh(detail.project.id); }); } @@ -315,18 +287,15 @@ function App() { async function updateClipCount(count: number) { if (!detail) return; await run("clipCount", async () => { - const candidates = await invoke("set_selected_clip_count", { - projectId: detail.project.id, - count, - }); - setDetail({ ...detail, candidates }); + const candidates = await api.setSelectedClipCount(detail.project.id, count); + setDetail({ ...detail, candidates: candidates as any }); }); } async function cutCandidate(candidateId: string) { if (!detail) return; await run("cut", async () => { - await invoke("render_flat_clip_for_candidate", { candidateId }); + await api.renderClip(candidateId); await refresh(detail.project.id); }); } @@ -335,7 +304,7 @@ function App() { if (!detail) return; await run("cut", async () => { for (const candidate of selectedCandidates) { - await invoke("render_flat_clip_for_candidate", { candidateId: candidate.id }); + await api.renderClip(candidate.id); } await refresh(detail.project.id); });