Skip to content

Commit aad18a1

Browse files
committed
fix: batch 27 — 17 audit fixes for v1.4.0 new code
MCP server: job_id path injection blocked (regex validation), filepath traversal blocked (defense-in-depth), CSRF retry on 403 (backend restart), stale version fixed (uses __version__), JSON parse error response per spec. SeamlessM4T: model loaded ONCE for all segments (was loading 2.3GB per-segment causing OOM); GPU cleanup in finally block. Transcript cache: only used for SRT/VTT/JSON export (not styled captions); cached segments now have .start/.end/.text/.words attrs. Queue allowlist: added 4 missing v1.4.0 endpoints (ace-step, ai-denoise, ai-lut, lut-blend). Other fixes: TTS engine allowlist, ACE-Step result validation, BasicVSR++ missing weights error, LUT blend size mismatch error, Chatterbox sr fallback, ClearerVoice output validation, audio-separator stems filtering, waveform button type, PySceneDetect start boundary, Remotion template component, LUT name reserved word validation.
1 parent 9e20b08 commit aad18a1

13 files changed

Lines changed: 170 additions & 43 deletions

File tree

extension/com.opencut.panel/client/main.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5268,6 +5268,7 @@
52685268
parent = parent.parentNode;
52695269
if (!parent || parent.querySelector(".waveform-audio-btn")) continue;
52705270
var btn = document.createElement("button");
5271+
btn.type = "button";
52715272
btn.className = "btn-outline btn-sm waveform-audio-btn";
52725273
btn.textContent = "Preview Waveform";
52735274
btn.style.marginBottom = "6px";

opencut/core/audio_enhance.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,9 @@ def enhance_speech_clearvoice(
348348
# Write result using the library's write method
349349
cv.write(result, output_path=output_path)
350350

351+
if not os.path.isfile(output_path) or os.path.getsize(output_path) == 0:
352+
raise RuntimeError("ClearerVoice produced empty or missing output file")
353+
351354
if on_progress:
352355
on_progress(100, "Audio enhanced!")
353356

opencut/core/lut_library.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -849,14 +849,18 @@ def _parse_cube(path):
849849
ba = vals_a[i][2] * (1 - blend) + vals_b[i][2] * blend
850850
f.write(f"{_clamp(ra):.6f} {_clamp(ga):.6f} {_clamp(ba):.6f}\n")
851851
else:
852-
# Sizes differ — generate identity-blended output
852+
raise ValueError(
853+
f"LUT size mismatch: {lut_a_name} has {len(vals_a)} entries, "
854+
f"{lut_b_name} has {len(vals_b)} entries (need {total_entries} for size {size}). "
855+
f"Both LUTs must have the same cube size for blending."
856+
)
857+
# Unreachable fallback
853858
for b_i in range(size):
854859
for g_i in range(size):
855860
for r_i in range(size):
856861
r = r_i / (size - 1)
857862
g = g_i / (size - 1)
858863
b = b_i / (size - 1)
859-
# Just write identity when sizes mismatch (safe fallback)
860864
f.write(f"{r:.6f} {g:.6f} {b:.6f}\n")
861865

862866
if on_progress:

opencut/core/motion_graphics.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,4 +421,28 @@ def _generate_default_template(template_dir: str, template_name: str):
421421
with open(os.path.join(template_dir, "package.json"), "w") as f:
422422
json.dump(package, f, indent=2)
423423

424+
# Create a minimal Main composition component
425+
comp_code = """import {Composition, useCurrentFrame, useVideoConfig, interpolate} from 'remotion';
426+
import React from 'react';
427+
428+
const Main = () => {
429+
const frame = useCurrentFrame();
430+
const {fps, durationInFrames, width, height} = useVideoConfig();
431+
const opacity = interpolate(frame, [0, fps * 0.5], [0, 1], {extrapolateRight: 'clamp'});
432+
const outOpacity = interpolate(frame, [durationInFrames - fps * 0.5, durationInFrames], [1, 0], {extrapolateLeft: 'clamp'});
433+
return (
434+
<div style={{width, height, display: 'flex', alignItems: 'center', justifyContent: 'center', background: '#1e1e2e', opacity: Math.min(opacity, outOpacity)}}>
435+
<h1 style={{color: '#cdd6f4', fontSize: 64, fontFamily: 'sans-serif', textAlign: 'center', padding: 40}}>Title</h1>
436+
</div>
437+
);
438+
};
439+
440+
export const RemotionRoot = () => (
441+
<Composition id="Main" component={Main} durationInFrames={150} fps={30} width={1920} height={1080} />
442+
);
443+
"""
444+
os.makedirs(os.path.join(template_dir, "src"), exist_ok=True)
445+
with open(os.path.join(template_dir, "src", "index.tsx"), "w") as f:
446+
f.write(comp_code)
447+
424448
logger.info("Generated default Remotion template: %s", template_dir)

opencut/core/music_ai.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,12 +310,20 @@ def generate_music_ace_step(
310310

311311
# Save output
312312
import soundfile as sf
313-
audio_data = result["audio"]
313+
if isinstance(result, dict):
314+
audio_data = result.get("audio")
315+
elif hasattr(result, "cpu"):
316+
audio_data = result # tensor returned directly
317+
else:
318+
raise RuntimeError(f"Unexpected ACE-Step result type: {type(result)}")
319+
if audio_data is None:
320+
raise RuntimeError("ACE-Step produced no audio output")
314321
if hasattr(audio_data, "cpu"):
315322
audio_data = audio_data.cpu().numpy()
316323
if audio_data.ndim == 2:
317324
audio_data = audio_data.T
318-
sf.write(output_path, audio_data, result.get("sample_rate", 44100))
325+
sr = result.get("sample_rate", 44100) if isinstance(result, dict) else 44100
326+
sf.write(output_path, audio_data, sr)
319327

320328
# Free GPU
321329
try:

opencut/core/scene_detect.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -502,15 +502,16 @@ def detect_scenes_pyscenedetect(
502502
if on_progress:
503503
on_progress(80, "Building scene boundaries...")
504504

505-
boundaries = []
505+
boundaries = [SceneBoundary(time=0.0, frame=0, score=1.0, label="Start")]
506506
for i, (start, end) in enumerate(scene_list):
507507
start_sec = start.get_seconds()
508-
boundaries.append(SceneBoundary(
509-
time=round(start_sec, 3),
510-
frame=start.get_frames(),
511-
score=1.0,
512-
label=f"Scene {i + 1}",
513-
))
508+
if start_sec > 0.01: # Skip if scene starts at 0 (duplicate of Start)
509+
boundaries.append(SceneBoundary(
510+
time=round(start_sec, 3),
511+
frame=start.get_frames(),
512+
score=1.0,
513+
label=f"Scene {i + 1}",
514+
))
514515

515516
total_scenes = len(boundaries)
516517
avg_scene = duration / total_scenes if total_scenes > 0 else duration

opencut/core/video_ai.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,11 @@ def _denoise_basicvsr(
486486
ckpt = torch.load(weights_path, map_location=device, weights_only=True)
487487
model.load_state_dict(ckpt.get("params", ckpt.get("params_ema", ckpt)), strict=False)
488488
else:
489-
logger.warning("BasicVSR++ weights not found at %s — using untrained model", weights_path)
489+
raise RuntimeError(
490+
f"BasicVSR++ weights not found at {weights_path}. "
491+
"Download from https://github.com/ckkelvinchan/BasicVSR_PlusPlus "
492+
"and place as ~/.opencut/models/basicvsrpp_denoise.pth"
493+
)
490494

491495
model.eval()
492496

opencut/core/voice_gen.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@ def chatterbox_generate(
365365
on_progress(80, "Saving audio...")
366366

367367
import torchaudio
368-
torchaudio.save(output_path, wav, model.sr)
368+
sample_rate = getattr(model, "sr", getattr(model, "sample_rate", 24000))
369+
torchaudio.save(output_path, wav, sample_rate)
369370

370371
# Free GPU memory
371372
try:

opencut/mcp_server.py

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,37 @@
1515

1616
import json
1717
import logging
18+
import re
1819
import sys
1920
import urllib.error
2021
import urllib.request
2122

23+
from opencut import __version__
24+
2225
logger = logging.getLogger("opencut.mcp")
2326

2427
BACKEND_URL = "http://127.0.0.1:5679"
2528
_csrf_token = ""
2629

2730

31+
def _refresh_csrf():
    """Fetch a fresh CSRF token from the backend's ``/health`` endpoint.

    The token is stored in the module-level ``_csrf_token``.  This is a
    best-effort operation: if the request fails, or the response is not a
    JSON object, the previously cached token is left untouched and the
    failure is logged instead of raised, so callers can still attempt their
    request and report the backend's own error.
    """
    global _csrf_token
    try:
        req = urllib.request.Request(f"{BACKEND_URL}/health")
        with urllib.request.urlopen(req, timeout=5) as resp:
            body = json.loads(resp.read())
    except Exception as exc:
        # Deliberately broad: connection errors, timeouts, HTTP protocol
        # errors and malformed JSON must all leave the caller running.
        logger.debug("CSRF token refresh failed: %s", exc)
        return

    if not isinstance(body, dict):
        logger.debug("CSRF token refresh: unexpected /health payload type %s", type(body).__name__)
        return

    token = body.get("csrf_token", "")
    # A null or non-string token is treated as "no token" rather than being
    # forwarded into a request header later.
    _csrf_token = token if isinstance(token, str) else ""
41+
42+
2843
def _api(method, path, data=None):
2944
"""Call the OpenCut Flask backend."""
3045
global _csrf_token
3146

32-
# Get CSRF token if we don't have one
3347
if not _csrf_token:
34-
try:
35-
req = urllib.request.Request(f"{BACKEND_URL}/health")
36-
with urllib.request.urlopen(req, timeout=5) as resp:
37-
body = json.loads(resp.read())
38-
_csrf_token = body.get("csrf_token", "")
39-
except Exception:
40-
pass
48+
_refresh_csrf()
4149

4250
url = f"{BACKEND_URL}{path}"
4351
headers = {"Content-Type": "application/json"}
@@ -51,6 +59,16 @@ def _api(method, path, data=None):
5159
with urllib.request.urlopen(req, timeout=120) as resp:
5260
return json.loads(resp.read())
5361
except urllib.error.HTTPError as e:
62+
# Retry once on 403 (stale CSRF token after backend restart)
63+
if e.code == 403 and _csrf_token:
64+
_refresh_csrf()
65+
headers["X-OpenCut-Token"] = _csrf_token
66+
req2 = urllib.request.Request(url, data=body, headers=headers, method=method)
67+
try:
68+
with urllib.request.urlopen(req2, timeout=120) as resp2:
69+
return json.loads(resp2.read())
70+
except Exception:
71+
pass
5472
error_body = e.read().decode(errors="replace")
5573
try:
5674
return json.loads(error_body)
@@ -214,16 +232,34 @@ def _api(method, path, data=None):
214232
}
215233

216234

235+
def _validate_mcp_filepath(args, key="filepath"):
236+
"""Validate filepath arguments at MCP layer (defense-in-depth)."""
237+
path = args.get(key, "")
238+
if not isinstance(path, str):
239+
return False
240+
if ".." in path or "\x00" in path:
241+
return False
242+
return True
243+
244+
217245
def handle_tool_call(tool_name, arguments):
218246
"""Execute an MCP tool call by proxying to the Flask backend."""
219247
if tool_name not in _TOOL_ROUTES:
220248
return {"error": f"Unknown tool: {tool_name}"}
221249

250+
# Validate filepath arguments at MCP layer
251+
for key in ("filepath", "style_image", "voice_ref"):
252+
if key in arguments and not _validate_mcp_filepath(arguments, key):
253+
return {"error": f"Invalid {key}: path traversal or null bytes detected"}
254+
222255
method, path = _TOOL_ROUTES[tool_name]
223256

224257
# Handle special routing
225258
if tool_name == "opencut_job_status":
226259
job_id = arguments.get("job_id", "")
260+
# Validate job_id format (UUID hex + hyphens only)
261+
if not re.match(r'^[a-f0-9-]+$', job_id):
262+
return {"error": "Invalid job_id format"}
227263
path = f"/status/{job_id}"
228264
return _api("GET", path)
229265

@@ -255,6 +291,9 @@ def run_mcp_stdio():
255291
try:
256292
msg = json.loads(line)
257293
except json.JSONDecodeError:
294+
err = {"jsonrpc": "2.0", "id": None, "error": {"code": -32700, "message": "Parse error"}}
295+
sys.stdout.write(json.dumps(err) + "\n")
296+
sys.stdout.flush()
258297
continue
259298

260299
msg_id = msg.get("id")
@@ -270,7 +309,7 @@ def run_mcp_stdio():
270309
"capabilities": {"tools": {}},
271310
"serverInfo": {
272311
"name": "opencut",
273-
"version": "1.3.1",
312+
"version": __version__,
274313
},
275314
},
276315
}

opencut/routes/audio.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,14 @@ def _process():
538538
output_files = separator.separate(input_audio)
539539

540540
output_paths = []
541+
# Filter to requested stems only
542+
requested = set(stems)
541543
for f in output_files:
542544
if os.path.isfile(f):
543-
output_paths.append(f)
545+
fname = os.path.splitext(os.path.basename(f))[0].lower()
546+
# Match if any requested stem appears in filename
547+
if not requested or any(s in fname for s in requested):
548+
output_paths.append(f)
544549

545550
if temp_audio and os.path.exists(temp_audio):
546551
try:
@@ -1124,6 +1129,8 @@ def tts_generate():
11241129
data = request.get_json(force=True)
11251130
text = data.get("text", "").strip()
11261131
engine = data.get("engine", "edge")
1132+
if engine not in ("edge", "kokoro", "chatterbox"):
1133+
engine = "edge"
11271134
voice = data.get("voice", "en-US-AriaNeural")
11281135
import re as _re_tts
11291136
rate = data.get("rate", "+0%")

0 commit comments

Comments
 (0)