-
Notifications
You must be signed in to change notification settings - Fork 81
Expand file tree
/
Copy pathsample_inventory.py
More file actions
90 lines (75 loc) · 2.23 KB
/
sample_inventory.py
File metadata and controls
90 lines (75 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import hashlib
import json
import re
import subprocess
from pathlib import Path
def _run(cmd):
    """Run an external command and capture its output as text.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell is used).

    Returns:
        The ``subprocess.CompletedProcess`` with ``stdout``/``stderr``
        decoded to ``str``, or ``None`` when the command could not be
        started or did not finish within 30 seconds. A non-zero exit status
        is NOT treated as an error here (``check=False``); callers inspect
        ``returncode`` themselves.
    """
    try:
        return subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # Tool output is not guaranteed to be valid in the locale
            # encoding. With strict decoding a single bad byte raises
            # UnicodeDecodeError (a ValueError), which the handler below
            # does not catch; substitute U+FFFD instead of crashing.
            errors="replace",
            timeout=30,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError: executable missing or not runnable.
        # SubprocessError: includes TimeoutExpired.
        return None
def sha256sum(path: Path, chunk_size: int = 8192) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in binary mode and hashed incrementally, so memory use
    is bounded by *chunk_size* regardless of file size.

    Args:
        path: File to hash.
        chunk_size: Number of bytes to read per iteration. Defaults to 8192.

    Returns:
        The digest as a lowercase hex string.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) returns b"" immediately and would yield the digest of an
        # empty file; a negative size would read the whole file at once.
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:  # b"" signals end of file
                break
            digest.update(chunk)
    return digest.hexdigest()
def _parse_ffprobe_json(raw: str) -> dict:
    """Extract container, duration and stream summaries from ffprobe JSON text."""
    parsed = {"container": None, "duration_sec": None, "streams": []}
    try:
        meta = json.loads(raw or "")
    except ValueError:
        # Unparseable output: report nothing rather than guess.
        return parsed
    if not isinstance(meta, dict):
        return parsed

    fmt = meta.get("format")
    if not isinstance(fmt, dict):
        fmt = {}
    parsed["container"] = fmt.get("format_name")

    # Parse the duration on its own so that a missing or non-numeric value
    # only leaves duration_sec as None instead of discarding the stream list.
    try:
        parsed["duration_sec"] = float(fmt["duration"])
    except (KeyError, TypeError, ValueError):
        pass

    streams = meta.get("streams")
    if isinstance(streams, list):
        for stream in streams:
            if isinstance(stream, dict):
                parsed["streams"].append({
                    "type": stream.get("codec_type"),
                    "codec": stream.get("codec_name"),
                })
    return parsed


def _caption_types_from_log(log: str, echoed_path: str) -> list:
    """Return caption-type labels whose markers appear in a CCExtractor log."""
    if echoed_path:
        # NOTE(review): the tool's log is expected to echo the input path;
        # confirm against the CCExtractor version in use. Remove it (as a
        # whitespace-delimited token) so that directory or file names such
        # as "dvb/" or "clip_608.ts" are not mistaken for detections.
        log = re.sub(
            r"(?<!\S)" + re.escape(echoed_path) + r"(?!\S)", " ", log
        )
    log = log.lower()

    # The numeric markers must stand alone: not part of a longer number and
    # not the fractional/millisecond field of a value such as "00:01:05:608".
    markers = (
        ("CEA-608", r"(?<![\d.,:])608(?!\d)"),
        ("CEA-708", r"(?<![\d.,:])708(?!\d)"),
        ("DVB", r"dvb"),
    )
    return [label for label, pattern in markers if re.search(pattern, log)]


def probe_sample(sample_path: Path) -> dict:
    """Collect identifying and technical metadata for one sample file.

    The file is hashed, then inspected with the external ``ffprobe`` and
    ``ccextractor`` tools. Both tool steps are best-effort: if a tool is
    unavailable, times out, exits non-zero or produces unusable output, the
    corresponding fields keep their defaults.

    Args:
        sample_path: Path of the file to inspect.

    Returns:
        A dict with the keys:
            ``path``: ``str(sample_path)``.
            ``sha256``: hex SHA-256 digest of the file.
            ``container``: ffprobe's ``format_name``, or ``None``.
            ``streams``: list of ``{"type": ..., "codec": ...}`` dicts,
                one per stream reported by ffprobe.
            ``caption_types_detected``: subset of ``"CEA-608"``,
                ``"CEA-708"``, ``"DVB"`` (in that order), based on a
                heuristic scan of CCExtractor's stderr output.
            ``duration_sec``: ffprobe's duration as ``float``, or ``None``.

    Raises:
        OSError: If the file cannot be read for hashing.
    """
    path_arg = str(sample_path)
    result = {
        "path": path_arg,
        "sha256": sha256sum(sample_path),
        "container": None,
        "streams": [],
        "caption_types_detected": [],
        "duration_sec": None,
    }

    # ---- ffprobe ----
    ffprobe = _run([
        "ffprobe",
        "-v", "error",
        "-show_format",
        "-show_streams",
        "-print_format", "json",
        path_arg,
    ])
    if ffprobe is not None and ffprobe.returncode == 0:
        result.update(_parse_ffprobe_json(ffprobe.stdout))

    # ---- CCExtractor ----
    cce = _run([
        "ccextractor",
        path_arg,
        "-stdout",
    ])
    if cce is not None and cce.returncode == 0:
        # With -stdout the captions go to stdout, so the log text to scan
        # is taken from stderr.
        result["caption_types_detected"] = _caption_types_from_log(
            cce.stderr or "", path_arg
        )

    return result
def inventory_samples(sample_root: Path) -> list:
    """Probe every regular file below *sample_root*.

    Args:
        sample_root: Directory that is searched recursively.

    Returns:
        A list with one ``probe_sample`` result per file, ordered by path.
        Directories are skipped; an empty tree yields an empty list.
    """
    # rglob() yields entries in filesystem order, which differs between
    # runs and machines; sort so the inventory is reproducible and diffable.
    sample_files = sorted(
        entry for entry in sample_root.rglob("*") if entry.is_file()
    )
    return [probe_sample(entry) for entry in sample_files]