11from __future__ import annotations
22
33import asyncio
4+ from dataclasses import dataclass
5+ import sys
46import uuid
57from pathlib import Path
8+ from urllib .parse import unquote , urlparse
69
710import httpx
811
912
13+ _DIRECT_MEDIA_EXTENSIONS = {
14+ ".aac" ,
15+ ".flac" ,
16+ ".m4a" ,
17+ ".mka" ,
18+ ".mp3" ,
19+ ".mp4" ,
20+ ".ogg" ,
21+ ".opus" ,
22+ ".wav" ,
23+ ".webm" ,
24+ }
25+
26+
27+ @dataclass (slots = True )
28+ class PreparedAudio :
29+ audio_path : Path
30+ title : str | None = None
31+
32+
1033async def ensure_storage_dirs (storage_dir : str ) -> None :
1134 base = Path (storage_dir )
1235 (base / "raw" ).mkdir (parents = True , exist_ok = True )
@@ -24,12 +47,17 @@ async def download_to_file(client: httpx.AsyncClient, url: str, dest: Path) -> N
2447
2548async def extract_audio_ffmpeg (input_path : Path , output_path : Path ) -> None :
2649 proc = await asyncio .create_subprocess_exec (
27- "ffmpeg" , "-y" ,
28- "-i" , str (input_path ),
50+ "ffmpeg" ,
51+ "-y" ,
52+ "-i" ,
53+ str (input_path ),
2954 "-vn" ,
30- "-acodec" , "pcm_s16le" ,
31- "-ar" , "16000" ,
32- "-ac" , "1" ,
55+ "-acodec" ,
56+ "pcm_s16le" ,
57+ "-ar" ,
58+ "16000" ,
59+ "-ac" ,
60+ "1" ,
3361 str (output_path ),
3462 stdout = asyncio .subprocess .DEVNULL ,
3563 stderr = asyncio .subprocess .PIPE ,
@@ -43,24 +71,125 @@ def _is_url(value: str) -> bool:
4371 return value .startswith ("http://" ) or value .startswith ("https://" )
4472
4573
74+ def _url_suffix (url : str ) -> str :
75+ return Path (urlparse (url ).path ).suffix .lower ()
76+
77+
def _looks_like_direct_media_url(url: str) -> bool:
    """Return True when the URL's path ends in a known media file extension."""
    extension = _url_suffix(url)
    return extension in _DIRECT_MEDIA_EXTENSIONS
80+
81+
82+ def _title_from_url (url : str ) -> str | None :
83+ stem = Path (unquote (urlparse (url ).path )).stem .strip ()
84+ return stem or None
85+
86+
87+ def _title_from_path (path : Path ) -> str | None :
88+ stem = path .stem .strip ()
89+ return stem or None
90+
91+
async def download_with_ytdlp(
    url: str,
    raw_dir: Path,
    file_id: uuid.UUID,
) -> tuple[Path, str | None]:
    """Download media for ``url`` with yt-dlp and report where it landed.

    yt-dlp is run as ``<current interpreter> -m yt_dlp`` and told to write to
    ``raw_dir/<file_id>.<ext>``. Two ``--print`` templates make it emit a
    ``title:<title>`` line and a ``filepath:<path>`` line on stdout, which are
    parsed to obtain the result.

    Args:
        url: Media or page URL handed to yt-dlp.
        raw_dir: Directory that receives the download; created if missing.
        file_id: Identifier used as the output file's stem.

    Returns:
        A ``(path, title)`` tuple. ``title`` is ``None`` when yt-dlp printed
        no non-empty title line.

    Raises:
        RuntimeError: If yt-dlp exits with a non-zero status, or if it
            succeeds but no downloaded file can be located.
    """
    raw_dir.mkdir(parents=True, exist_ok=True)
    output_template = str(raw_dir / f"{file_id}.%(ext)s")

    proc = await asyncio.create_subprocess_exec(
        sys.executable,
        "-m",
        "yt_dlp",
        "--no-playlist",
        "--no-warnings",
        "-f",
        "bestaudio/best",
        "--output",
        output_template,
        "--print",
        "title:%(title)s",
        "--print",
        "after_move:filepath:%(filepath)s",
        # End-of-options marker: a URL that starts with "-" must never be
        # interpreted as a yt-dlp flag.
        "--",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        err = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"yt-dlp failed: {err}")

    downloaded_path: Path | None = None
    title: str | None = None
    for raw_line in stdout.decode(errors="replace").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("title:"):
            value = line.removeprefix("title:").strip()
            if value:
                title = value
            continue

        # Both tagged "filepath:" lines and bare lines are path candidates.
        # Require a regular file: an empty value resolves to "." and other
        # lines may name a directory, neither of which is a download.
        candidate = Path(line.removeprefix("filepath:").strip())
        if candidate.is_file():
            downloaded_path = candidate

    if downloaded_path:
        return downloaded_path, title

    # Fallback for output that could not be parsed into an existing path
    # (for example after lossy decoding): take the newest file carrying the
    # expected stem.
    matches = sorted(
        (entry for entry in raw_dir.glob(f"{file_id}.*") if entry.is_file()),
        key=lambda entry: entry.stat().st_mtime,
        reverse=True,
    )
    if matches:
        return matches[0], title

    raise RuntimeError("yt-dlp completed but no media file was found")
156+
157+
async def prepare_audio(
    client: httpx.AsyncClient,
    source: str,
    storage_dir: str,
) -> PreparedAudio:
    """Fetch or locate ``source`` and extract its audio into the storage dir.

    For URLs, a link whose path ends in a known media extension is first
    downloaded directly with ``client``; if that fails, or the URL does not
    look like a direct media link, the download is delegated to yt-dlp.
    Any other ``source`` is treated as a local file path. The input is then
    passed to ``extract_audio_ffmpeg``, writing
    ``<storage_dir>/audio/<uuid>.wav``.

    Args:
        client: HTTP client used for direct downloads.
        source: ``http(s)`` URL or local file path.
        storage_dir: Base directory holding the ``raw`` and ``audio`` folders.

    Returns:
        A ``PreparedAudio`` with the extracted audio path and a title taken
        from yt-dlp, the URL, or the local file name (``None`` if none of
        these yields one).

    Raises:
        FileNotFoundError: If ``source`` is a local path that does not exist.
        RuntimeError: If the yt-dlp download fails.
    """
    await ensure_storage_dirs(storage_dir)
    base = Path(storage_dir)
    file_id = uuid.uuid4()
    inferred_title: str | None = None

    if _is_url(source):
        input_path: Path | None = None

        if _looks_like_direct_media_url(source):
            raw_path = base / "raw" / f"{file_id}{_url_suffix(source)}"
            try:
                await download_to_file(client, source, raw_path)
            except Exception:
                # Deliberate best effort: any failure falls through to yt-dlp.
                # Remove a partially written file first, because yt-dlp
                # writes to the same "<file_id>.<ext>" stem and could
                # otherwise treat the truncated file as already downloaded.
                try:
                    raw_path.unlink(missing_ok=True)
                except OSError:
                    pass  # Cleanup is best effort too; let yt-dlp try anyway.
            else:
                input_path = raw_path
                inferred_title = _title_from_url(source)

        if input_path is None:
            input_path, inferred_title = await download_with_ytdlp(
                source, base / "raw", file_id
            )

        # yt-dlp may not report a title; fall back to the URL's file name.
        if not inferred_title:
            inferred_title = _title_from_url(source)
    else:
        input_path = Path(source)
        if not input_path.exists():
            raise FileNotFoundError(f"Source file not found: {source}")
        inferred_title = _title_from_path(input_path)

    output_path = base / "audio" / f"{file_id}.wav"
    await extract_audio_ffmpeg(input_path, output_path)
    return PreparedAudio(audio_path=output_path, title=inferred_title)
0 commit comments