Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 5 additions & 15 deletions app/audio_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import numpy as np
import sounddevice as sd

from app.audio_utils import resolve_default_input_device


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -50,12 +52,13 @@ def start(self) -> None:
return

self.flush()
self._stream = self._create_stream(self.device)
device = self.device if self.device is not None else resolve_default_input_device()
self._stream = self._create_stream(device)
try:
self._stream.start()
except Exception:
self._stream.close()
self._stream = self._create_stream(self._fallback_device())
self._stream = self._create_stream(resolve_default_input_device())
self._stream.start()

self._running = True
Expand Down Expand Up @@ -100,19 +103,6 @@ def _create_stream(self, device: int | str | None) -> sd.RawInputStream:
logger.error(msg)
raise AudioCaptureError(msg) from exc

def _fallback_device(self) -> Optional[int]:
    """Return the index of the first device that reports input channels.

    The chosen device is announced with a warning-level log entry. If
    querying or scanning the device list raises, the error is logged and
    ``None`` is returned; ``None`` is also returned when no device has
    input channels.
    """
    try:
        for position, entry in enumerate(sd.query_devices()):
            if not entry.get("max_input_channels", 0) > 0:
                continue
            label = entry.get("name", "unknown")
            logger.warning("回退至输入设备 #%s (%s)", position, label)
            return position
    except Exception as exc:
        logger.error("查询音频设备失败: %s", exc)
    return None

def _callback(self, in_data, frames, time, status): # type: ignore[override]
if status:
logger.warning("音频流状态: %s", status)
Expand Down
74 changes: 64 additions & 10 deletions app/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,63 @@
DEFAULT_NATIVE_SAMPLE_RATE = 44100


def load_audio_config() -> tuple[int | str | None, int]:
"""从配置文件加载音频设备配置
def resolve_default_input_device() -> int | None:
    """Pick the microphone the user actually treats as the default input.

    Candidates are tried in this order:

    1. A device named exactly "default" or "pulse" that has input channels.
       These are meant to match the virtual PCMs injected by
       PipeWire/PulseAudio, which follow the default source selected in
       wpctl/pavucontrol.
    2. PortAudio's own default input, ``sd.default.device[0]``.
    3. The first device that reports any input channels (last resort).

    Returns:
        The index of the chosen device within ``sd.query_devices()``, or
        ``None`` when the device list cannot be queried or contains no
        input-capable device.
    """
    import sounddevice as sd

    try:
        devices = list(sd.query_devices())
    except Exception as exc:
        logger.warning("查询音频设备列表失败: %s", exc)
        return None

    def has_input(info) -> bool:
        # A device is usable for capture only if it exposes input channels.
        return info.get("max_input_channels", 0) > 0

    # Outer loop is over names so that "default" wins over "pulse" no matter
    # where each one sits in the device list.
    for preferred in ("default", "pulse"):
        for idx, info in enumerate(devices):
            if info.get("name") == preferred and has_input(info):
                logger.info("使用服务器虚拟设备 #%s (%s)", idx, preferred)
                return idx

    try:
        pa_default = sd.default.device[0]
        if pa_default is not None and pa_default >= 0:
            info = devices[pa_default]
            if has_input(info):
                logger.info(
                    "使用 PortAudio 默认设备 #%s (%s)",
                    pa_default,
                    info.get("name", "unknown"),
                )
                return pa_default
    except Exception as exc:
        # Best effort: a non-integer or out-of-range default must not abort
        # device selection, but the failure is logged instead of being
        # swallowed silently so a misconfiguration can be diagnosed.
        logger.warning("读取 PortAudio 默认输入设备失败: %s", exc)

    for idx, info in enumerate(devices):
        if has_input(info):
            logger.info("回退至输入设备 #%s (%s)", idx, info.get("name", "unknown"))
            return idx

    logger.warning("没有发现可用的音频输入设备")
    return None


def load_audio_config() -> tuple[int | str | None, int | None]:
"""从配置文件加载音频设备配置。

Returns:
(device, sample_rate): 设备(可能为 None、整数 ID 或字符串名称)和采样率
(device, sample_rate): 没有配置文件时返回 (None, None),让调用方使用
服务器虚拟设备并直接请求 16 kHz;配置文件存在则按内容返回。
"""
config_file = Path.home() / ".config" / "vocotype" / "audio.conf"
if not config_file.exists():
logger.warning("音频配置文件不存在: %s,使用默认设备", config_file)
return None, DEFAULT_NATIVE_SAMPLE_RATE
logger.info("未找到 %s,使用系统默认输入设备", config_file)
return None, None

try:
import configparser
Expand All @@ -47,7 +94,7 @@ def load_audio_config() -> tuple[int | str | None, int]:
return device_id, sample_rate
except Exception as e:
logger.warning("读取音频配置失败: %s,使用默认设备", e)
return None, DEFAULT_NATIVE_SAMPLE_RATE
return None, None


def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
Expand All @@ -63,7 +110,14 @@ def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarra
"""
if orig_sr == target_sr:
return audio
duration = len(audio) / orig_sr
target_length = int(duration * target_sr)
indices = np.linspace(0, len(audio) - 1, target_length)
return np.interp(indices, np.arange(len(audio)), audio.astype(np.float32)).astype(np.int16)

import librosa

float_audio = audio.astype(np.float32) / 32768.0
resampled = librosa.resample(
float_audio,
orig_sr=orig_sr,
target_sr=target_sr,
res_type="soxr_hq",
)
return np.clip(resampled * 32768.0, -32768, 32767).astype(np.int16)
25 changes: 13 additions & 12 deletions fcitx5/backend/audio_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ def discover_project_root() -> Path:
PROJECT_ROOT = discover_project_root()
sys.path.insert(0, str(PROJECT_ROOT))

from app.audio_utils import load_audio_config, resample_audio, SAMPLE_RATE
from app.audio_utils import (
load_audio_config,
resample_audio,
resolve_default_input_device,
SAMPLE_RATE,
)
from app.wave_writer import write_wav

logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
Expand Down Expand Up @@ -67,16 +72,7 @@ def _resolve_input_device(self):
except Exception as exc:
logger.warning("查询设备 %s 失败: %s", self.device, exc)

try:
devices = sd.query_devices()
for idx, info in enumerate(devices):
if info.get("max_input_channels", 0) > 0:
logger.info("回退至输入设备 #%s (%s)", idx, info.get("name", "unknown"))
return idx
except Exception as exc:
logger.warning("查询输入设备列表失败: %s", exc)

return None
return resolve_default_input_device()

def _resolve_sample_rate(self, device, preferred):
"""选择可用采样率"""
Expand Down Expand Up @@ -225,7 +221,12 @@ def main():
device = args.device if args.device is not None else configured_device
if isinstance(device, str) and device.isdigit():
device = int(device)
sample_rate = args.sample_rate if args.sample_rate != 44100 else configured_sr
# Ask the sound server for 16 kHz directly so PipeWire/PulseAudio resample
# with proper anti-aliasing. Honour an explicit configured rate if set.
if args.sample_rate != 44100:
sample_rate = args.sample_rate
else:
sample_rate = configured_sr if configured_sr else SAMPLE_RATE

# 录音
recorder = AudioRecorder(device, sample_rate)
Expand Down