From dff63d7d5e8ed104da2e5dd9ac81a15f569a2827 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 6 May 2026 19:01:13 +0800 Subject: [PATCH] audio: pick the server-virtual default mic and resample without aliasing On Linux, sounddevice (PortAudio) only sees ALSA devices. PortAudio has no native PipeWire/PulseAudio host API, so the only way to follow the user's default mic selection from wpctl/pavucontrol is to open the ALSA "default" or "pulse" virtual PCM that PipeWire/PulseAudio inject into ALSA. The previous fallback grabbed the first device with input channels, which on most desktops is a hardware capture card such as hw:0,0, not the one the user actually selected. Recording from the wrong jack and then linearly resampling 44.1 kHz to 16 kHz with np.interp (no anti-aliasing low-pass) produced the garbled output reported in issue #5. Three changes: 1. resolve_default_input_device() picks: ALSA "default" -> ALSA "pulse" -> sd.default.device[0] -> first input device. fcitx5/backend/audio_recorder.py and app/audio_capture.py both honour an explicit device override first and otherwise delegate to it. 2. Ask the sound server for 16 kHz directly. PipeWire and PulseAudio carry band-limited resamplers, so the client does not need to resample. load_audio_config() now returns (None, None) when audio.conf is missing or cannot be read, signalling "nothing configured", and audio_recorder then defaults its target rate to SAMPLE_RATE. 3. Replace np.interp with librosa.resample(res_type="soxr_hq") for the client-side fallback. soxr_hq provides FIR low-pass plus polyphase filtering. librosa is already a runtime dependency. 
Signed-off-by: Huang Rui --- app/audio_capture.py | 20 +++------ app/audio_utils.py | 74 +++++++++++++++++++++++++++----- fcitx5/backend/audio_recorder.py | 25 +++++------ 3 files changed, 82 insertions(+), 37 deletions(-) diff --git a/app/audio_capture.py b/app/audio_capture.py index 821c054..5856409 100644 --- a/app/audio_capture.py +++ b/app/audio_capture.py @@ -10,6 +10,8 @@ import numpy as np import sounddevice as sd +from app.audio_utils import resolve_default_input_device + logger = logging.getLogger(__name__) @@ -50,12 +52,13 @@ def start(self) -> None: return self.flush() - self._stream = self._create_stream(self.device) + device = self.device if self.device is not None else resolve_default_input_device() + self._stream = self._create_stream(device) try: self._stream.start() except Exception: self._stream.close() - self._stream = self._create_stream(self._fallback_device()) + self._stream = self._create_stream(resolve_default_input_device()) self._stream.start() self._running = True @@ -100,19 +103,6 @@ def _create_stream(self, device: int | str | None) -> sd.RawInputStream: logger.error(msg) raise AudioCaptureError(msg) from exc - def _fallback_device(self) -> Optional[int]: - try: - devices = sd.query_devices() - for idx, info in enumerate(devices): - if info.get("max_input_channels", 0) > 0: - logger.warning( - "回退至输入设备 #%s (%s)", idx, info.get("name", "unknown") - ) - return idx - except Exception as exc: - logger.error("查询音频设备失败: %s", exc) - return None - def _callback(self, in_data, frames, time, status): # type: ignore[override] if status: logger.warning("音频流状态: %s", status) diff --git a/app/audio_utils.py b/app/audio_utils.py index bc9a4eb..fbeefb9 100644 --- a/app/audio_utils.py +++ b/app/audio_utils.py @@ -17,16 +17,63 @@ DEFAULT_NATIVE_SAMPLE_RATE = 44100 -def load_audio_config() -> tuple[int | str | None, int]: - """从配置文件加载音频设备配置 +def resolve_default_input_device() -> int | None: + """挑选用户实际的默认麦克风。 + + 优先级: + 1. 
ALSA "default" / "pulse" 这两个由 PipeWire/PulseAudio 注入的虚拟 + PCM,跟随 wpctl/pavucontrol 选择的默认源。 + 2. PortAudio 自己认定的 default (sd.default.device[0])。 + 3. 第一个有输入通道的设备(兜底)。 + """ + import sounddevice as sd + + try: + devices = list(sd.query_devices()) + except Exception as exc: + logger.warning("查询音频设备列表失败: %s", exc) + return None + + for preferred in ("default", "pulse"): + for idx, info in enumerate(devices): + if info.get("name") == preferred and info.get("max_input_channels", 0) > 0: + logger.info("使用服务器虚拟设备 #%s (%s)", idx, preferred) + return idx + + try: + pa_default = sd.default.device[0] + if pa_default is not None and pa_default >= 0: + info = devices[pa_default] + if info.get("max_input_channels", 0) > 0: + logger.info( + "使用 PortAudio 默认设备 #%s (%s)", + pa_default, + info.get("name", "unknown"), + ) + return pa_default + except Exception: + pass + + for idx, info in enumerate(devices): + if info.get("max_input_channels", 0) > 0: + logger.info("回退至输入设备 #%s (%s)", idx, info.get("name", "unknown")) + return idx + + logger.warning("没有发现可用的音频输入设备") + return None + + +def load_audio_config() -> tuple[int | str | None, int | None]: + """从配置文件加载音频设备配置。 Returns: - (device, sample_rate): 设备(可能为 None、整数 ID 或字符串名称)和采样率 + (device, sample_rate): 没有配置文件时返回 (None, None),让调用方使用 + 服务器虚拟设备并直接请求 16 kHz;配置文件存在则按内容返回。 """ config_file = Path.home() / ".config" / "vocotype" / "audio.conf" if not config_file.exists(): - logger.warning("音频配置文件不存在: %s,使用默认设备", config_file) - return None, DEFAULT_NATIVE_SAMPLE_RATE + logger.info("未找到 %s,使用系统默认输入设备", config_file) + return None, None try: import configparser @@ -47,7 +94,7 @@ def load_audio_config() -> tuple[int | str | None, int]: return device_id, sample_rate except Exception as e: logger.warning("读取音频配置失败: %s,使用默认设备", e) - return None, DEFAULT_NATIVE_SAMPLE_RATE + return None, None def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray: @@ -63,7 +110,14 @@ def resample_audio(audio: np.ndarray, orig_sr: int, 
target_sr: int) -> np.ndarra """ if orig_sr == target_sr: return audio - duration = len(audio) / orig_sr - target_length = int(duration * target_sr) - indices = np.linspace(0, len(audio) - 1, target_length) - return np.interp(indices, np.arange(len(audio)), audio.astype(np.float32)).astype(np.int16) + + import librosa + + float_audio = audio.astype(np.float32) / 32768.0 + resampled = librosa.resample( + float_audio, + orig_sr=orig_sr, + target_sr=target_sr, + res_type="soxr_hq", + ) + return np.clip(resampled * 32768.0, -32768, 32767).astype(np.int16) diff --git a/fcitx5/backend/audio_recorder.py b/fcitx5/backend/audio_recorder.py index 3014b9d..a54bbb1 100755 --- a/fcitx5/backend/audio_recorder.py +++ b/fcitx5/backend/audio_recorder.py @@ -38,7 +38,12 @@ def discover_project_root() -> Path: PROJECT_ROOT = discover_project_root() sys.path.insert(0, str(PROJECT_ROOT)) -from app.audio_utils import load_audio_config, resample_audio, SAMPLE_RATE +from app.audio_utils import ( + load_audio_config, + resample_audio, + resolve_default_input_device, + SAMPLE_RATE, +) from app.wave_writer import write_wav logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') @@ -67,16 +72,7 @@ def _resolve_input_device(self): except Exception as exc: logger.warning("查询设备 %s 失败: %s", self.device, exc) - try: - devices = sd.query_devices() - for idx, info in enumerate(devices): - if info.get("max_input_channels", 0) > 0: - logger.info("回退至输入设备 #%s (%s)", idx, info.get("name", "unknown")) - return idx - except Exception as exc: - logger.warning("查询输入设备列表失败: %s", exc) - - return None + return resolve_default_input_device() def _resolve_sample_rate(self, device, preferred): """选择可用采样率""" @@ -225,7 +221,12 @@ def main(): device = args.device if args.device is not None else configured_device if isinstance(device, str) and device.isdigit(): device = int(device) - sample_rate = args.sample_rate if args.sample_rate != 44100 else configured_sr + # Ask the sound server for 16 kHz 
directly so PipeWire/PulseAudio resample + # with proper anti-aliasing. Honour an explicit configured rate if set. + if args.sample_rate != 44100: + sample_rate = args.sample_rate + else: + sample_rate = configured_sr if configured_sr else SAMPLE_RATE # 录音 recorder = AudioRecorder(device, sample_rate)