From dff63d7d5e8ed104da2e5dd9ac81a15f569a2827 Mon Sep 17 00:00:00 2001
From: Huang Rui <vowstar@gmail.com>
Date: Wed, 6 May 2026 19:01:13 +0800
Subject: [PATCH] audio: pick the server-virtual default mic and resample
 without aliasing

On Linux, sounddevice (PortAudio) only sees ALSA devices. PortAudio has
no native PipeWire/PulseAudio host API, so the only way to follow the
user's default mic selection from wpctl/pavucontrol is to open the ALSA
"default" or "pulse" virtual PCM that PipeWire/PulseAudio inject into
ALSA. The previous fallback grabbed the first device with input
channels, which on most desktops is a hardware capture card such as
hw:0,0, not the one the user actually selected.

Recording from the wrong jack and then linearly resampling 44.1 kHz to
16 kHz with np.interp (no anti-aliasing low-pass) produced the garbled
output reported in issue #5.

Three changes:

1. resolve_default_input_device() picks: ALSA "default" -> ALSA
   "pulse" -> sd.default.device[0] -> first input device. Callers apply
   an explicit device override first and only then delegate to it;
   fcitx5/backend/audio_recorder.py and app/audio_capture.py both do so.

2. Ask the sound server for 16 kHz directly. PipeWire and PulseAudio
   carry band-limited resamplers, so the client does not need to
   resample. load_audio_config() now returns (None, None) when
   audio.conf is missing or cannot be read, signalling "use server
   default rate", and audio_recorder falls back to SAMPLE_RATE as its
   capture rate when no rate is configured.

3. Replace np.interp with librosa.resample(res_type="soxr_hq") for the
   client-side fallback. soxr_hq provides FIR low-pass plus polyphase
   filtering. librosa is already a runtime dependency.

Signed-off-by: Huang Rui <vowstar@gmail.com>
---
 app/audio_capture.py             | 20 +++------
 app/audio_utils.py               | 74 +++++++++++++++++++++++++++-----
 fcitx5/backend/audio_recorder.py | 25 +++++------
 3 files changed, 82 insertions(+), 37 deletions(-)

diff --git a/app/audio_capture.py b/app/audio_capture.py
index 821c054..5856409 100644
--- a/app/audio_capture.py
+++ b/app/audio_capture.py
@@ -10,6 +10,8 @@
 import numpy as np
 import sounddevice as sd
 
+from app.audio_utils import resolve_default_input_device
+
 
 logger = logging.getLogger(__name__)
 
@@ -50,12 +52,13 @@ def start(self) -> None:
                 return
 
             self.flush()
-            self._stream = self._create_stream(self.device)
+            device = self.device if self.device is not None else resolve_default_input_device()
+            self._stream = self._create_stream(device)
             try:
                 self._stream.start()
             except Exception:
                 self._stream.close()
-                self._stream = self._create_stream(self._fallback_device())
+                self._stream = self._create_stream(resolve_default_input_device())
                 self._stream.start()
 
             self._running = True
@@ -100,19 +103,6 @@ def _create_stream(self, device: int | str | None) -> sd.RawInputStream:
             logger.error(msg)
             raise AudioCaptureError(msg) from exc
 
-    def _fallback_device(self) -> Optional[int]:
-        try:
-            devices = sd.query_devices()
-            for idx, info in enumerate(devices):
-                if info.get("max_input_channels", 0) > 0:
-                    logger.warning(
-                        "回退至输入设备 #%s (%s)", idx, info.get("name", "unknown")
-                    )
-                    return idx
-        except Exception as exc:
-            logger.error("查询音频设备失败: %s", exc)
-        return None
-
     def _callback(self, in_data, frames, time, status):  # type: ignore[override]
         if status:
             logger.warning("音频流状态: %s", status)
diff --git a/app/audio_utils.py b/app/audio_utils.py
index bc9a4eb..fbeefb9 100644
--- a/app/audio_utils.py
+++ b/app/audio_utils.py
@@ -17,16 +17,63 @@
 DEFAULT_NATIVE_SAMPLE_RATE = 44100
 
 
-def load_audio_config() -> tuple[int | str | None, int]:
-    """从配置文件加载音频设备配置
+def resolve_default_input_device() -> int | None:
+    """挑选用户实际的默认麦克风。
+
+    优先级：
+      1. ALSA "default" / "pulse" 这两个由 PipeWire/PulseAudio 注入的虚拟
+         PCM，跟随 wpctl/pavucontrol 选择的默认源。
+      2. PortAudio 自己认定的 default (sd.default.device[0])。
+      3. 第一个有输入通道的设备（兜底）。
+    """
+    import sounddevice as sd
+
+    try:
+        devices = list(sd.query_devices())
+    except Exception as exc:
+        logger.warning("查询音频设备列表失败: %s", exc)
+        return None
+
+    for preferred in ("default", "pulse"):
+        for idx, info in enumerate(devices):
+            if info.get("name") == preferred and info.get("max_input_channels", 0) > 0:
+                logger.info("使用服务器虚拟设备 #%s (%s)", idx, preferred)
+                return idx
+
+    try:
+        pa_default = sd.default.device[0]
+        if pa_default is not None and pa_default >= 0:
+            info = devices[pa_default]
+            if info.get("max_input_channels", 0) > 0:
+                logger.info(
+                    "使用 PortAudio 默认设备 #%s (%s)",
+                    pa_default,
+                    info.get("name", "unknown"),
+                )
+                return pa_default
+    except Exception:
+        pass
+
+    for idx, info in enumerate(devices):
+        if info.get("max_input_channels", 0) > 0:
+            logger.info("回退至输入设备 #%s (%s)", idx, info.get("name", "unknown"))
+            return idx
+
+    logger.warning("没有发现可用的音频输入设备")
+    return None
+
+
+def load_audio_config() -> tuple[int | str | None, int | None]:
+    """从配置文件加载音频设备配置。
 
     Returns:
-        (device, sample_rate): 设备（可能为 None、整数 ID 或字符串名称）和采样率
+        (device, sample_rate): 没有配置文件时返回 (None, None)，让调用方使用
+        服务器虚拟设备并直接请求 16 kHz；配置文件存在则按内容返回。
     """
     config_file = Path.home() / ".config" / "vocotype" / "audio.conf"
     if not config_file.exists():
-        logger.warning("音频配置文件不存在: %s，使用默认设备", config_file)
-        return None, DEFAULT_NATIVE_SAMPLE_RATE
+        logger.info("未找到 %s，使用系统默认输入设备", config_file)
+        return None, None
 
     try:
         import configparser
@@ -47,7 +94,7 @@ def load_audio_config() -> tuple[int | str | None, int]:
         return device_id, sample_rate
     except Exception as e:
         logger.warning("读取音频配置失败: %s，使用默认设备", e)
-        return None, DEFAULT_NATIVE_SAMPLE_RATE
+        return None, None
 
 
 def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
@@ -63,7 +110,14 @@ def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarra
     """
     if orig_sr == target_sr:
         return audio
-    duration = len(audio) / orig_sr
-    target_length = int(duration * target_sr)
-    indices = np.linspace(0, len(audio) - 1, target_length)
-    return np.interp(indices, np.arange(len(audio)), audio.astype(np.float32)).astype(np.int16)
+
+    import librosa
+
+    float_audio = audio.astype(np.float32) / 32768.0
+    resampled = librosa.resample(
+        float_audio,
+        orig_sr=orig_sr,
+        target_sr=target_sr,
+        res_type="soxr_hq",
+    )
+    return np.clip(resampled * 32768.0, -32768, 32767).astype(np.int16)
diff --git a/fcitx5/backend/audio_recorder.py b/fcitx5/backend/audio_recorder.py
index 3014b9d..a54bbb1 100755
--- a/fcitx5/backend/audio_recorder.py
+++ b/fcitx5/backend/audio_recorder.py
@@ -38,7 +38,12 @@ def discover_project_root() -> Path:
 PROJECT_ROOT = discover_project_root()
 sys.path.insert(0, str(PROJECT_ROOT))
 
-from app.audio_utils import load_audio_config, resample_audio, SAMPLE_RATE
+from app.audio_utils import (
+    load_audio_config,
+    resample_audio,
+    resolve_default_input_device,
+    SAMPLE_RATE,
+)
 from app.wave_writer import write_wav
 
 logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
@@ -67,16 +72,7 @@ def _resolve_input_device(self):
             except Exception as exc:
                 logger.warning("查询设备 %s 失败: %s", self.device, exc)
 
-        try:
-            devices = sd.query_devices()
-            for idx, info in enumerate(devices):
-                if info.get("max_input_channels", 0) > 0:
-                    logger.info("回退至输入设备 #%s (%s)", idx, info.get("name", "unknown"))
-                    return idx
-        except Exception as exc:
-            logger.warning("查询输入设备列表失败: %s", exc)
-
-        return None
+        return resolve_default_input_device()
 
     def _resolve_sample_rate(self, device, preferred):
         """选择可用采样率"""
@@ -225,7 +221,12 @@ def main():
     device = args.device if args.device is not None else configured_device
     if isinstance(device, str) and device.isdigit():
         device = int(device)
-    sample_rate = args.sample_rate if args.sample_rate != 44100 else configured_sr
+    # Ask the sound server for 16 kHz directly so PipeWire/PulseAudio resample
+    # with proper anti-aliasing. Honour an explicit configured rate if set.
+    if args.sample_rate != 44100:
+        sample_rate = args.sample_rate
+    else:
+        sample_rate = configured_sr if configured_sr else SAMPLE_RATE
 
     # 录音
     recorder = AudioRecorder(device, sample_rate)