From 0618a4e76daff4b824d2d5f733521c88a6c6f52b Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 12:17:40 -0400 Subject: [PATCH 1/7] feat(media): add option to duck media volume instead of pausing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a "Lower Volume Instead of Pausing" sub-option to "Pause Media During Transcription". When enabled, FluidVoice lowers the system output volume while you dictate and restores it afterward, instead of fully stopping playback — useful for keeping a video audible but quiet during narration. - Add SystemAudioVolumeController, a CoreAudio wrapper that reads/sets the default output device volume (master element with per-channel fallback). - Extend MediaPlaybackService to either pause or duck based on the setting, tracking which action was taken so it reverts exactly what it applied. On restore it leaves the volume untouched if the user changed it mid-dictation, and falls back to pausing if the volume can't be lowered. - Add duckMediaInsteadOfPausing and duckMediaVolumeLevel settings (level defaults to 20%, clamped 5–100%), including backup/restore support. - Add the nested toggle and a level slider to Settings, shown only when media pausing is enabled. Note: CoreAudio output volume is system-wide, so ducking lowers all output from the default device, not just a single app. Behavior is arm64-only, consistent with the existing pause feature. 
--- Sources/Fluid/Persistence/BackupService.swift | 2 + Sources/Fluid/Persistence/SettingsStore.swift | 31 +++++ .../Fluid/Services/MediaPlaybackService.swift | 110 +++++++++++++-- .../SystemAudioVolumeController.swift | 128 ++++++++++++++++++ Sources/Fluid/UI/SettingsView.swift | 43 ++++++ 5 files changed, 301 insertions(+), 13 deletions(-) create mode 100644 Sources/Fluid/Services/SystemAudioVolumeController.swift diff --git a/Sources/Fluid/Persistence/BackupService.swift b/Sources/Fluid/Persistence/BackupService.swift index 2927dfd7..a2cc61ca 100644 --- a/Sources/Fluid/Persistence/BackupService.swift +++ b/Sources/Fluid/Persistence/BackupService.swift @@ -76,6 +76,8 @@ struct SettingsBackupPayload: Codable, Equatable { let continuousDictationSpacingEnabled: Bool? let contextAwareCapitalizationEnabled: Bool? let pauseMediaDuringTranscription: Bool + let duckMediaInsteadOfPausing: Bool? + let duckMediaVolumeLevel: Double? let vocabularyBoostingEnabled: Bool let customDictionaryEntries: [SettingsStore.CustomDictionaryEntry] let selectedDictationPromptID: String? 
diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 82d0fbec..0fdae67d 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2764,6 +2764,8 @@ final class SettingsStore: ObservableObject { continuousDictationSpacingEnabled: self.continuousDictationSpacingEnabled, contextAwareCapitalizationEnabled: self.contextAwareCapitalizationEnabled, pauseMediaDuringTranscription: self.pauseMediaDuringTranscription, + duckMediaInsteadOfPausing: self.duckMediaInsteadOfPausing, + duckMediaVolumeLevel: self.duckMediaVolumeLevel, vocabularyBoostingEnabled: self.vocabularyBoostingEnabled, customDictionaryEntries: self.customDictionaryEntries, selectedDictationPromptID: self.selectedDictationPromptID, @@ -2862,6 +2864,10 @@ final class SettingsStore: ObservableObject { self.continuousDictationSpacingEnabled = payload.continuousDictationSpacingEnabled ?? restoredContinuousDictationModeEnabled self.contextAwareCapitalizationEnabled = payload.contextAwareCapitalizationEnabled ?? restoredContinuousDictationModeEnabled self.pauseMediaDuringTranscription = payload.pauseMediaDuringTranscription + self.duckMediaInsteadOfPausing = payload.duckMediaInsteadOfPausing ?? false + if let duckLevel = payload.duckMediaVolumeLevel { + self.duckMediaVolumeLevel = duckLevel + } self.vocabularyBoostingEnabled = payload.vocabularyBoostingEnabled self.customDictionaryEntries = payload.customDictionaryEntries @@ -3531,6 +3537,29 @@ final class SettingsStore: ObservableObject { } } + /// When enabled (and `pauseMediaDuringTranscription` is on), lowers the system + /// output volume during transcription instead of fully pausing playback. + var duckMediaInsteadOfPausing: Bool { + get { self.defaults.object(forKey: Keys.duckMediaInsteadOfPausing) as? Bool ?? 
false } + set { + objectWillChange.send() + self.defaults.set(newValue, forKey: Keys.duckMediaInsteadOfPausing) + } + } + + /// Target output volume while ducking, expressed as a fraction (0.05–1.0) of + /// the volume at the moment transcription starts. Defaults to 0.2 (20%). + var duckMediaVolumeLevel: Double { + get { + let stored = self.defaults.object(forKey: Keys.duckMediaVolumeLevel) as? Double ?? 0.2 + return min(1.0, max(0.05, stored)) + } + set { + objectWillChange.send() + self.defaults.set(min(1.0, max(0.05, newValue)), forKey: Keys.duckMediaVolumeLevel) + } + } + // MARK: - Custom Dictionary /// A custom dictionary entry that maps multiple misheard/alternate spellings to a correct replacement. @@ -4436,6 +4465,8 @@ private extension SettingsStore { /// Media Playback Control static let pauseMediaDuringTranscription = "PauseMediaDuringTranscription" + static let duckMediaInsteadOfPausing = "DuckMediaInsteadOfPausing" + static let duckMediaVolumeLevel = "DuckMediaVolumeLevel" /// Custom Dictation Prompt static let customDictationPrompt = "CustomDictationPrompt" diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index 72160960..a2a1688d 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -14,6 +14,19 @@ final class MediaPlaybackService { #if arch(arm64) private let mediaController = MediaController() + private let volumeController = SystemAudioVolumeController() + + /// Tracks the action we took for the current transcription session so that + /// `resumeIfWePaused(_:)` can revert exactly what was applied. + private enum ActiveSuppression { + /// We sent a pause command and should send play() to restore. + case paused + /// We lowered the output volume from `original` to `applied` and should + /// raise it back to `original`. 
+ case ducked(original: Float, applied: Float) + } + + private var activeSuppression: ActiveSuppression? #endif private init() {} @@ -21,10 +34,16 @@ final class MediaPlaybackService { // MARK: - Public API #if arch(arm64) - /// Pauses system media playback if something is currently playing. + /// Suppresses system media while transcription is active, if something is + /// currently playing. /// - /// - Returns: `true` if we successfully paused playback, `false` if nothing was playing - /// or if we couldn't determine playback state. + /// Depending on `SettingsStore.duckMediaInsteadOfPausing`, this either fully + /// pauses playback or lowers the system output volume ("ducking"). The action + /// taken is recorded so `resumeIfWePaused(_:)` can revert exactly what was done. + /// + /// - Returns: `true` if we took an action (pause or duck) that must later be + /// reverted, `false` if nothing was playing or if we couldn't determine + /// playback state. /// /// - Note: Uses a local one-shot gate to protect against `MediaRemoteAdapter` /// firing the `getTrackInfo` callback more than once, which would otherwise @@ -97,11 +116,7 @@ final class MediaPlaybackService { ) if isPlaying { - DebugLogger.shared.info( - "MediaPlaybackService: Media is playing, sending pause command", - source: "MediaPlaybackService" - ) - self.mediaController.pause() + self.applySuppression() resumeOnce(true) } else { DebugLogger.shared.debug( @@ -114,25 +129,94 @@ final class MediaPlaybackService { } } - /// Resumes media playback only if we were the ones who paused it. + /// Reverts the media suppression applied for this session — resuming playback + /// if we paused it, or restoring the output volume if we ducked it. /// /// - Parameter wePaused: `true` if `pauseIfPlaying()` returned `true` for this session. 
func resumeIfWePaused(_ wePaused: Bool) async { guard wePaused else { DebugLogger.shared.debug( - "MediaPlaybackService: We didn't pause media, not resuming", + "MediaPlaybackService: We didn't suppress media, nothing to revert", source: "MediaPlaybackService" ) return } + self.revertSuppression() + } + + // MARK: - Suppression helpers + + /// Either pauses playback or ducks the system output volume, based on the + /// user's setting, and records what was done in `activeSuppression`. + private func applySuppression() { + // Ducking: lower the output volume instead of stopping playback entirely. + if SettingsStore.shared.duckMediaInsteadOfPausing, + let original = self.volumeController.currentOutputVolume() + { + let level = Float(SettingsStore.shared.duckMediaVolumeLevel) + let target = original * level + if self.volumeController.setOutputVolume(target) { + // Read back the level the device actually snapped to (volume can be + // quantized to coarse steps) so the restore-time change check is accurate. + let applied = self.volumeController.currentOutputVolume() ?? target + self.activeSuppression = .ducked(original: original, applied: applied) + DebugLogger.shared.info( + "MediaPlaybackService: Ducked output volume \(original) -> \(applied) for transcription", + source: "MediaPlaybackService" + ) + return + } + + DebugLogger.shared.warning( + "MediaPlaybackService: Failed to lower output volume, falling back to pausing media", + source: "MediaPlaybackService" + ) + } + DebugLogger.shared.info( - "MediaPlaybackService: Resuming media playback (we paused it)", + "MediaPlaybackService: Media is playing, sending pause command", source: "MediaPlaybackService" ) + self.mediaController.pause() + self.activeSuppression = .paused + } + + /// Reverts whatever `applySuppression()` did for the current session. 
+ private func revertSuppression() { + switch self.activeSuppression { + case .paused: + DebugLogger.shared.info( + "MediaPlaybackService: Resuming media playback (we paused it)", + source: "MediaPlaybackService" + ) + // Use explicit play() command - never toggle + self.mediaController.play() + + case let .ducked(original, applied): + // Only restore if the volume is still roughly where we left it. If the + // user adjusted it during dictation, respect their choice and leave it. + if let current = self.volumeController.currentOutputVolume(), abs(current - applied) > 0.02 { + DebugLogger.shared.info( + "MediaPlaybackService: Output volume changed during dictation (\(applied) -> \(current)), leaving as-is", + source: "MediaPlaybackService" + ) + } else { + DebugLogger.shared.info( + "MediaPlaybackService: Restoring output volume to \(original) (we ducked it)", + source: "MediaPlaybackService" + ) + self.volumeController.setOutputVolume(original) + } + + case .none: + DebugLogger.shared.debug( + "MediaPlaybackService: No active suppression to revert", + source: "MediaPlaybackService" + ) + } - // Use explicit play() command - never toggle - self.mediaController.play() + self.activeSuppression = nil } #else // Intel Mac stub - media control not available diff --git a/Sources/Fluid/Services/SystemAudioVolumeController.swift b/Sources/Fluid/Services/SystemAudioVolumeController.swift new file mode 100644 index 00000000..1e2295bd --- /dev/null +++ b/Sources/Fluid/Services/SystemAudioVolumeController.swift @@ -0,0 +1,128 @@ +import CoreAudio +import Foundation + +/// Thin CoreAudio wrapper for reading and adjusting the **default output device's** +/// master output volume. +/// +/// This is used to "duck" (temporarily lower) system audio while dictation is +/// active, as a gentler alternative to fully pausing media. Note that CoreAudio's +/// output volume is system-wide: ducking lowers *all* output from the default +/// device, not just a single app's media. 
+/// +/// Volume is expressed as a scalar in the `0.0...1.0` range. Some output devices +/// expose a settable master element (`kAudioObjectPropertyElementMain`), while +/// others only allow per-channel control; both paths are handled here. +struct SystemAudioVolumeController { + /// Returns the current scalar volume (`0.0...1.0`) of the default output + /// device, or `nil` if it can't be determined (e.g. an aggregate device or a + /// device that doesn't expose a volume property). + func currentOutputVolume() -> Float? { + guard let device = self.defaultOutputDevice() else { return nil } + + if let master = self.scalarVolume(device: device, element: kAudioObjectPropertyElementMain) { + return master + } + + // Fall back to averaging the individual stereo channels. + let values = self.stereoChannels(device: device) + .compactMap { self.scalarVolume(device: device, element: $0) } + guard !values.isEmpty else { return nil } + return values.reduce(0, +) / Float(values.count) + } + + /// Sets the default output device volume to `volume` (clamped to `0.0...1.0`). + /// + /// - Returns: `true` if at least one volume element was successfully written. + @discardableResult + func setOutputVolume(_ volume: Float) -> Bool { + guard let device = self.defaultOutputDevice() else { return false } + let clamped = max(0.0, min(1.0, volume)) + + if self.setScalarVolume(clamped, device: device, element: kAudioObjectPropertyElementMain) { + return true + } + + // Fall back to writing each stereo channel individually. + var didSet = false + for channel in self.stereoChannels(device: device) { + if self.setScalarVolume(clamped, device: device, element: channel) { + didSet = true + } + } + return didSet + } + + // MARK: - Private CoreAudio helpers + + private func defaultOutputDevice() -> AudioDeviceID? 
{ + var deviceID = AudioDeviceID(kAudioObjectUnknown) + var size = UInt32(MemoryLayout<AudioDeviceID>.size) + var address = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDefaultOutputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain + ) + let status = AudioObjectGetPropertyData( + AudioObjectID(kAudioObjectSystemObject), &address, 0, nil, &size, &deviceID + ) + guard status == noErr, deviceID != kAudioObjectUnknown else { return nil } + return deviceID + } + + /// The output channel numbers used for stereo, defaulting to `[1, 2]` when the + /// device doesn't advertise a preferred pair. + private func stereoChannels(device: AudioDeviceID) -> [UInt32] { + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyPreferredChannelsForStereo, + mScope: kAudioObjectPropertyScopeOutput, + mElement: kAudioObjectPropertyElementMain + ) + guard AudioObjectHasProperty(device, &address) else { return [1, 2] } + + var channels: [UInt32] = [0, 0] + var size = UInt32(MemoryLayout<UInt32>.size * channels.count) + let status = channels.withUnsafeMutableBytes { buffer -> OSStatus in + guard let base = buffer.baseAddress else { return OSStatus(-1) } + return AudioObjectGetPropertyData(device, &address, 0, nil, &size, base) + } + guard status == noErr, channels.allSatisfy({ $0 != 0 }) else { return [1, 2] } + return channels + } + + private func scalarVolume(device: AudioDeviceID, element: AudioObjectPropertyElement) -> Float? 
{ + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyVolumeScalar, + mScope: kAudioObjectPropertyScopeOutput, + mElement: element + ) + guard AudioObjectHasProperty(device, &address) else { return nil } + + var volume = Float(0) + var size = UInt32(MemoryLayout<Float>.size) + let status = AudioObjectGetPropertyData(device, &address, 0, nil, &size, &volume) + guard status == noErr else { return nil } + return volume + } + + private func setScalarVolume( + _ volume: Float, + device: AudioDeviceID, + element: AudioObjectPropertyElement + ) -> Bool { + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyVolumeScalar, + mScope: kAudioObjectPropertyScopeOutput, + mElement: element + ) + guard AudioObjectHasProperty(device, &address) else { return false } + + var settable = DarwinBoolean(false) + guard AudioObjectIsPropertySettable(device, &address, &settable) == noErr, settable.boolValue else { + return false + } + + var newVolume = max(0.0, min(1.0, volume)) + let size = UInt32(MemoryLayout<Float>.size) + return AudioObjectSetPropertyData(device, &address, 0, nil, size, &newVolume) == noErr + } +} diff --git a/Sources/Fluid/UI/SettingsView.swift b/Sources/Fluid/UI/SettingsView.swift index d9b280e1..3d79d9e4 100644 --- a/Sources/Fluid/UI/SettingsView.swift +++ b/Sources/Fluid/UI/SettingsView.swift @@ -936,6 +936,49 @@ struct SettingsView: View { set: { SettingsStore.shared.pauseMediaDuringTranscription = $0 } ) ) + + if self.settings.pauseMediaDuringTranscription { + VStack(alignment: .leading, spacing: 10) { + self.optionToggleRow( + title: "Lower Volume Instead of Pausing", + description: "Duck currently playing audio/video to a quieter level while you dictate, then restore it — instead of stopping playback. 
Affects overall system output volume.", + isOn: Binding( + get: { SettingsStore.shared.duckMediaInsteadOfPausing }, + set: { SettingsStore.shared.duckMediaInsteadOfPausing = $0 } + ) + ) + + if self.settings.duckMediaInsteadOfPausing { + HStack { + VStack(alignment: .leading, spacing: 2) { + Text("Volume While Dictating") + .font(self.theme.typography.bodyStrong) + .foregroundStyle(self.settingsTitleText) + Text("Fraction of the current volume to keep while dictation is active.") + .font(self.theme.typography.bodySmall) + .foregroundStyle(self.settingsSecondaryText) + } + + Spacer() + + HStack(spacing: 6) { + Slider(value: self.$settings.duckMediaVolumeLevel, in: 0.05 ... 1.0, step: 0.05) + .frame(width: 110) + .controlSize(.small) + + Text("\(Int((self.settings.duckMediaVolumeLevel * 100).rounded()))%") + .font(.caption.monospaced()) + .foregroundStyle(self.settingsSecondaryText) + .frame(width: 44, alignment: .trailing) + } + .frame(width: 160, alignment: .trailing) + } + } + } + .padding(.leading, 16) + .padding(.top, 2) + } + Divider().opacity(0.2) self.optionToggleRow( From 32457dc15ba85383a439e8ff303bb26f9aefdc24 Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 13:11:35 -0400 Subject: [PATCH 2/7] fix(media): gate duck side effect against duplicate getTrackInfo callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MediaRemoteAdapter can fire the getTrackInfo callback more than once (the existing resumeOnce one-shot already guards the continuation against this). The duck side effect, however, ran on every callback before the gate. Since ducking is not idempotent — it reads the current output volume as the "original" — a duplicate callback re-ducked the already-lowered volume and overwrote activeSuppression with the ducked value, so the later restore only returned to the ducked level instead of the user's original volume. 
Route applySuppression() through resumeOnce's one-shot gate so it runs exactly once, for the winning callback, before the continuation resumes. Pause mode was unaffected (pause() is idempotent); this only mattered for ducking. --- Sources/Fluid/Services/MediaPlaybackService.swift | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index a2a1688d..20f2ecaa 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -54,7 +54,7 @@ final class MediaPlaybackService { let resumeLock = NSLock() var didResume = false - func resumeOnce(_ value: Bool) { + func resumeOnce(_ value: Bool, beforeResuming: (() -> Void)? = nil) { var shouldResume = false resumeLock.lock() @@ -72,6 +72,9 @@ final class MediaPlaybackService { return } + // Runs exactly once, only for the winning callback, before resuming the + // continuation — so any side effect is gated by the same one-shot. + beforeResuming?() continuation.resume(returning: value) } @@ -116,8 +119,11 @@ final class MediaPlaybackService { ) if isPlaying { - self.applySuppression() - resumeOnce(true) + // Gate the suppression behind the same one-shot as the resume. + // MediaRemoteAdapter can fire this callback more than once, and ducking + // is not idempotent (it reads the current volume as the "original"), so a + // duplicate must not re-duck or overwrite `activeSuppression`. 
+ resumeOnce(true) { self.applySuppression() } } else { DebugLogger.shared.debug( "MediaPlaybackService: Media is not playing, no action needed", From d421045fb60ad118d715281bec4df7175932b40f Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 21:16:45 -0400 Subject: [PATCH 3/7] fix(media): preserve per-channel volume balance when ducking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On output devices with no settable master volume, the controller fell back to per-channel writes while reading volume as the average of the channels. Ducking then captured that average as the "original" and, on restore, wrote it back to every channel — permanently flattening a user's non-centered left/right balance after a dictation session (and it did not self-heal). Replace the scalar get/set API with an OutputVolumeSnapshot that records each element (master or individual channels) and its level: - capture the full per-channel state before ducking, - duck by scaling every channel by the same factor (preserving balance), - restore each channel to its captured value. The snapshot also carries the device id, so restore and the "did the user change it?" check operate on the device that was actually ducked even if the default output device changes mid-dictation. --- .../Fluid/Services/MediaPlaybackService.swift | 29 ++--- .../SystemAudioVolumeController.swift | 105 ++++++++++++------ 2 files changed, 90 insertions(+), 44 deletions(-) diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index 20f2ecaa..0d574cef 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -21,9 +21,10 @@ final class MediaPlaybackService { private enum ActiveSuppression { /// We sent a pause command and should send play() to restore. 
case paused - /// We lowered the output volume from `original` to `applied` and should - /// raise it back to `original`. - case ducked(original: Float, applied: Float) + /// We lowered the output volume. `original` is the pre-duck snapshot to + /// restore (preserving per-channel balance); `applied` is what the device + /// actually snapped to, used to detect user changes mid-dictation. + case ducked(original: OutputVolumeSnapshot, applied: OutputVolumeSnapshot) } private var activeSuppression: ActiveSuppression? @@ -158,17 +159,17 @@ final class MediaPlaybackService { private func applySuppression() { // Ducking: lower the output volume instead of stopping playback entirely. if SettingsStore.shared.duckMediaInsteadOfPausing, - let original = self.volumeController.currentOutputVolume() + let original = self.volumeController.captureOutputVolume() { let level = Float(SettingsStore.shared.duckMediaVolumeLevel) - let target = original * level - if self.volumeController.setOutputVolume(target) { - // Read back the level the device actually snapped to (volume can be + let target = original.scaled(by: level) + if self.volumeController.apply(target) { + // Re-capture what the device actually snapped to (volume can be // quantized to coarse steps) so the restore-time change check is accurate. - let applied = self.volumeController.currentOutputVolume() ?? target + let applied = self.volumeController.captureOutputVolume() ?? target self.activeSuppression = .ducked(original: original, applied: applied) DebugLogger.shared.info( - "MediaPlaybackService: Ducked output volume \(original) -> \(applied) for transcription", + "MediaPlaybackService: Ducked output volume \(original.averageLevel) -> \(applied.averageLevel) for transcription", source: "MediaPlaybackService" ) return @@ -202,17 +203,19 @@ final class MediaPlaybackService { case let .ducked(original, applied): // Only restore if the volume is still roughly where we left it. 
If the // user adjusted it during dictation, respect their choice and leave it. - if let current = self.volumeController.currentOutputVolume(), abs(current - applied) > 0.02 { + if let current = self.volumeController.currentAverageLevel(matching: applied), + abs(current - applied.averageLevel) > 0.02 + { DebugLogger.shared.info( - "MediaPlaybackService: Output volume changed during dictation (\(applied) -> \(current)), leaving as-is", + "MediaPlaybackService: Output volume changed during dictation (\(applied.averageLevel) -> \(current)), leaving as-is", source: "MediaPlaybackService" ) } else { DebugLogger.shared.info( - "MediaPlaybackService: Restoring output volume to \(original) (we ducked it)", + "MediaPlaybackService: Restoring output volume to \(original.averageLevel) (we ducked it)", source: "MediaPlaybackService" ) - self.volumeController.setOutputVolume(original) + self.volumeController.apply(original) } case .none: diff --git a/Sources/Fluid/Services/SystemAudioVolumeController.swift b/Sources/Fluid/Services/SystemAudioVolumeController.swift index 1e2295bd..4334aeaa 100644 --- a/Sources/Fluid/Services/SystemAudioVolumeController.swift +++ b/Sources/Fluid/Services/SystemAudioVolumeController.swift @@ -2,56 +2,67 @@ import CoreAudio import Foundation /// Thin CoreAudio wrapper for reading and adjusting the **default output device's** -/// master output volume. +/// output volume, used to "duck" (temporarily lower) system audio while dictation +/// is active, as a gentler alternative to fully pausing media. /// -/// This is used to "duck" (temporarily lower) system audio while dictation is -/// active, as a gentler alternative to fully pausing media. Note that CoreAudio's -/// output volume is system-wide: ducking lowers *all* output from the default -/// device, not just a single app's media. +/// Note that CoreAudio's output volume is system-wide: ducking lowers *all* output +/// from the default device, not just a single app's media. 
/// -/// Volume is expressed as a scalar in the `0.0...1.0` range. Some output devices -/// expose a settable master element (`kAudioObjectPropertyElementMain`), while -/// others only allow per-channel control; both paths are handled here. +/// Volume is captured and restored as an `OutputVolumeSnapshot`, which preserves the +/// **individual per-channel levels** (or the master element). Some output devices +/// expose no settable master volume — only per-channel scalars — and a user may have +/// a non-centered left/right balance that must survive a duck/restore cycle +/// unchanged, so the snapshot records each element rather than a single scalar. struct SystemAudioVolumeController { - /// Returns the current scalar volume (`0.0...1.0`) of the default output - /// device, or `nil` if it can't be determined (e.g. an aggregate device or a - /// device that doesn't expose a volume property). - func currentOutputVolume() -> Float? { + /// Captures the default output device's current volume so it can later be + /// restored exactly, preserving per-channel balance. + /// + /// - Returns: A snapshot, or `nil` if no volume property is available (e.g. an + /// aggregate device). + func captureOutputVolume() -> OutputVolumeSnapshot? { guard let device = self.defaultOutputDevice() else { return nil } + // Prefer the single master element when the device exposes it. if let master = self.scalarVolume(device: device, element: kAudioObjectPropertyElementMain) { - return master + return OutputVolumeSnapshot( + deviceID: device, + channels: [.init(element: kAudioObjectPropertyElementMain, volume: master)] + ) } - // Fall back to averaging the individual stereo channels. - let values = self.stereoChannels(device: device) - .compactMap { self.scalarVolume(device: device, element: $0) } - guard !values.isEmpty else { return nil } - return values.reduce(0, +) / Float(values.count) + // Otherwise capture each stereo channel individually so balance is retained. 
+ let channels = self.stereoChannels(device: device).compactMap { element -> OutputVolumeSnapshot.Channel? in + guard let volume = self.scalarVolume(device: device, element: element) else { return nil } + return .init(element: element, volume: volume) + } + guard !channels.isEmpty else { return nil } + return OutputVolumeSnapshot(deviceID: device, channels: channels) } - /// Sets the default output device volume to `volume` (clamped to `0.0...1.0`). + /// Writes every channel captured in `snapshot` back to its recorded level. /// - /// - Returns: `true` if at least one volume element was successfully written. + /// - Returns: `true` if at least one channel was successfully written. @discardableResult - func setOutputVolume(_ volume: Float) -> Bool { - guard let device = self.defaultOutputDevice() else { return false } - let clamped = max(0.0, min(1.0, volume)) - - if self.setScalarVolume(clamped, device: device, element: kAudioObjectPropertyElementMain) { - return true - } - - // Fall back to writing each stereo channel individually. + func apply(_ snapshot: OutputVolumeSnapshot) -> Bool { var didSet = false - for channel in self.stereoChannels(device: device) { - if self.setScalarVolume(clamped, device: device, element: channel) { + for channel in snapshot.channels { + if self.setScalarVolume(channel.volume, device: snapshot.deviceID, element: channel.element) { didSet = true } } return didSet } + /// Reads the current average level of the same device and elements captured in + /// `snapshot`, for detecting whether the user changed the volume since we set it. + func currentAverageLevel(matching snapshot: OutputVolumeSnapshot) -> Float? { + let values = snapshot.channels.compactMap { + self.scalarVolume(device: snapshot.deviceID, element: $0.element) + } + guard !values.isEmpty else { return nil } + return values.reduce(0, +) / Float(values.count) + } + // MARK: - Private CoreAudio helpers private func defaultOutputDevice() -> AudioDeviceID? 
{ @@ -126,3 +137,35 @@ struct SystemAudioVolumeController { return AudioObjectSetPropertyData(device, &address, 0, nil, size, &newVolume) == noErr } } + +/// An immutable capture of an output device's volume — either its master element or +/// its individual stereo channels — so a duck can be reverted without losing the +/// device's original per-channel (left/right) balance. +struct OutputVolumeSnapshot { + fileprivate struct Channel { + let element: AudioObjectPropertyElement + let volume: Float + } + + fileprivate let deviceID: AudioDeviceID + fileprivate let channels: [Channel] + + /// Average level across the captured channels, used for logging and for + /// detecting whether the user changed the volume mid-dictation. + var averageLevel: Float { + guard !self.channels.isEmpty else { return 0 } + return self.channels.map(\.volume).reduce(0, +) / Float(self.channels.count) + } + + /// A copy with every channel scaled by `factor` (clamped to `0.0...1.0`). + /// Scaling each channel by the same factor lowers the volume while preserving + /// the device's left/right balance. + func scaled(by factor: Float) -> OutputVolumeSnapshot { + OutputVolumeSnapshot( + deviceID: self.deviceID, + channels: self.channels.map { + Channel(element: $0.element, volume: max(0.0, min(1.0, $0.volume * factor))) + } + ) + } +} From ef88004076357edf817806c39978b6c54f9a82de Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 21:28:19 -0400 Subject: [PATCH 4/7] fix(media): let ducking take precedence over independent cue volume TranscriptionSoundPlayer's independent-volume mode also writes the system output volume and schedules a fire-and-forget async restore. 
That made it a second, uncoordinated owner of the same volume the new ducking feature controls, with two race outcomes: - the start cue (played just before asr.start()) sets system volume and the duck then captures that transient level as the "original"; the cue's late restore undoes the duck mid-session; - the stop cue (played while ducked) saves the ducked level and, if its restore fires after resumeIfWePaused, overwrites the final restore and leaves the Mac stuck at the ducked level. When ducking is the active media behavior (pause + lower-volume both on), it now owns the system volume: the cue plays at its own AVAudioPlayer volume and no longer hijacks/restores system volume, eliminating the race. Independent cue volume is unaffected when ducking is off. --- .../Services/TranscriptionSoundPlayer.swift | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift index 5c7ced21..10b370ff 100644 --- a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift +++ b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift @@ -47,7 +47,17 @@ final class TranscriptionSoundPlayer { let settings = SettingsStore.shared let desiredVolume = overrideVolume ?? settings.transcriptionSoundVolume - if settings.transcriptionSoundIndependentVolume { + // When "lower volume instead of pausing" (ducking) is enabled, MediaPlaybackService + // owns the system output volume for the duration of a dictation session. The cue's + // independent-volume mode also temporarily sets and *asynchronously* restores the + // system volume, and the two would race: a late cue restore can undo the duck at + // session start, or overwrite the final restore and leave the Mac stuck at the ducked + // level. Ducking takes precedence — the cue plays at its own player volume and leaves + // the system volume alone. 
+ let duckingOwnsSystemVolume = settings.pauseMediaDuringTranscription && settings.duckMediaInsteadOfPausing + let useIndependentVolume = settings.transcriptionSoundIndependentVolume && !duckingOwnsSystemVolume + + if useIndependentVolume { let currentSystemVol = Self.getSystemVolume() guard currentSystemVol > 0.001 else { return } // Save current system volume and temporarily set it to desired level @@ -66,7 +76,7 @@ final class TranscriptionSoundPlayer { } player.currentTime = 0 - if settings.transcriptionSoundIndependentVolume { + if useIndependentVolume { player.volume = 1.0 } else { player.volume = desiredVolume @@ -74,7 +84,7 @@ final class TranscriptionSoundPlayer { player.play() // Restore system volume after the sound finishes - if settings.transcriptionSoundIndependentVolume, let saved = self.savedSystemVolume { + if useIndependentVolume, let saved = self.savedSystemVolume { let duration = player.duration DispatchQueue.main.asyncAfter(deadline: .now() + duration + 0.05) { [weak self] in Self.setSystemVolume(saved) From 16d4ebe17351c3fa79262f3ca9b38902a1f5b701 Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 21:33:38 -0400 Subject: [PATCH 5/7] fix(media): only capture settable volume elements and re-read the ducked device Two edge-case fixes found in self-review of the ducking path: - captureOutputVolume() preferred the master element whenever it was *readable*, but apply() requires it to be *settable*. On a device with a read-only master and settable per-channel volumes, ducking would capture the master, fail to apply, and needlessly fall back to pausing. Capture now records only settable elements (master if settable, otherwise the settable channels), keeping capture and restore symmetric. - Replaced the post-duck re-capture and the restore-time change check, which re-resolved the *default* output device, with reread(snapshot:) that reads the snapshot's own device/elements. 
This keeps the quantization read-back accurate and is unaffected if the default output device changes mid-session. --- .../Fluid/Services/MediaPlaybackService.swift | 8 ++-- .../SystemAudioVolumeController.swift | 47 ++++++++++++++----- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index 0d574cef..3dc2f851 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -164,9 +164,9 @@ final class MediaPlaybackService { let level = Float(SettingsStore.shared.duckMediaVolumeLevel) let target = original.scaled(by: level) if self.volumeController.apply(target) { - // Re-capture what the device actually snapped to (volume can be - // quantized to coarse steps) so the restore-time change check is accurate. - let applied = self.volumeController.captureOutputVolume() ?? target + // Re-read what the device actually snapped to (volume can be quantized to + // coarse steps) so the restore-time change check is accurate. + let applied = self.volumeController.reread(target) ?? target self.activeSuppression = .ducked(original: original, applied: applied) DebugLogger.shared.info( "MediaPlaybackService: Ducked output volume \(original.averageLevel) -> \(applied.averageLevel) for transcription", @@ -203,7 +203,7 @@ final class MediaPlaybackService { case let .ducked(original, applied): // Only restore if the volume is still roughly where we left it. If the // user adjusted it during dictation, respect their choice and leave it. 
- if let current = self.volumeController.currentAverageLevel(matching: applied), + if let current = self.volumeController.reread(applied)?.averageLevel, abs(current - applied.averageLevel) > 0.02 { DebugLogger.shared.info( diff --git a/Sources/Fluid/Services/SystemAudioVolumeController.swift b/Sources/Fluid/Services/SystemAudioVolumeController.swift index 4334aeaa..cbd32da6 100644 --- a/Sources/Fluid/Services/SystemAudioVolumeController.swift +++ b/Sources/Fluid/Services/SystemAudioVolumeController.swift @@ -22,17 +22,23 @@ struct SystemAudioVolumeController { func captureOutputVolume() -> OutputVolumeSnapshot? { guard let device = self.defaultOutputDevice() else { return nil } - // Prefer the single master element when the device exposes it. - if let master = self.scalarVolume(device: device, element: kAudioObjectPropertyElementMain) { + // Prefer the single master element, but only when it is *settable* — otherwise we + // could capture a read-only master and then fail to restore it on a device whose + // per-channel volumes are the ones that are actually settable. + if self.isVolumeSettable(device: device, element: kAudioObjectPropertyElementMain), + let master = self.scalarVolume(device: device, element: kAudioObjectPropertyElementMain) + { return OutputVolumeSnapshot( deviceID: device, channels: [.init(element: kAudioObjectPropertyElementMain, volume: master)] ) } - // Otherwise capture each stereo channel individually so balance is retained. + // Otherwise capture each *settable* stereo channel individually so balance is retained. let channels = self.stereoChannels(device: device).compactMap { element -> OutputVolumeSnapshot.Channel? 
in - guard let volume = self.scalarVolume(device: device, element: element) else { return nil } + guard self.isVolumeSettable(device: device, element: element), + let volume = self.scalarVolume(device: device, element: element) + else { return nil } return .init(element: element, volume: volume) } guard !channels.isEmpty else { return nil } @@ -53,14 +59,20 @@ struct SystemAudioVolumeController { return didSet } - /// Reads the current average level of the same device and elements captured in - /// `snapshot`, for detecting whether the user changed the volume since we set it. - func currentAverageLevel(matching snapshot: OutputVolumeSnapshot) -> Float? { - let values = snapshot.channels.compactMap { - self.scalarVolume(device: snapshot.deviceID, element: $0.element) + /// Re-reads the current levels of the same device and elements captured in + /// `snapshot`, returning an updated snapshot — used both to learn what the hardware + /// actually snapped to (volume can be quantized to coarse steps) and to detect + /// whether the user changed the volume since we set it. Operates on the snapshot's + /// own device, so it is unaffected if the default output device changes mid-session. + /// + /// - Returns: An updated snapshot, or `nil` if a captured element is no longer readable. + func reread(_ snapshot: OutputVolumeSnapshot) -> OutputVolumeSnapshot? { + let channels = snapshot.channels.compactMap { channel -> OutputVolumeSnapshot.Channel? 
in + guard let volume = self.scalarVolume(device: snapshot.deviceID, element: channel.element) else { return nil } + return .init(element: channel.element, volume: volume) } - guard !values.isEmpty else { return nil } - return values.reduce(0, +) / Float(values.count) + guard channels.count == snapshot.channels.count else { return nil } + return OutputVolumeSnapshot(deviceID: snapshot.deviceID, channels: channels) } // MARK: - Private CoreAudio helpers @@ -115,6 +127,19 @@ struct SystemAudioVolumeController { return volume } + /// Whether the volume scalar for `element` exists and can be written. + private func isVolumeSettable(device: AudioDeviceID, element: AudioObjectPropertyElement) -> Bool { + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyVolumeScalar, + mScope: kAudioObjectPropertyScopeOutput, + mElement: element + ) + guard AudioObjectHasProperty(device, &address) else { return false } + + var settable = DarwinBoolean(false) + return AudioObjectIsPropertySettable(device, &address, &settable) == noErr && settable.boolValue + } + private func setScalarVolume( _ volume: Float, device: AudioDeviceID, From 4a82205e5f53e9da4cc9c394962dfa1dee47f639 Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 21:37:57 -0400 Subject: [PATCH 6/7] fix(media): keep independent cue volume for Settings previews MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ducking-precedence guard was keyed on settings alone, so it also forced independent cue volume off for Settings previews — which never run during a dictation session and have no ducking conflict to avoid. Scope the precedence to the actual session start/stop cues via an enforceDuckingPrecedence flag; previews pass false and always honor the independent-volume setting. Session cues still defer to ducking, since the start cue fires before playback state is known. 
--- .../Services/TranscriptionSoundPlayer.swift | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift index 10b370ff..a1db947d 100644 --- a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift +++ b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift @@ -15,14 +15,14 @@ final class TranscriptionSoundPlayer { guard SettingsStore.shared.enableTranscriptionSounds else { return } let selected = SettingsStore.shared.transcriptionStartSound guard let soundName = selected.startSoundFileName else { return } - self.play(soundName: soundName) + self.play(soundName: soundName, enforceDuckingPrecedence: true) } func playStopSound() { guard SettingsStore.shared.enableTranscriptionSounds else { return } let selected = SettingsStore.shared.transcriptionStartSound guard let soundName = selected.stopSoundFileName else { return } - self.play(soundName: soundName) + self.play(soundName: soundName, enforceDuckingPrecedence: true) } /// Preview a specific sound at the current volume setting (used in Settings UI). @@ -38,7 +38,7 @@ final class TranscriptionSoundPlayer { self.play(soundName: soundName, overrideVolume: volume) } - private func play(soundName: String, overrideVolume: Float? = nil) { + private func play(soundName: String, overrideVolume: Float? = nil, enforceDuckingPrecedence: Bool = false) { guard let url = Bundle.main.url(forResource: soundName, withExtension: "m4a") else { DebugLogger.shared.error("Missing sound resource: \(soundName).m4a", source: "TranscriptionSoundPlayer") return @@ -52,9 +52,13 @@ final class TranscriptionSoundPlayer { // independent-volume mode also temporarily sets and *asynchronously* restores the // system volume, and the two would race: a late cue restore can undo the duck at // session start, or overwrite the final restore and leave the Mac stuck at the ducked - // level. 
Ducking takes precedence — the cue plays at its own player volume and leaves - // the system volume alone. - let duckingOwnsSystemVolume = settings.pauseMediaDuringTranscription && settings.duckMediaInsteadOfPausing + // level. So for the session start/stop cues, ducking takes precedence — the cue plays + // at its own player volume and leaves the system volume alone. Settings previews never + // run during a session, so they pass enforceDuckingPrecedence == false and always + // honor the independent-volume setting. + let duckingOwnsSystemVolume = enforceDuckingPrecedence + && settings.pauseMediaDuringTranscription + && settings.duckMediaInsteadOfPausing let useIndependentVolume = settings.transcriptionSoundIndependentVolume && !duckingOwnsSystemVolume if useIndependentVolume { From 9169d9f175a96488f1b29ca9eae915c456f88854 Mon Sep 17 00:00:00 2001 From: Aditya Kar Date: Sun, 28 Jun 2026 21:42:09 -0400 Subject: [PATCH 7/7] fix(media): refuse a new suppression while one is still active stop() sets isRunning=false before its final transcription pass and only reverts media afterwards, so a new dictation can start during that window. With a single shared activeSuppression, the second session would capture the already-ducked volume as its "original," the first session's revert would then clear the newer snapshot, and the Mac could be left stuck at the ducked level. pauseIfPlaying() now bails out (returning false, "no new action") when a suppression is already active, so the in-flight revert from the prior session remains the sole owner of the system volume and restores the true original. 
--- Sources/Fluid/Services/MediaPlaybackService.swift | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index 3dc2f851..ee6b99e6 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -51,6 +51,20 @@ final class MediaPlaybackService { /// crash with `EXC_BREAKPOINT` (SIGTRAP) due to double-resume of a /// `CheckedContinuation`. func pauseIfPlaying() async -> Bool { + // A suppression from a previous session may not have been reverted yet: stop() flips + // isRunning false before its (slow) final transcription pass, and only reverts media + // afterwards, so a new dictation can start during that window. Don't begin a second + // suppression — capturing the already-ducked volume would lose the true original and + // could leave the Mac stuck at the ducked level. Report no new action; the in-flight + // revert from the prior session restores the original volume. + guard self.activeSuppression == nil else { + DebugLogger.shared.warning( + "MediaPlaybackService: Suppression already active from a prior session, not starting another", + source: "MediaPlaybackService" + ) + return false + } + return await withCheckedContinuation { continuation in let resumeLock = NSLock() var didResume = false