diff --git a/Sources/Fluid/Persistence/BackupService.swift b/Sources/Fluid/Persistence/BackupService.swift index 2927dfd7..a2cc61ca 100644 --- a/Sources/Fluid/Persistence/BackupService.swift +++ b/Sources/Fluid/Persistence/BackupService.swift @@ -76,6 +76,8 @@ struct SettingsBackupPayload: Codable, Equatable { let continuousDictationSpacingEnabled: Bool? let contextAwareCapitalizationEnabled: Bool? let pauseMediaDuringTranscription: Bool + let duckMediaInsteadOfPausing: Bool? + let duckMediaVolumeLevel: Double? let vocabularyBoostingEnabled: Bool let customDictionaryEntries: [SettingsStore.CustomDictionaryEntry] let selectedDictationPromptID: String? diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 82d0fbec..0fdae67d 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2764,6 +2764,8 @@ final class SettingsStore: ObservableObject { continuousDictationSpacingEnabled: self.continuousDictationSpacingEnabled, contextAwareCapitalizationEnabled: self.contextAwareCapitalizationEnabled, pauseMediaDuringTranscription: self.pauseMediaDuringTranscription, + duckMediaInsteadOfPausing: self.duckMediaInsteadOfPausing, + duckMediaVolumeLevel: self.duckMediaVolumeLevel, vocabularyBoostingEnabled: self.vocabularyBoostingEnabled, customDictionaryEntries: self.customDictionaryEntries, selectedDictationPromptID: self.selectedDictationPromptID, @@ -2862,6 +2864,10 @@ final class SettingsStore: ObservableObject { self.continuousDictationSpacingEnabled = payload.continuousDictationSpacingEnabled ?? restoredContinuousDictationModeEnabled self.contextAwareCapitalizationEnabled = payload.contextAwareCapitalizationEnabled ?? restoredContinuousDictationModeEnabled self.pauseMediaDuringTranscription = payload.pauseMediaDuringTranscription + self.duckMediaInsteadOfPausing = payload.duckMediaInsteadOfPausing ?? 
false + if let duckLevel = payload.duckMediaVolumeLevel { + self.duckMediaVolumeLevel = duckLevel + } self.vocabularyBoostingEnabled = payload.vocabularyBoostingEnabled self.customDictionaryEntries = payload.customDictionaryEntries @@ -3531,6 +3537,29 @@ final class SettingsStore: ObservableObject { } } + /// When enabled (and `pauseMediaDuringTranscription` is on), lowers the system + /// output volume during transcription instead of fully pausing playback. + var duckMediaInsteadOfPausing: Bool { + get { self.defaults.object(forKey: Keys.duckMediaInsteadOfPausing) as? Bool ?? false } + set { + objectWillChange.send() + self.defaults.set(newValue, forKey: Keys.duckMediaInsteadOfPausing) + } + } + + /// Target output volume while ducking, expressed as a fraction (0.05–1.0) of + /// the volume at the moment transcription starts. Defaults to 0.2 (20%). + var duckMediaVolumeLevel: Double { + get { + let stored = self.defaults.object(forKey: Keys.duckMediaVolumeLevel) as? Double ?? 0.2 + return min(1.0, max(0.05, stored)) + } + set { + objectWillChange.send() + self.defaults.set(min(1.0, max(0.05, newValue)), forKey: Keys.duckMediaVolumeLevel) + } + } + // MARK: - Custom Dictionary /// A custom dictionary entry that maps multiple misheard/alternate spellings to a correct replacement. 
@@ -4436,6 +4465,8 @@ private extension SettingsStore { /// Media Playback Control static let pauseMediaDuringTranscription = "PauseMediaDuringTranscription" + static let duckMediaInsteadOfPausing = "DuckMediaInsteadOfPausing" + static let duckMediaVolumeLevel = "DuckMediaVolumeLevel" /// Custom Dictation Prompt static let customDictationPrompt = "CustomDictationPrompt" diff --git a/Sources/Fluid/Services/MediaPlaybackService.swift b/Sources/Fluid/Services/MediaPlaybackService.swift index 72160960..ee6b99e6 100644 --- a/Sources/Fluid/Services/MediaPlaybackService.swift +++ b/Sources/Fluid/Services/MediaPlaybackService.swift @@ -14,6 +14,20 @@ final class MediaPlaybackService { #if arch(arm64) private let mediaController = MediaController() + private let volumeController = SystemAudioVolumeController() + + /// Tracks the action we took for the current transcription session so that + /// `resumeIfWePaused(_:)` can revert exactly what was applied. + private enum ActiveSuppression { + /// We sent a pause command and should send play() to restore. + case paused + /// We lowered the output volume. `original` is the pre-duck snapshot to + /// restore (preserving per-channel balance); `applied` is what the device + /// actually snapped to, used to detect user changes mid-dictation. + case ducked(original: OutputVolumeSnapshot, applied: OutputVolumeSnapshot) + } + + private var activeSuppression: ActiveSuppression? #endif private init() {} @@ -21,21 +35,41 @@ final class MediaPlaybackService { // MARK: - Public API #if arch(arm64) - /// Pauses system media playback if something is currently playing. + /// Suppresses system media while transcription is active, if something is + /// currently playing. /// - /// - Returns: `true` if we successfully paused playback, `false` if nothing was playing - /// or if we couldn't determine playback state. 
+ /// Depending on `SettingsStore.duckMediaInsteadOfPausing`, this either fully + /// pauses playback or lowers the system output volume ("ducking"). The action + /// taken is recorded so `resumeIfWePaused(_:)` can revert exactly what was done. + /// + /// - Returns: `true` if we took an action (pause or duck) that must later be + /// reverted, `false` if nothing was playing or if we couldn't determine + /// playback state. /// /// - Note: Uses a local one-shot gate to protect against `MediaRemoteAdapter` /// firing the `getTrackInfo` callback more than once, which would otherwise /// crash with `EXC_BREAKPOINT` (SIGTRAP) due to double-resume of a /// `CheckedContinuation`. func pauseIfPlaying() async -> Bool { + // A suppression from a previous session may not have been reverted yet: stop() flips + // isRunning false before its (slow) final transcription pass, and only reverts media + // afterwards, so a new dictation can start during that window. Don't begin a second + // suppression — capturing the already-ducked volume would lose the true original and + // could leave the Mac stuck at the ducked level. Report no new action; the in-flight + // revert from the prior session restores the original volume. + guard self.activeSuppression == nil else { + DebugLogger.shared.warning( + "MediaPlaybackService: Suppression already active from a prior session, not starting another", + source: "MediaPlaybackService" + ) + return false + } + return await withCheckedContinuation { continuation in let resumeLock = NSLock() var didResume = false - func resumeOnce(_ value: Bool) { + func resumeOnce(_ value: Bool, beforeResuming: (() -> Void)? = nil) { var shouldResume = false resumeLock.lock() @@ -53,6 +87,9 @@ final class MediaPlaybackService { return } + // Runs exactly once, only for the winning callback, before resuming the + // continuation — so any side effect is gated by the same one-shot. 
+ beforeResuming?() continuation.resume(returning: value) } @@ -97,12 +134,11 @@ final class MediaPlaybackService { ) if isPlaying { - DebugLogger.shared.info( - "MediaPlaybackService: Media is playing, sending pause command", - source: "MediaPlaybackService" - ) - self.mediaController.pause() - resumeOnce(true) + // Gate the suppression behind the same one-shot as the resume. + // MediaRemoteAdapter can fire this callback more than once, and ducking + // is not idempotent (it reads the current volume as the "original"), so a + // duplicate must not re-duck or overwrite `activeSuppression`. + resumeOnce(true) { self.applySuppression() } } else { DebugLogger.shared.debug( "MediaPlaybackService: Media is not playing, no action needed", @@ -114,25 +150,96 @@ final class MediaPlaybackService { } } - /// Resumes media playback only if we were the ones who paused it. + /// Reverts the media suppression applied for this session — resuming playback + /// if we paused it, or restoring the output volume if we ducked it. /// /// - Parameter wePaused: `true` if `pauseIfPlaying()` returned `true` for this session. func resumeIfWePaused(_ wePaused: Bool) async { guard wePaused else { DebugLogger.shared.debug( - "MediaPlaybackService: We didn't pause media, not resuming", + "MediaPlaybackService: We didn't suppress media, nothing to revert", source: "MediaPlaybackService" ) return } + self.revertSuppression() + } + + // MARK: - Suppression helpers + + /// Either pauses playback or ducks the system output volume, based on the + /// user's setting, and records what was done in `activeSuppression`. + private func applySuppression() { + // Ducking: lower the output volume instead of stopping playback entirely. 
+ if SettingsStore.shared.duckMediaInsteadOfPausing, + let original = self.volumeController.captureOutputVolume() + { + let level = Float(SettingsStore.shared.duckMediaVolumeLevel) + let target = original.scaled(by: level) + if self.volumeController.apply(target) { + // Re-read what the device actually snapped to (volume can be quantized to + // coarse steps) so the restore-time change check is accurate. + let applied = self.volumeController.reread(target) ?? target + self.activeSuppression = .ducked(original: original, applied: applied) + DebugLogger.shared.info( + "MediaPlaybackService: Ducked output volume \(original.averageLevel) -> \(applied.averageLevel) for transcription", + source: "MediaPlaybackService" + ) + return + } + + DebugLogger.shared.warning( + "MediaPlaybackService: Failed to lower output volume, falling back to pausing media", + source: "MediaPlaybackService" + ) + } + DebugLogger.shared.info( - "MediaPlaybackService: Resuming media playback (we paused it)", + "MediaPlaybackService: Media is playing, sending pause command", source: "MediaPlaybackService" ) + self.mediaController.pause() + self.activeSuppression = .paused + } + + /// Reverts whatever `applySuppression()` did for the current session. + private func revertSuppression() { + switch self.activeSuppression { + case .paused: + DebugLogger.shared.info( + "MediaPlaybackService: Resuming media playback (we paused it)", + source: "MediaPlaybackService" + ) + // Use explicit play() command - never toggle + self.mediaController.play() + + case let .ducked(original, applied): + // Only restore if the volume is still roughly where we left it. If the + // user adjusted it during dictation, respect their choice and leave it. 
+ if let current = self.volumeController.reread(applied)?.averageLevel, + abs(current - applied.averageLevel) > 0.02 + { + DebugLogger.shared.info( + "MediaPlaybackService: Output volume changed during dictation (\(applied.averageLevel) -> \(current)), leaving as-is", + source: "MediaPlaybackService" + ) + } else { + DebugLogger.shared.info( + "MediaPlaybackService: Restoring output volume to \(original.averageLevel) (we ducked it)", + source: "MediaPlaybackService" + ) + self.volumeController.apply(original) + } + + case .none: + DebugLogger.shared.debug( + "MediaPlaybackService: No active suppression to revert", + source: "MediaPlaybackService" + ) + } - // Use explicit play() command - never toggle - self.mediaController.play() + self.activeSuppression = nil } #else // Intel Mac stub - media control not available diff --git a/Sources/Fluid/Services/SystemAudioVolumeController.swift b/Sources/Fluid/Services/SystemAudioVolumeController.swift new file mode 100644 index 00000000..cbd32da6 --- /dev/null +++ b/Sources/Fluid/Services/SystemAudioVolumeController.swift @@ -0,0 +1,196 @@ +import CoreAudio +import Foundation + +/// Thin CoreAudio wrapper for reading and adjusting the **default output device's** +/// output volume, used to "duck" (temporarily lower) system audio while dictation +/// is active, as a gentler alternative to fully pausing media. +/// +/// Note that CoreAudio's output volume is system-wide: ducking lowers *all* output +/// from the default device, not just a single app's media. +/// +/// Volume is captured and restored as an `OutputVolumeSnapshot`, which preserves the +/// **individual per-channel levels** (or the master element). Some output devices +/// expose no settable master volume — only per-channel scalars — and a user may have +/// a non-centered left/right balance that must survive a duck/restore cycle +/// unchanged, so the snapshot records each element rather than a single scalar. 
+struct SystemAudioVolumeController {
+    /// Captures the default output device's current volume so it can later be
+    /// restored exactly, preserving per-channel balance.
+    ///
+    /// - Returns: A snapshot, or `nil` if no volume property is available (e.g. an
+    ///   aggregate device).
+    func captureOutputVolume() -> OutputVolumeSnapshot? {
+        guard let device = self.defaultOutputDevice() else { return nil }
+
+        // Prefer the single master element, but only when it is *settable* — otherwise we
+        // could capture a read-only master and then fail to restore it on a device whose
+        // per-channel volumes are the ones that are actually settable.
+        if self.isVolumeSettable(device: device, element: kAudioObjectPropertyElementMain),
+           let master = self.scalarVolume(device: device, element: kAudioObjectPropertyElementMain)
+        {
+            return OutputVolumeSnapshot(
+                deviceID: device,
+                channels: [.init(element: kAudioObjectPropertyElementMain, volume: master)]
+            )
+        }
+
+        // Otherwise capture each *settable* stereo channel individually so balance is retained.
+        let channels = self.stereoChannels(device: device).compactMap { element -> OutputVolumeSnapshot.Channel? in
+            guard self.isVolumeSettable(device: device, element: element),
+                  let volume = self.scalarVolume(device: device, element: element)
+            else { return nil }
+            return .init(element: element, volume: volume)
+        }
+        guard !channels.isEmpty else { return nil }
+        return OutputVolumeSnapshot(deviceID: device, channels: channels)
+    }
+
+    /// Writes every channel captured in `snapshot` back to its recorded level.
+    ///
+    /// - Returns: `true` if at least one channel was successfully written.
+    @discardableResult
+    func apply(_ snapshot: OutputVolumeSnapshot) -> Bool {
+        var didSet = false
+        for channel in snapshot.channels {
+            if self.setScalarVolume(channel.volume, device: snapshot.deviceID, element: channel.element) {
+                didSet = true
+            }
+        }
+        return didSet
+    }
+
+    /// Re-reads the current levels of the same device and elements captured in
+    /// `snapshot`, returning an updated snapshot — used both to learn what the hardware
+    /// actually snapped to (volume can be quantized to coarse steps) and to detect
+    /// whether the user changed the volume since we set it. Operates on the snapshot's
+    /// own device, so it is unaffected if the default output device changes mid-session.
+    ///
+    /// - Returns: An updated snapshot, or `nil` if a captured element is no longer readable.
+    func reread(_ snapshot: OutputVolumeSnapshot) -> OutputVolumeSnapshot? {
+        let channels = snapshot.channels.compactMap { channel -> OutputVolumeSnapshot.Channel? in
+            guard let volume = self.scalarVolume(device: snapshot.deviceID, element: channel.element) else { return nil }
+            return .init(element: channel.element, volume: volume)
+        }
+        guard channels.count == snapshot.channels.count else { return nil }
+        return OutputVolumeSnapshot(deviceID: snapshot.deviceID, channels: channels)
+    }
+
+    // MARK: - Private CoreAudio helpers
+
+    private func defaultOutputDevice() -> AudioDeviceID? {
+        var deviceID = AudioDeviceID(kAudioObjectUnknown)
+        var size = UInt32(MemoryLayout<AudioDeviceID>.size) // byte size of the `deviceID` out-buffer
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioHardwarePropertyDefaultOutputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain
+        )
+        let status = AudioObjectGetPropertyData(
+            AudioObjectID(kAudioObjectSystemObject), &address, 0, nil, &size, &deviceID
+        )
+        guard status == noErr, deviceID != kAudioObjectUnknown else { return nil }
+        return deviceID
+    }
+
+    /// The output channel numbers used for stereo, defaulting to `[1, 2]` when the
+    /// device doesn't advertise a preferred pair.
+    private func stereoChannels(device: AudioDeviceID) -> [UInt32] {
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioDevicePropertyPreferredChannelsForStereo,
+            mScope: kAudioObjectPropertyScopeOutput,
+            mElement: kAudioObjectPropertyElementMain
+        )
+        guard AudioObjectHasProperty(device, &address) else { return [1, 2] }
+
+        var channels: [UInt32] = [0, 0]
+        var size = UInt32(MemoryLayout<UInt32>.size * channels.count) // two UInt32 channel numbers
+        let status = channels.withUnsafeMutableBytes { buffer -> OSStatus in
+            guard let base = buffer.baseAddress else { return OSStatus(-1) }
+            return AudioObjectGetPropertyData(device, &address, 0, nil, &size, base)
+        }
+        guard status == noErr, channels.allSatisfy({ $0 != 0 }) else { return [1, 2] }
+        return channels
+    }
+
+    private func scalarVolume(device: AudioDeviceID, element: AudioObjectPropertyElement) -> Float? {
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioDevicePropertyVolumeScalar,
+            mScope: kAudioObjectPropertyScopeOutput,
+            mElement: element
+        )
+        guard AudioObjectHasProperty(device, &address) else { return nil }
+
+        var volume = Float(0)
+        var size = UInt32(MemoryLayout<Float>.size) // byte size of the `volume` out-buffer
+        let status = AudioObjectGetPropertyData(device, &address, 0, nil, &size, &volume)
+        guard status == noErr else { return nil }
+        return volume
+    }
+
+    /// Whether the volume scalar for `element` exists and can be written.
+    private func isVolumeSettable(device: AudioDeviceID, element: AudioObjectPropertyElement) -> Bool {
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioDevicePropertyVolumeScalar,
+            mScope: kAudioObjectPropertyScopeOutput,
+            mElement: element
+        )
+        guard AudioObjectHasProperty(device, &address) else { return false }
+
+        var settable = DarwinBoolean(false)
+        return AudioObjectIsPropertySettable(device, &address, &settable) == noErr && settable.boolValue
+    }
+
+    private func setScalarVolume(
+        _ volume: Float,
+        device: AudioDeviceID,
+        element: AudioObjectPropertyElement
+    ) -> Bool {
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioDevicePropertyVolumeScalar,
+            mScope: kAudioObjectPropertyScopeOutput,
+            mElement: element
+        )
+        guard AudioObjectHasProperty(device, &address) else { return false }
+
+        var settable = DarwinBoolean(false)
+        guard AudioObjectIsPropertySettable(device, &address, &settable) == noErr, settable.boolValue else {
+            return false
+        }
+
+        var newVolume = max(0.0, min(1.0, volume))
+        let size = UInt32(MemoryLayout<Float>.size) // byte size of the `newVolume` in-buffer
+        return AudioObjectSetPropertyData(device, &address, 0, nil, size, &newVolume) == noErr
+    }
+}
+
+/// An immutable capture of an output device's volume — either its master element or
+/// its individual stereo channels — so a duck can be reverted without losing the
+/// device's original per-channel (left/right) balance.
+struct OutputVolumeSnapshot { + fileprivate struct Channel { + let element: AudioObjectPropertyElement + let volume: Float + } + + fileprivate let deviceID: AudioDeviceID + fileprivate let channels: [Channel] + + /// Average level across the captured channels, used for logging and for + /// detecting whether the user changed the volume mid-dictation. + var averageLevel: Float { + guard !self.channels.isEmpty else { return 0 } + return self.channels.map(\.volume).reduce(0, +) / Float(self.channels.count) + } + + /// A copy with every channel scaled by `factor` (clamped to `0.0...1.0`). + /// Scaling each channel by the same factor lowers the volume while preserving + /// the device's left/right balance. + func scaled(by factor: Float) -> OutputVolumeSnapshot { + OutputVolumeSnapshot( + deviceID: self.deviceID, + channels: self.channels.map { + Channel(element: $0.element, volume: max(0.0, min(1.0, $0.volume * factor))) + } + ) + } +} diff --git a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift index 5c7ced21..a1db947d 100644 --- a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift +++ b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift @@ -15,14 +15,14 @@ final class TranscriptionSoundPlayer { guard SettingsStore.shared.enableTranscriptionSounds else { return } let selected = SettingsStore.shared.transcriptionStartSound guard let soundName = selected.startSoundFileName else { return } - self.play(soundName: soundName) + self.play(soundName: soundName, enforceDuckingPrecedence: true) } func playStopSound() { guard SettingsStore.shared.enableTranscriptionSounds else { return } let selected = SettingsStore.shared.transcriptionStartSound guard let soundName = selected.stopSoundFileName else { return } - self.play(soundName: soundName) + self.play(soundName: soundName, enforceDuckingPrecedence: true) } /// Preview a specific sound at the current volume setting (used in Settings UI). 
@@ -38,7 +38,7 @@ final class TranscriptionSoundPlayer { self.play(soundName: soundName, overrideVolume: volume) } - private func play(soundName: String, overrideVolume: Float? = nil) { + private func play(soundName: String, overrideVolume: Float? = nil, enforceDuckingPrecedence: Bool = false) { guard let url = Bundle.main.url(forResource: soundName, withExtension: "m4a") else { DebugLogger.shared.error("Missing sound resource: \(soundName).m4a", source: "TranscriptionSoundPlayer") return @@ -47,7 +47,21 @@ final class TranscriptionSoundPlayer { let settings = SettingsStore.shared let desiredVolume = overrideVolume ?? settings.transcriptionSoundVolume - if settings.transcriptionSoundIndependentVolume { + // When "lower volume instead of pausing" (ducking) is enabled, MediaPlaybackService + // owns the system output volume for the duration of a dictation session. The cue's + // independent-volume mode also temporarily sets and *asynchronously* restores the + // system volume, and the two would race: a late cue restore can undo the duck at + // session start, or overwrite the final restore and leave the Mac stuck at the ducked + // level. So for the session start/stop cues, ducking takes precedence — the cue plays + // at its own player volume and leaves the system volume alone. Settings previews pass + // enforceDuckingPrecedence == false, so they never run during a session and always + // honor the independent-volume setting. 
+ let duckingOwnsSystemVolume = enforceDuckingPrecedence + && settings.pauseMediaDuringTranscription + && settings.duckMediaInsteadOfPausing + let useIndependentVolume = settings.transcriptionSoundIndependentVolume && !duckingOwnsSystemVolume + + if useIndependentVolume { let currentSystemVol = Self.getSystemVolume() guard currentSystemVol > 0.001 else { return } // Save current system volume and temporarily set it to desired level @@ -66,7 +80,7 @@ final class TranscriptionSoundPlayer { } player.currentTime = 0 - if settings.transcriptionSoundIndependentVolume { + if useIndependentVolume { player.volume = 1.0 } else { player.volume = desiredVolume @@ -74,7 +88,7 @@ final class TranscriptionSoundPlayer { player.play() // Restore system volume after the sound finishes - if settings.transcriptionSoundIndependentVolume, let saved = self.savedSystemVolume { + if useIndependentVolume, let saved = self.savedSystemVolume { let duration = player.duration DispatchQueue.main.asyncAfter(deadline: .now() + duration + 0.05) { [weak self] in Self.setSystemVolume(saved) diff --git a/Sources/Fluid/UI/SettingsView.swift b/Sources/Fluid/UI/SettingsView.swift index d9b280e1..3d79d9e4 100644 --- a/Sources/Fluid/UI/SettingsView.swift +++ b/Sources/Fluid/UI/SettingsView.swift @@ -936,6 +936,49 @@ struct SettingsView: View { set: { SettingsStore.shared.pauseMediaDuringTranscription = $0 } ) ) + + if self.settings.pauseMediaDuringTranscription { + VStack(alignment: .leading, spacing: 10) { + self.optionToggleRow( + title: "Lower Volume Instead of Pausing", + description: "Duck currently playing audio/video to a quieter level while you dictate, then restore it — instead of stopping playback. 
Affects overall system output volume.", + isOn: Binding( + get: { SettingsStore.shared.duckMediaInsteadOfPausing }, + set: { SettingsStore.shared.duckMediaInsteadOfPausing = $0 } + ) + ) + + if self.settings.duckMediaInsteadOfPausing { + HStack { + VStack(alignment: .leading, spacing: 2) { + Text("Volume While Dictating") + .font(self.theme.typography.bodyStrong) + .foregroundStyle(self.settingsTitleText) + Text("Fraction of the current volume to keep while dictation is active.") + .font(self.theme.typography.bodySmall) + .foregroundStyle(self.settingsSecondaryText) + } + + Spacer() + + HStack(spacing: 6) { + Slider(value: self.$settings.duckMediaVolumeLevel, in: 0.05 ... 1.0, step: 0.05) + .frame(width: 110) + .controlSize(.small) + + Text("\(Int((self.settings.duckMediaVolumeLevel * 100).rounded()))%") + .font(.caption.monospaced()) + .foregroundStyle(self.settingsSecondaryText) + .frame(width: 44, alignment: .trailing) + } + .frame(width: 160, alignment: .trailing) + } + } + } + .padding(.leading, 16) + .padding(.top, 2) + } + Divider().opacity(0.2) self.optionToggleRow(