diff --git a/Fluid.xcodeproj/project.pbxproj b/Fluid.xcodeproj/project.pbxproj index d72a6a01..68adb622 100644 --- a/Fluid.xcodeproj/project.pbxproj +++ b/Fluid.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */; }; 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; }; 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; }; + 37C99EA57FCA4CDA8967073A /* DictationSystemPromptTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C79B8A76E7C4F7A80A8EB95 /* DictationSystemPromptTests.swift */; }; 7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; }; 7CDB0A302F3C4D5600FB7CAD /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */; }; 7CE006BD2E80EBE600DDCCD6 /* AppUpdater in Frameworks */ = {isa = PBXBuildFile; productRef = 7CE006BC2E80EBE600DDCCD6 /* AppUpdater */; }; @@ -34,6 +35,7 @@ 7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HotkeyShortcutTests.swift; sourceTree = ""; }; 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = ""; }; + 7C79B8A76E7C4F7A80A8EB95 /* DictationSystemPromptTests.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = DictationSystemPromptTests.swift; sourceTree = ""; }; 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = ""; }; 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = ""; }; 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = XCTest.framework; path = Platforms/MacOSX.platform/Developer/Library/Frameworks/XCTest.framework; sourceTree = DEVELOPER_DIR; }; @@ -104,6 +106,7 @@ 7CDB0A272F3C4D5600FB7CAD /* Resources */, 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */, 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */, + 7C79B8A76E7C4F7A80A8EB95 /* DictationSystemPromptTests.swift */, ); path = FluidDictationIntegrationTests; sourceTree = ""; @@ -258,6 +261,7 @@ 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */, 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */, 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */, + 37C99EA57FCA4CDA8967073A /* DictationSystemPromptTests.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index 65ba6239..2604647a 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -1807,23 +1807,12 @@ struct ContentView: View { return self.buildSystemPrompt(appInfo: appInfo, dictationSlot: dictationSlot) }() - // Dictation enhancement folds the prompt + transcript into a single user - // turn (substituting `${transcript}` when present, otherwise appending - // the transcript after a blank line). 
Non-dictation callers — the AI - // chat tab specifically — keep the legacy two-message layout where - // the prompt is the system turn and the input is the user turn. - let systemPrompt: String - let userMessageContent: String - if isDictationCall { - systemPrompt = "" - userMessageContent = SettingsStore.renderDictationUserMessage( - promptText: promptText, - transcript: inputText - ) - } else { - systemPrompt = promptText - userMessageContent = inputText - } + // Instructions always go in the system role; the transcript (or user + // input) is always the sole user turn. Folding both into the user message + // was the previous behaviour for dictation calls, but it causes weaker + // models to answer the transcript rather than apply the instructions. + let systemPrompt = SettingsStore.renderSystemPrompt(promptText: promptText, transcript: inputText) + let userMessageContent = inputText // Route to Apple Intelligence if selected if currentSelectedProviderID == "apple-intelligence" { diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 936f530e..51fe5b89 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -1149,6 +1149,14 @@ final class SettingsStore: ObservableObject { return promptText + "\n\n" + transcript } + /// Substitute `${transcript}` in a system-role prompt template. + /// Unlike `renderDictationUserMessage`, this never appends the transcript — + /// the transcript is always sent as a separate user turn. 
+    static func renderSystemPrompt(promptText: String, transcript: String) -> String {
+        let marker = self.transcriptPlaceholder // every occurrence is substituted; nothing is appended
+        return promptText.contains(marker) ? promptText.replacingOccurrences(of: marker, with: transcript) : promptText
+    }
+
     private func defaultPromptResolution(
         for mode: PromptMode,
         source: PromptResolutionSource,
diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift
index fd47ca0a..48169e9f 100644
--- a/Sources/Fluid/Services/DictationPostProcessingService.swift
+++ b/Sources/Fluid/Services/DictationPostProcessingService.swift
@@ -73,12 +73,11 @@ final class DictationPostProcessingService {
             )
         }
 
-        let promptText = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil)
-        let systemPrompt = ""
-        let userMessageContent = SettingsStore.renderDictationUserMessage(
-            promptText: promptText,
+        let systemPrompt = SettingsStore.renderSystemPrompt(
+            promptText: settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil),
             transcript: trimmed
         )
+        let userMessageContent = trimmed
 
         if resolved.providerID == "apple-intelligence" {
             #if canImport(FoundationModels)
diff --git a/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift b/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift
new file mode 100644
index 00000000..0c89474e
--- /dev/null
+++ b/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift
@@ -0,0 +1,81 @@
+@testable import FluidVoice_Debug
+import XCTest
+
+// Regression tests for https://github.com/altic-dev/FluidVoice/issues/388
+// AI enhancement instructions must be sent in the system role, not the user message.
+// Previously, DictationPostProcessingService hardcoded systemPrompt = "" and folded
+// the instruction text into the user message alongside the transcript.
+
+@MainActor
+final class DictationSystemPromptTests: XCTestCase {
+    // MARK: - effectiveDictationSystemPrompt
+
+    /// A selected custom profile's text must surface through `effectiveDictationSystemPrompt`.
+    func testEffectiveDictationSystemPrompt_returnsConfiguredPrompt() {
+        self.withPromptSettingsRestored {
+            let store = SettingsStore.shared
+            let profile = SettingsStore.DictationPromptProfile(
+                name: "Test Profile",
+                prompt: "Clean up the transcript. Remove filler words.",
+                mode: .dictate
+            )
+            store.dictationPromptProfiles = [profile]
+            store.selectedDictationPromptID = profile.id
+
+            let resolved = store.effectiveDictationSystemPrompt(for: .primary)
+            XCTAssertFalse(resolved.isEmpty, "effectiveDictationSystemPrompt must return the configured prompt, not an empty string")
+            XCTAssertTrue(resolved.contains("Clean up the transcript"), "system prompt must include the custom instruction text")
+        }
+    }
+
+    /// With the selection set to `.off`, the prompt is expected to fall back to a non-empty built-in default.
+    func testEffectiveDictationSystemPrompt_offSelection_returnsDefault() {
+        self.withPromptSettingsRestored {
+            let store = SettingsStore.shared
+            store.setDictationPromptSelection(.off)
+            let resolved = store.effectiveDictationSystemPrompt(for: .primary)
+            XCTAssertFalse(resolved.isEmpty, "built-in default prompt must be non-empty")
+        }
+    }
+
+    // MARK: - renderSystemPrompt (${transcript} placeholder in the system role)
+
+    /// A template without the placeholder comes back verbatim.
+    func testRenderSystemPrompt_noPlaceholder_returnsPromptUnchanged() {
+        let template = "Clean up the transcript."
+        let rendered = SettingsStore.renderSystemPrompt(promptText: template, transcript: "hello world")
+        XCTAssertEqual(rendered, template, "prompt without placeholder must be returned unchanged")
+    }
+
+    /// A placeholder embedded in the template is replaced by the transcript text.
+    func testRenderSystemPrompt_transcriptPlaceholder_isSubstitutedInSystemRole() {
+        let spoken = "um so like yeah"
+        let template = "Rewrite cleanly: \(SettingsStore.transcriptPlaceholder)"
+        let rendered = SettingsStore.renderSystemPrompt(promptText: template, transcript: spoken)
+        XCTAssertEqual(rendered, "Rewrite cleanly: um so like yeah")
+    }
+
+    // MARK: - Helpers
+
+    /// Runs `run`, then puts the prompt-related `UserDefaults.standard` entries back the way they were.
+    private func withPromptSettingsRestored(_ run: () -> Void) {
+        let userDefaults = UserDefaults.standard
+        let trackedKeys = ["DictationPromptProfiles", "SelectedDictationPromptID", "DictationPromptOff"]
+        // Remember only the keys that currently hold a value.
+        var saved: [String: Any] = [:]
+        for key in trackedKeys {
+            guard let existing = userDefaults.object(forKey: key) else { continue }
+            saved[key] = existing
+        }
+        // On exit, rewrite saved values and delete keys that were absent beforehand.
+        defer {
+            for key in trackedKeys {
+                switch saved[key] {
+                case let .some(previous): userDefaults.set(previous, forKey: key)
+                case .none: userDefaults.removeObject(forKey: key)
+                }
+            }
+        }
+        run()
+    }
+}