From ded39d9a9b800297ca9d5f2398a8cdc6ca7e57b4 Mon Sep 17 00:00:00 2001 From: kayaba-attribution Date: Sat, 27 Jun 2026 11:50:11 -0400 Subject: [PATCH 1/3] Fix AI enhancement instructions sent in system prompt, not user message (#388) When building the LLM request for dictation enhancement, DictationPostProcessingService hardcoded `systemPrompt = ""` and folded the instruction text into the user message via renderDictationUserMessage. This caused providers that treat role separation strictly (e.g. Cerebras gpt-oss-120b) to answer the transcript as a question rather than apply the instructions as a directive. Fix: assign the resolved prompt text to `systemPrompt` directly and set `userMessageContent` to the raw transcript only. The messages builder already guards on `systemPrompt.isEmpty` before appending the system entry, so the no-prompt case (prompt off) is unaffected. Four unit tests added in DictationSystemPromptTests covering: - effectiveDictationSystemPrompt returns the configured instruction text - effectiveDictationSystemPrompt falls back to a non-empty built-in default - renderDictationUserMessage with empty prompt returns transcript only - renderDictationUserMessage ${transcript} placeholder substitution still works --- Fluid.xcodeproj/project.pbxproj | 4 + .../DictationPostProcessingService.swift | 8 +- .../DictationSystemPromptTests.swift | 82 +++++++++++++++++++ 3 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift diff --git a/Fluid.xcodeproj/project.pbxproj b/Fluid.xcodeproj/project.pbxproj index d72a6a01..68adb622 100644 --- a/Fluid.xcodeproj/project.pbxproj +++ b/Fluid.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */; }; 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef 
= 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; }; 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; }; + 37C99EA57FCA4CDA8967073A /* DictationSystemPromptTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C79B8A76E7C4F7A80A8EB95 /* DictationSystemPromptTests.swift */; }; 7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; }; 7CDB0A302F3C4D5600FB7CAD /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */; }; 7CE006BD2E80EBE600DDCCD6 /* AppUpdater in Frameworks */ = {isa = PBXBuildFile; productRef = 7CE006BC2E80EBE600DDCCD6 /* AppUpdater */; }; @@ -34,6 +35,7 @@ 7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HotkeyShortcutTests.swift; sourceTree = ""; }; 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = ""; }; + 7C79B8A76E7C4F7A80A8EB95 /* DictationSystemPromptTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationSystemPromptTests.swift; sourceTree = ""; }; 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = ""; }; 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = ""; }; 7CDB0A2C2F3C4D5600FB7CAD /* 
XCTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = XCTest.framework; path = Platforms/MacOSX.platform/Developer/Library/Frameworks/XCTest.framework; sourceTree = DEVELOPER_DIR; }; @@ -104,6 +106,7 @@ 7CDB0A272F3C4D5600FB7CAD /* Resources */, 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */, 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */, + 7C79B8A76E7C4F7A80A8EB95 /* DictationSystemPromptTests.swift */, ); path = FluidDictationIntegrationTests; sourceTree = ""; @@ -258,6 +261,7 @@ 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */, 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */, 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */, + 37C99EA57FCA4CDA8967073A /* DictationSystemPromptTests.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift index fd47ca0a..e24847df 100644 --- a/Sources/Fluid/Services/DictationPostProcessingService.swift +++ b/Sources/Fluid/Services/DictationPostProcessingService.swift @@ -73,12 +73,8 @@ final class DictationPostProcessingService { ) } - let promptText = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil) - let systemPrompt = "" - let userMessageContent = SettingsStore.renderDictationUserMessage( - promptText: promptText, - transcript: trimmed - ) + let systemPrompt = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil) + let userMessageContent = trimmed if resolved.providerID == "apple-intelligence" { #if canImport(FoundationModels) diff --git a/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift b/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift new file mode 100644 index 00000000..71e529af --- /dev/null +++ b/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift @@ -0,0 
+1,82 @@ +@testable import FluidVoice_Debug +import XCTest + +// Regression tests for https://github.com/altic-dev/FluidVoice/issues/388 +// AI enhancement instructions must be sent in the system role, not the user message. +// Previously, DictationPostProcessingService hardcoded systemPrompt = "" and folded +// the instruction text into the user message alongside the transcript. + +@MainActor +final class DictationSystemPromptTests: XCTestCase { + + // MARK: - effectiveDictationSystemPrompt + + func testEffectiveDictationSystemPrompt_returnsConfiguredPrompt() { + withPromptSettingsRestored { + let settings = SettingsStore.shared + let custom = SettingsStore.DictationPromptProfile( + name: "Test Profile", + prompt: "Clean up the transcript. Remove filler words.", + mode: .dictate + ) + settings.dictationPromptProfiles = [custom] + settings.selectedDictationPromptID = custom.id + + let result = settings.effectiveDictationSystemPrompt(for: .primary) + XCTAssertFalse(result.isEmpty, "effectiveDictationSystemPrompt must return the configured prompt, not an empty string") + XCTAssertTrue(result.contains("Clean up the transcript"), "system prompt must include the custom instruction text") + } + } + + func testEffectiveDictationSystemPrompt_offSelection_returnsDefault() { + withPromptSettingsRestored { + let settings = SettingsStore.shared + settings.setDictationPromptSelection(.off) + + // When off, effectiveDictationSystemPrompt falls back to the built-in default, + // which is non-empty. This ensures the system field is never silently blank. + let result = settings.effectiveDictationSystemPrompt(for: .primary) + XCTAssertFalse(result.isEmpty, "built-in default prompt must be non-empty") + } + } + + // MARK: - renderDictationUserMessage (user message must be only the transcript) + + func testRenderDictationUserMessage_emptyPrompt_returnsOnlyTranscript() { + // After the fix, userMessageContent = trimmed (the raw transcript). 
+ // renderDictationUserMessage("", transcript:) must return only the transcript. + let transcript = "this is the dictated text" + let result = SettingsStore.renderDictationUserMessage(promptText: "", transcript: transcript) + XCTAssertEqual(result, transcript, "user message with empty promptText must be the transcript only — no instructions appended") + } + + func testRenderDictationUserMessage_transcriptPlaceholder_isReplacedCorrectly() { + // Verify placeholder substitution is not broken by the refactor. + let prompt = "Rewrite cleanly: \(SettingsStore.transcriptPlaceholder)" + let transcript = "um so like yeah" + let result = SettingsStore.renderDictationUserMessage(promptText: prompt, transcript: transcript) + XCTAssertEqual(result, "Rewrite cleanly: um so like yeah") + } + + // MARK: - Helpers + + private func withPromptSettingsRestored(_ run: () -> Void) { + let keys = [ + "DictationPromptProfiles", + "SelectedDictationPromptID", + "DictationPromptOff", + ] + let defaults = UserDefaults.standard + var snapshot: [String: Any] = [:] + for key in keys { + if let v = defaults.object(forKey: key) { snapshot[key] = v } + } + defer { + for key in keys { + if let v = snapshot[key] { defaults.set(v, forKey: key) } + else { defaults.removeObject(forKey: key) } + } + } + run() + } +} From 4e92e4e75d8ae4dff6310fe421f140c8aafe6e83 Mon Sep 17 00:00:00 2001 From: kayaba-attribution Date: Sat, 27 Jun 2026 11:58:38 -0400 Subject: [PATCH 2/3] Fix system prompt placement in ContentView.processTextWithAI (#388) DictationPostProcessingService (fixed in the previous commit) is only called from the local API endpoint. The actual user-facing dictation path goes through ContentView.processTextWithAI, which had its own independent prompt assembly that folded instructions + transcript into a single user turn via renderDictationUserMessage. This commit aligns the ContentView path with the correct behaviour: prompt goes in the system role, transcript (or user input) is the sole user turn. 
Both isDictationCall and non-dictation branches now do the same thing, so the conditional is removed. --- Sources/Fluid/ContentView.swift | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index 65ba6239..a5043591 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -1807,23 +1807,12 @@ struct ContentView: View { return self.buildSystemPrompt(appInfo: appInfo, dictationSlot: dictationSlot) }() - // Dictation enhancement folds the prompt + transcript into a single user - // turn (substituting `${transcript}` when present, otherwise appending - // the transcript after a blank line). Non-dictation callers — the AI - // chat tab specifically — keep the legacy two-message layout where - // the prompt is the system turn and the input is the user turn. - let systemPrompt: String - let userMessageContent: String - if isDictationCall { - systemPrompt = "" - userMessageContent = SettingsStore.renderDictationUserMessage( - promptText: promptText, - transcript: inputText - ) - } else { - systemPrompt = promptText - userMessageContent = inputText - } + // Instructions always go in the system role; the transcript (or user + // input) is always the sole user turn. Folding both into the user message + // was the previous behaviour for dictation calls, but it causes weaker + // models to answer the transcript rather than apply the instructions. 
+ let systemPrompt = promptText + let userMessageContent = inputText // Route to Apple Intelligence if selected if currentSelectedProviderID == "apple-intelligence" { From a7a0d1c18ede8ad66c9d1765a0d41e657c513c16 Mon Sep 17 00:00:00 2001 From: kayaba-attribution Date: Sat, 27 Jun 2026 13:13:05 -0400 Subject: [PATCH 3/3] fix: restore ${transcript} placeholder substitution in system role Add SettingsStore.renderSystemPrompt(promptText:transcript:) which substitutes the ${transcript} placeholder when present and returns the prompt unchanged otherwise. Call it in ContentView.processTextWithAI and DictationPostProcessingService before assigning the system prompt, so saved prompt templates that reference the placeholder continue to work under the new two-role message layout. Update DictationSystemPromptTests to test renderSystemPrompt (the production path) instead of renderDictationUserMessage, which is no longer called in production. Fixes the P2 raised in review of #439. Co-Authored-By: Claude Sonnet 4.6 --- Sources/Fluid/ContentView.swift | 2 +- Sources/Fluid/Persistence/SettingsStore.swift | 8 ++++++ .../DictationPostProcessingService.swift | 5 +++- .../DictationSystemPromptTests.swift | 25 +++++++++---------- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index a5043591..2604647a 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -1811,7 +1811,7 @@ struct ContentView: View { // input) is always the sole user turn. Folding both into the user message // was the previous behaviour for dictation calls, but it causes weaker // models to answer the transcript rather than apply the instructions. 
- let systemPrompt = promptText + let systemPrompt = SettingsStore.renderSystemPrompt(promptText: promptText, transcript: inputText) let userMessageContent = inputText // Route to Apple Intelligence if selected diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index 936f530e..51fe5b89 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -1149,6 +1149,14 @@ final class SettingsStore: ObservableObject { return promptText + "\n\n" + transcript } + /// Substitute `${transcript}` in a system-role prompt template. + /// Unlike `renderDictationUserMessage`, this never appends the transcript — + /// the transcript is always sent as a separate user turn. + static func renderSystemPrompt(promptText: String, transcript: String) -> String { + guard promptText.contains(self.transcriptPlaceholder) else { return promptText } + return promptText.replacingOccurrences(of: self.transcriptPlaceholder, with: transcript) + } + private func defaultPromptResolution( for mode: PromptMode, source: PromptResolutionSource, diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift index e24847df..48169e9f 100644 --- a/Sources/Fluid/Services/DictationPostProcessingService.swift +++ b/Sources/Fluid/Services/DictationPostProcessingService.swift @@ -73,7 +73,10 @@ final class DictationPostProcessingService { ) } - let systemPrompt = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil) + let systemPrompt = SettingsStore.renderSystemPrompt( + promptText: settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil), + transcript: trimmed + ) let userMessageContent = trimmed if resolved.providerID == "apple-intelligence" { diff --git a/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift 
b/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift index 71e529af..0c89474e 100644 --- a/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift +++ b/Tests/FluidDictationIntegrationTests/DictationSystemPromptTests.swift @@ -8,11 +8,10 @@ import XCTest @MainActor final class DictationSystemPromptTests: XCTestCase { - // MARK: - effectiveDictationSystemPrompt func testEffectiveDictationSystemPrompt_returnsConfiguredPrompt() { - withPromptSettingsRestored { + self.withPromptSettingsRestored { let settings = SettingsStore.shared let custom = SettingsStore.DictationPromptProfile( name: "Test Profile", @@ -29,7 +28,7 @@ final class DictationSystemPromptTests: XCTestCase { } func testEffectiveDictationSystemPrompt_offSelection_returnsDefault() { - withPromptSettingsRestored { + self.withPromptSettingsRestored { let settings = SettingsStore.shared settings.setDictationPromptSelection(.off) @@ -40,21 +39,21 @@ final class DictationSystemPromptTests: XCTestCase { } } - // MARK: - renderDictationUserMessage (user message must be only the transcript) + // MARK: - renderSystemPrompt (${transcript} placeholder in the system role) - func testRenderDictationUserMessage_emptyPrompt_returnsOnlyTranscript() { - // After the fix, userMessageContent = trimmed (the raw transcript). - // renderDictationUserMessage("", transcript:) must return only the transcript. - let transcript = "this is the dictated text" - let result = SettingsStore.renderDictationUserMessage(promptText: "", transcript: transcript) - XCTAssertEqual(result, transcript, "user message with empty promptText must be the transcript only — no instructions appended") + func testRenderSystemPrompt_noPlaceholder_returnsPromptUnchanged() { + let prompt = "Clean up the transcript." 
+ let result = SettingsStore.renderSystemPrompt(promptText: prompt, transcript: "hello world") + XCTAssertEqual(result, prompt, "prompt without placeholder must be returned unchanged") } - func testRenderDictationUserMessage_transcriptPlaceholder_isReplacedCorrectly() { - // Verify placeholder substitution is not broken by the refactor. + func testRenderSystemPrompt_transcriptPlaceholder_isSubstitutedInSystemRole() { + // Users can embed ${transcript} in their system prompt template so the transcript + // appears inline in their instructions. renderSystemPrompt must substitute it before + // the prompt is sent to the provider as the system message. let prompt = "Rewrite cleanly: \(SettingsStore.transcriptPlaceholder)" let transcript = "um so like yeah" - let result = SettingsStore.renderDictationUserMessage(promptText: prompt, transcript: transcript) + let result = SettingsStore.renderSystemPrompt(promptText: prompt, transcript: transcript) XCTAssertEqual(result, "Rewrite cleanly: um so like yeah") }