diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 8bad378e..c120eabd 100644 --- a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -82,6 +82,15 @@ "version" : "1.6.0" } }, + { + "identity" : "swift-atomics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-atomics.git", + "state" : { + "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", + "version" : "1.3.0" + } + }, { "identity" : "swift-collections", "kind" : "remoteSourceControl", @@ -127,13 +136,22 @@ "version" : "1.6.4" } }, + { + "identity" : "swift-nio", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-nio.git", + "state" : { + "revision" : "77b84ac2cd2ac9e4ac67d19f045fd5b434f56967", + "version" : "2.101.0" + } + }, { "identity" : "swift-sdk", "kind" : "remoteSourceControl", "location" : "https://github.com/modelcontextprotocol/swift-sdk.git", "state" : { - "revision" : "c0407a0b52677cb395d824cac2879b963075ba8c", - "version" : "0.10.2" + "revision" : "a0ae212ebf6eab5f754c3129608bc5557637e605", + "version" : "0.12.1" } }, { diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index eece55f6..3641c5d4 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -1621,15 +1621,17 @@ struct ContentView: View { return self.buildSystemPrompt(appInfo: appInfo, dictationSlot: dictationSlot) }() - // Dictation enhancement folds the prompt + transcript into a single user - // turn (substituting `${transcript}` when present, otherwise appending - // the transcript after a blank line). Non-dictation callers — the AI - // chat tab specifically — keep the legacy two-message layout where - // the prompt is the system turn and the input is the user turn. + // Dictation enhancement normally sends prompt instructions as the + // system turn and only the tagged transcript as the user turn. Explicit + // `${transcript}` templates keep their legacy single user-message shape. + // Non-dictation callers — the AI chat tab specifically — keep the + // legacy two-message layout where the prompt is the system turn and + // the input is the user turn. let systemPrompt: String let userMessageContent: String if isDictationCall { - systemPrompt = "" + let usesTranscriptTemplate = promptText.contains(SettingsStore.transcriptPlaceholder) + systemPrompt = usesTranscriptTemplate ? "" : promptText userMessageContent = SettingsStore.renderDictationUserMessage( promptText: promptText, transcript: inputText @@ -1751,10 +1753,10 @@ struct ContentView: View { ) } - // Build messages array. For dictation enhancement the whole prompt + - // transcript is folded into a single user message, so we omit the - // (empty) system role. Non-dictation callers keep the legacy - // system + user shape. + // Build messages array. Dictation enhancement uses system + user by + // default, but explicit `${transcript}` templates omit the system role + // because the full template is already the user message. Non-dictation + // callers keep the legacy system + user shape. var messages: [[String: Any]] = [] if !systemPrompt.isEmpty { messages.append(["role": "system", "content": systemPrompt]) diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index fde59d82..517d86ba 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -1019,18 +1019,16 @@ final class SettingsStore: ObservableObject { /// when composing the user message for a dictation enhancement call. static let transcriptPlaceholder = "${transcript}" - /// Compose the user-turn string for a dictation enhancement call by folding - /// the transcript into the prompt template. If the template contains the - /// `${transcript}` placeholder, the placeholder is replaced; otherwise - /// the transcript is appended after a blank line, matching the pre-PR - /// behaviour of sending the transcript as a separate user message. + /// Compose the user-turn string for a dictation enhancement call. + /// If the prompt explicitly contains the `${transcript}` placeholder, keep + /// honoring that full user-message template. Otherwise the prompt belongs + /// in the system turn, and the user turn contains only tagged transcript + /// content. static func renderDictationUserMessage(promptText: String, transcript: String) -> String { if promptText.contains(self.transcriptPlaceholder) { return promptText.replacingOccurrences(of: self.transcriptPlaceholder, with: transcript) } - let trimmedPrompt = promptText.trimmingCharacters(in: .whitespacesAndNewlines) - if trimmedPrompt.isEmpty { return transcript } - return promptText + "\n\n" + transcript + return "\n\(transcript)\n" } private func defaultPromptResolution( diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift index 28ec06d3..bd7f3b53 100644 --- a/Sources/Fluid/Services/DictationPostProcessingService.swift +++ b/Sources/Fluid/Services/DictationPostProcessingService.swift @@ -67,7 +67,8 @@ final class DictationPostProcessingService { } let promptText = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil) - let systemPrompt = "" + let usesTranscriptTemplate = promptText.contains(SettingsStore.transcriptPlaceholder) + let systemPrompt = usesTranscriptTemplate ? "" : promptText let userMessageContent = SettingsStore.renderDictationUserMessage( promptText: promptText, transcript: trimmed diff --git a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift index 9d441a7f..7d6f00ad 100644 --- a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift +++ b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift @@ -267,6 +267,16 @@ final class DictationE2ETests: XCTestCase { } } + func testDictationUserMessageWrapsTranscriptWithoutPromptInstructions() { + let userMessage = SettingsStore.renderDictationUserMessage( + promptText: "Clean up this transcript and output only the edited text.", + transcript: "hello fluid voice" + ) + + XCTAssertEqual(userMessage, "\nhello fluid voice\n") + XCTAssertFalse(userMessage.contains("Clean up this transcript")) + } + func testCustomProviderSettingsRoundTripThroughSettingsStore() { self.withProviderSettingsRestored { let settings = SettingsStore.shared