diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
index 8bad378e..c120eabd 100644
--- a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -82,6 +82,15 @@
"version" : "1.6.0"
}
},
+ {
+ "identity" : "swift-atomics",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/apple/swift-atomics.git",
+ "state" : {
+ "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7",
+ "version" : "1.3.0"
+ }
+ },
{
"identity" : "swift-collections",
"kind" : "remoteSourceControl",
@@ -127,13 +136,22 @@
"version" : "1.6.4"
}
},
+ {
+ "identity" : "swift-nio",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/apple/swift-nio.git",
+ "state" : {
+ "revision" : "77b84ac2cd2ac9e4ac67d19f045fd5b434f56967",
+ "version" : "2.101.0"
+ }
+ },
{
"identity" : "swift-sdk",
"kind" : "remoteSourceControl",
"location" : "https://github.com/modelcontextprotocol/swift-sdk.git",
"state" : {
- "revision" : "c0407a0b52677cb395d824cac2879b963075ba8c",
- "version" : "0.10.2"
+ "revision" : "a0ae212ebf6eab5f754c3129608bc5557637e605",
+ "version" : "0.12.1"
}
},
{
diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift
index eece55f6..3641c5d4 100644
--- a/Sources/Fluid/ContentView.swift
+++ b/Sources/Fluid/ContentView.swift
@@ -1621,15 +1621,17 @@ struct ContentView: View {
return self.buildSystemPrompt(appInfo: appInfo, dictationSlot: dictationSlot)
}()
- // Dictation enhancement folds the prompt + transcript into a single user
- // turn (substituting `${transcript}` when present, otherwise appending
- // the transcript after a blank line). Non-dictation callers — the AI
- // chat tab specifically — keep the legacy two-message layout where
- // the prompt is the system turn and the input is the user turn.
+ // Dictation enhancement sends the prompt as the system turn and only
+ // the wrapped transcript as the user turn. The exception is a prompt
+ // containing `${transcript}`: it is rendered into a single user message
+ // and the system turn is left empty. Non-dictation callers — the AI
+ // chat tab specifically — keep the two-message layout where the prompt
+ // is the system turn and the input is the user turn.
let systemPrompt: String
let userMessageContent: String
if isDictationCall {
- systemPrompt = ""
+ let usesTranscriptTemplate = promptText.contains(SettingsStore.transcriptPlaceholder)
+ systemPrompt = usesTranscriptTemplate ? "" : promptText
userMessageContent = SettingsStore.renderDictationUserMessage(
promptText: promptText,
transcript: inputText
@@ -1751,10 +1753,10 @@ struct ContentView: View {
)
}
- // Build messages array. For dictation enhancement the whole prompt +
- // transcript is folded into a single user message, so we omit the
- // (empty) system role. Non-dictation callers keep the legacy
- // system + user shape.
+ // Build messages array. Dictation enhancement uses system + user by
+ // default, but explicit `${transcript}` templates omit the system role
+ // because the full template is already the user message. Non-dictation
+ // callers keep the legacy system + user shape.
var messages: [[String: Any]] = []
if !systemPrompt.isEmpty {
messages.append(["role": "system", "content": systemPrompt])
diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift
index fde59d82..517d86ba 100644
--- a/Sources/Fluid/Persistence/SettingsStore.swift
+++ b/Sources/Fluid/Persistence/SettingsStore.swift
@@ -1019,18 +1019,16 @@ final class SettingsStore: ObservableObject {
/// when composing the user message for a dictation enhancement call.
static let transcriptPlaceholder = "${transcript}"
- /// Compose the user-turn string for a dictation enhancement call by folding
- /// the transcript into the prompt template. If the template contains the
- /// `${transcript}` placeholder, the placeholder is replaced; otherwise
- /// the transcript is appended after a blank line, matching the pre-PR
- /// behaviour of sending the transcript as a separate user message.
+ /// Compose the user-turn string for a dictation enhancement call.
+ /// If the prompt contains the `${transcript}` placeholder, every occurrence
+ /// is replaced with the transcript and the result is the whole user message.
+ /// Otherwise the prompt is expected to travel in the system turn, so only
+ /// the wrapped transcript is returned and no prompt text is included.
static func renderDictationUserMessage(promptText: String, transcript: String) -> String {
if promptText.contains(self.transcriptPlaceholder) {
return promptText.replacingOccurrences(of: self.transcriptPlaceholder, with: transcript)
}
- let trimmedPrompt = promptText.trimmingCharacters(in: .whitespacesAndNewlines)
- if trimmedPrompt.isEmpty { return transcript }
- return promptText + "\n\n" + transcript
+ return "\n\(transcript)\n"
}
private func defaultPromptResolution(
diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift
index 28ec06d3..bd7f3b53 100644
--- a/Sources/Fluid/Services/DictationPostProcessingService.swift
+++ b/Sources/Fluid/Services/DictationPostProcessingService.swift
@@ -67,7 +67,8 @@ final class DictationPostProcessingService {
}
let promptText = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil)
- let systemPrompt = ""
+ let usesTranscriptTemplate = promptText.contains(SettingsStore.transcriptPlaceholder)
+ let systemPrompt = usesTranscriptTemplate ? "" : promptText
let userMessageContent = SettingsStore.renderDictationUserMessage(
promptText: promptText,
transcript: trimmed
diff --git a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
index 9d441a7f..7d6f00ad 100644
--- a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
+++ b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
@@ -267,6 +267,16 @@ final class DictationE2ETests: XCTestCase {
}
}
+ func testDictationUserMessageWrapsTranscriptWithoutPromptInstructions() {
+ let userMessage = SettingsStore.renderDictationUserMessage(
+ promptText: "Clean up this transcript and output only the edited text.",
+ transcript: "hello fluid voice"
+ )
+
+ XCTAssertEqual(userMessage, "\nhello fluid voice\n")
+ XCTAssertFalse(userMessage.contains("Clean up this transcript"))
+ }
+
func testCustomProviderSettingsRoundTripThroughSettingsStore() {
self.withProviderSettingsRestored {
let settings = SettingsStore.shared