altic-dev · sdhilip200 · Jun 21, 2026
diff --git a/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Fluid.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift
@@ -1621,15 +1621,17 @@ struct ContentView: View {
             return self.buildSystemPrompt(appInfo: appInfo, dictationSlot: dictationSlot)
         }()
 
-        // Dictation enhancement folds the prompt + transcript into a single user
-        // turn (substituting `${transcript}` when present, otherwise appending
-        // the transcript after a blank line). Non-dictation callers — the AI
-        // chat tab specifically — keep the legacy two-message layout where
-        // the prompt is the system turn and the input is the user turn.
+        // Dictation enhancement normally sends prompt instructions as the
+        // system turn and only the tagged transcript as the user turn. Explicit
+        // `${transcript}` templates keep their legacy single user-message shape.
+        // Non-dictation callers — the AI chat tab specifically — keep the
+        // legacy two-message layout where the prompt is the system turn and
+        // the input is the user turn.
         let systemPrompt: String
         let userMessageContent: String
         if isDictationCall {
-            systemPrompt = ""
+            let usesTranscriptTemplate = promptText.contains(SettingsStore.transcriptPlaceholder)
+            systemPrompt = usesTranscriptTemplate ? "" : promptText
             userMessageContent = SettingsStore.renderDictationUserMessage(
                 promptText: promptText,
                 transcript: inputText
@@ -1751,10 +1753,10 @@ struct ContentView: View {
             )
         }
 
-        // Build messages array. For dictation enhancement the whole prompt +
-        // transcript is folded into a single user message, so we omit the
-        // (empty) system role. Non-dictation callers keep the legacy
-        // system + user shape.
+        // Build messages array. Dictation enhancement uses system + user by
+        // default, but explicit `${transcript}` templates omit the system role
+        // because the full template is already the user message. Non-dictation
+        // callers keep the legacy system + user shape.
         var messages: [[String: Any]] = []
         if !systemPrompt.isEmpty {
             messages.append(["role": "system", "content": systemPrompt])

diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift
@@ -1019,18 +1019,16 @@ final class SettingsStore: ObservableObject {
     /// when composing the user message for a dictation enhancement call.
     static let transcriptPlaceholder = "${transcript}"
 
-    /// Compose the user-turn string for a dictation enhancement call by folding
-    /// the transcript into the prompt template. If the template contains the
-    /// `${transcript}` placeholder, the placeholder is replaced; otherwise
-    /// the transcript is appended after a blank line, matching the pre-PR
-    /// behaviour of sending the transcript as a separate user message.
+    /// Compose the user-turn string for a dictation enhancement call.
+    /// If `promptText` contains `${transcript}`, every occurrence is replaced
+    /// with `transcript` and the rendered template is returned. Otherwise the
+    /// prompt is not included (callers send it as the system turn) and only
+    /// `transcript` wrapped in `<transcript>` tags is returned.
     static func renderDictationUserMessage(promptText: String, transcript: String) -> String {
         if promptText.contains(self.transcriptPlaceholder) {
             return promptText.replacingOccurrences(of: self.transcriptPlaceholder, with: transcript)
         }
-        let trimmedPrompt = promptText.trimmingCharacters(in: .whitespacesAndNewlines)
-        if trimmedPrompt.isEmpty { return transcript }
-        return promptText + "\n\n" + transcript
+        return "<transcript>\n\(transcript)\n</transcript>" // prompt-independent: an empty prompt also yields the tagged form
     }
 
     private func defaultPromptResolution(

diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift
@@ -67,7 +67,8 @@ final class DictationPostProcessingService {
         }
 
         let promptText = settings.effectiveDictationSystemPrompt(for: dictationSlot, appBundleID: nil)
-        let systemPrompt = ""
+        let usesTranscriptTemplate = promptText.contains(SettingsStore.transcriptPlaceholder)
+        let systemPrompt = usesTranscriptTemplate ? "" : promptText
         let userMessageContent = SettingsStore.renderDictationUserMessage(
             promptText: promptText,
             transcript: trimmed

diff --git a/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift b/Tests/FluidDictationIntegrationTests/DictationE2ETests.swift
@@ -267,6 +267,16 @@ final class DictationE2ETests: XCTestCase {
         }
     }
 
+    func testDictationUserMessageWrapsTranscriptWithoutPromptInstructions() {
+        let userMessage = SettingsStore.renderDictationUserMessage(
+            promptText: "Clean up this transcript and output only the edited text.", // no ${transcript} placeholder, so the tagged path runs
+            transcript: "hello fluid voice"
+        )
+
+        XCTAssertEqual(userMessage, "<transcript>\nhello fluid voice\n</transcript>")
+        XCTAssertFalse(userMessage.contains("Clean up this transcript")) // implied by the equality above; states the intent that prompt text stays out of the user turn
+    }
+
     func testCustomProviderSettingsRoundTripThroughSettingsStore() {
         self.withProviderSettingsRestored {
             let settings = SettingsStore.shared