mattmezza · mattmezza · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/skills/voice.md b/skills/voice.md
@@ -17,3 +17,20 @@ Use voice responses when:
 Do NOT use voice responses when:
 - The response contains code, links, or structured data.
 - The response is long or complex.
+
+## Writing for voice
+
+When you add `[respond_with_voice]`, write the whole message to be *spoken*, not
+read. The medium changed, so the style changes with it. Before deciding on voice,
+ask: does this content even work aloud? If it only makes sense on screen, reply
+with text instead.
+
+A voice reply must contain only plain, speakable words:
+- No emojis, no symbols (`*`, `#`, `~`, `>`, etc.) — say the meaning instead.
+- No URLs — describe the link ("I sent the booking page") or send the URL in a
+  text reply instead.
+- No code snippets, tables, or structured/markdown formatting.
+- No bullet points or dashes as list markers — speak it as flowing sentences
+  ("First… then… finally…").
+- Spell awkward things out: say "version one point two", not "v1.2".
+
+Keep it short and conversational, the way you'd actually say it out loud.
diff --git a/tests/test_voice_clean.py b/tests/test_voice_clean.py
@@ -0,0 +1,32 @@
+from voice.pipeline import clean_for_speech
+
+
+def test_strips_emoji():
+    assert clean_for_speech("Done 👍 ✅") == "Done"
+
+
+def test_strips_urls():
+    assert "http" not in clean_for_speech("See https://example.com/x for more")
+    assert "www" not in clean_for_speech("Visit www.example.com now")
+
+
+def test_strips_code():
+    assert clean_for_speech("Run `npm install` then go") == "Run then go"
+    assert clean_for_speech("Code:\n```\nx = 1\n```\ndone") == "Code:\ndone"
+
+
+def test_strips_markdown_symbols():
+    assert clean_for_speech("**bold** and #heading") == "bold and heading"
+
+
+def test_strips_bullets():
+    assert clean_for_speech("- first\n- second") == "first\nsecond"
+
+
+def test_dash_separator_becomes_pause():
+    assert clean_for_speech("yes — really") == "yes, really"
+    assert clean_for_speech("e-mail stays") == "e-mail stays"
+
+
+def test_plain_text_untouched():
+    assert clean_for_speech("Hello there, how are you?") == "Hello there, how are you?"
diff --git a/voice/pipeline.py b/voice/pipeline.py
@@ -4,6 +4,8 @@
 
 import io
 import logging
+import re
+import unicodedata
 from functools import partial
 from typing import TYPE_CHECKING
 
@@ -15,6 +17,33 @@
 
 log = logging.getLogger(__name__)
 
+# Patterns used by clean_for_speech(), compiled once at import time.
+# DOTALL lets "." cross newlines so a multi-line fence matches; the lazy
+# ".*?" stops at the first closing fence.
+_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL)  # fenced code
+# A single-backtick span; "[^`]*" also matches an empty pair of backticks.
+_INLINE_CODE_RE = re.compile(r"`[^`]*`")
+# "http://", "https://" or "www." plus everything up to the next whitespace,
+# so punctuation glued to the end of the URL is consumed as well.
+_URL_RE = re.compile(r"\b(?:https?://|www\.)\S+", re.IGNORECASE)
+# Only matches at the start of a line (MULTILINE) and only when the marker is
+# followed by a space or tab, so "**bold**" is not treated as a bullet.
+_LIST_MARKER_RE = re.compile(r"^[ \t]*[-*•‣◦]+[ \t]+", re.MULTILINE)  # leading bullets
+_MD_SYMBOLS_RE = re.compile(r"[*#_~>`|]")  # markdown emphasis/heading/table chars
+# Runs of two or more spaces/tabs; newlines are not part of the class.
+_WS_RE = re.compile(r"[ \t]{2,}")
+
+
+def clean_for_speech(text: str) -> str:
+    """Strip anything that reads badly when spoken: code, URLs, emojis, markdown.
+
+    Voice replies should be plain speakable text — no emojis, bullets, code
+    snippets, URLs, or symbols like * and #.  See issue #10.
+    """
+    text = _CODE_BLOCK_RE.sub(" ", text)
+    text = _INLINE_CODE_RE.sub(" ", text)
+    text = _URL_RE.sub(" ", text)
+    text = _LIST_MARKER_RE.sub("", text)
+    # dashes used as separators → pause; keep hyphens inside words
+    text = re.sub(r"\s[-–—]+\s", ", ", text)
+    text = _MD_SYMBOLS_RE.sub("", text)
+    # drop emoji & other pictographic symbols (unicode category "So")
+    text = "".join(ch for ch in text if unicodedata.category(ch) != "So")
+    text = _WS_RE.sub(" ", text)
+    lines = (line.strip() for line in text.splitlines())
+    return "\n".join(line for line in lines if line).strip()
+
 
 class VoicePipeline:
     """Speech-to-text via faster-whisper, text-to-speech via edge-tts."""
@@ -62,6 +91,7 @@ async def synthesize(self, text: str) -> bytes:
         if not self.tts_enabled:
             raise RuntimeError("TTS is disabled in config")
 
+        text = clean_for_speech(text)
         communicate = edge_tts.Communicate(text, self.tts_voice)
         buf = io.BytesIO()
         async for chunk in communicate.stream():