diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5df72ed..8e138c4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,9 +57,9 @@ jobs: r = client.get('/health') assert r.status_code == 200 data = r.json() - assert data['status'] == 'ok' - assert 'engines_loaded' in data - assert 'vad_loaded' in data + assert data['status'] in ('ok', 'degraded') + assert 'engines' in data + assert 'modalities' in data # Voices endpoint r = client.get('/v1/voices') diff --git a/.gitignore b/.gitignore index fda447b..b53a61e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,9 @@ __pycache__/ *.pyc .DS_Store .claude/ + +# ONNX Runtime WASM binaries (large, download via setup.sh) +dashboard/vad/*.wasm +dashboard/vad/*.mjs +dashboard/vad/ort.js +dashboard/vad/ort.min.mjs diff --git a/agent_loop.py b/agent_loop.py new file mode 100644 index 0000000..449fcba --- /dev/null +++ b/agent_loop.py @@ -0,0 +1,262 @@ +"""Agent loop — receives percepts, calls LLM with tools, dispatches actions. + +The agent loop is the bridge between the ModalityBus (perception/action) +and the InferenceProvider (thinking). It maintains conversation history +and routes tool calls through the bus. +""" + +from __future__ import annotations + +import json as _json +import logging +import os +import time +from typing import TYPE_CHECKING + +import httpx + +from bus import ModalityBus +from modality import CognitiveEvent, CognitiveIntent, ModalityType +from pipeline_state import PipelineState +from providers import AGENT_TOOLS, InferenceProvider + +if TYPE_CHECKING: + from channels import BrowserChannel + +logger = logging.getLogger("mod3.agent_loop") + +# Base system prompt — kernel context is appended dynamically +_BASE_SYSTEM_PROMPT = ( + "You are Cog, a voice assistant running on Mod³ (Apple Silicon, fully local). " + "You respond using tool calls. Use speak() for conversational voice responses — " + "keep them concise, 1-3 sentences. 
Use send_text() only when the content is " + "better read than heard (code, lists, links, structured data). " + "No markdown in speak() text. Speak naturally. " + "If the user asks something you can't do, say so briefly." +) + +# CogOS kernel endpoint for context enrichment +_COGOS_ENDPOINT = os.environ.get("COGOS_ENDPOINT", "http://localhost:6931") + +# Bus endpoint for logging exchanges (observation channel) +_COGOS_BUS_ENDPOINT = f"{_COGOS_ENDPOINT}/v1/bus" + + +def _fetch_kernel_context() -> str: + """Pull active context from CogOS kernel to enrich the system prompt. + + Returns a context block string, or empty string if kernel unavailable. + This is the afferent path: kernel → local model. + """ + try: + resp = httpx.get(f"{_COGOS_ENDPOINT}/health", timeout=2.0) + if resp.status_code != 200: + return "" + health = resp.json() + + parts = [] + identity = health.get("identity", "cog") + state = health.get("state", "unknown") + parts.append(f"Kernel identity: {identity}, state: {state}") + + # Try to get active session context + try: + ctx_resp = httpx.get(f"{_COGOS_ENDPOINT}/v1/context", timeout=2.0) + if ctx_resp.status_code == 200: + ctx = ctx_resp.json() + nucleus = ctx.get("nucleus", "") + if nucleus: + parts.append(f"Active nucleus: {nucleus}") + process_state = ctx.get("state", "") + if process_state: + parts.append(f"Process state: {process_state}") + except Exception: + pass + + # Check for barge-in context (what was Claude saying when interrupted?) + signal_file = os.environ.get("BARGEIN_SIGNAL", "/tmp/mod3-barge-in.json") + try: + if os.path.exists(signal_file): + with open(signal_file) as f: + signal = _json.load(f) + interrupted = signal.get("interrupted") + if interrupted: + delivered = interrupted.get("delivered_text", "") + pct = interrupted.get("spoken_pct", 0) + parts.append( + f"[barge-in] Claude's speech was interrupted at {pct * 100:.0f}%. " + f'Delivered: "{delivered}". 
' + f"The user interrupted to say something — acknowledge and respond to them." + ) + except Exception: + pass + + if parts: + return "\n\nKernel context:\n" + "\n".join(f"- {p}" for p in parts) + return "" + except Exception: + return "" + + +def _log_exchange_to_bus(user_text: str, assistant_text: str, provider_name: str): + """Log the local model exchange to the CogOS bus (observation channel). + + This is the efferent path: local model → kernel → Claude can observe. + """ + try: + payload = { + "type": "modality.voice.exchange", + "from": f"mod3-reflex:{provider_name}", + "payload": { + "user": user_text, + "assistant": assistant_text, + "provider": provider_name, + "timestamp": time.time(), + }, + } + httpx.post( + _COGOS_BUS_ENDPOINT, + json=payload, + timeout=2.0, + ) + except Exception as e: + logger.debug("Failed to log exchange to bus: %s", e) + + +MAX_HISTORY = 50 + + +class AgentLoop: + """Conversational agent that receives percepts and acts through the bus.""" + + def __init__( + self, + bus: ModalityBus, + provider: InferenceProvider, + pipeline_state: PipelineState, + channel_id: str = "", + ): + self.bus = bus + self.provider = provider + self.pipeline_state = pipeline_state + self.channel_id = channel_id + self.conversation: list[dict[str, str]] = [] + self._channel_ref: BrowserChannel | None = None + self._processing = False + + async def handle_event(self, event: CognitiveEvent) -> None: + """Called when a CognitiveEvent arrives from the channel.""" + if not event.content.strip(): + return + + if self._processing: + logger.warning("agent busy, dropping: %s", event.content[:50]) + return + + self._processing = True + try: + await self._process(event) + except Exception as e: + logger.error("agent_loop error: %s", e, exc_info=True) + try: + if self._channel_ref: + await self._channel_ref.send_response_text(f"[error: {e}]") + await self._channel_ref.send_response_complete() + except Exception: + pass # channel may be dead, don't block finally + 
finally: + self._processing = False + + async def _process(self, event: CognitiveEvent) -> None: + """Core: event → provider → tool dispatch.""" + self.conversation.append({"role": "user", "content": event.content}) + self._trim_history() + + t_start = time.perf_counter() + + # Assemble system prompt with kernel context (afferent path) + kernel_ctx = _fetch_kernel_context() + system_prompt = _BASE_SYSTEM_PROMPT + kernel_ctx + + response = await self.provider.chat( + messages=self.conversation, + tools=AGENT_TOOLS, + system=system_prompt, + ) + + t_llm = (time.perf_counter() - t_start) * 1000 + + # Dispatch tool calls + assistant_parts: list[str] = [] + + for tc in response.tool_calls: + if tc.name == "speak": + text = tc.arguments.get("text", "") + if text: + assistant_parts.append(text) + # Show text in chat panel + if self._channel_ref: + await self._channel_ref.send_response_text(text) + # Route through bus → VoiceEncoder → TTS → channel.deliver + intent = CognitiveIntent( + modality=ModalityType.VOICE, + content=text, + target_channel=self.channel_id, + metadata={ + "voice": self._channel_ref.config.get("voice", "bm_lewis") + if self._channel_ref + else "bm_lewis", + "speed": self._channel_ref.config.get("speed", 1.25) if self._channel_ref else 1.25, + }, + ) + # Fire-and-forget: bus.act(blocking=False) returns QueuedJob immediately, + # OutputQueue drain thread handles TTS encoding + delivery. 
+ self.bus.act(intent, channel=self.channel_id) + + elif tc.name == "send_text": + text = tc.arguments.get("text", "") + if text: + assistant_parts.append(text) + if self._channel_ref: + await self._channel_ref.send_response_text(text) + + # Fallback: if provider returned text but no tool calls, auto-speak + if not response.tool_calls and response.text: + text = response.text + assistant_parts.append(text) + if self._channel_ref: + await self._channel_ref.send_response_text(text) + intent = CognitiveIntent( + modality=ModalityType.VOICE, + content=text, + target_channel=self.channel_id, + metadata={ + "voice": self._channel_ref.config.get("voice", "bm_lewis") if self._channel_ref else "bm_lewis", + "speed": self._channel_ref.config.get("speed", 1.25) if self._channel_ref else 1.25, + }, + ) + self.bus.act(intent, channel=self.channel_id) + + # Update conversation history + if assistant_parts: + assistant_text = " ".join(assistant_parts) + self.conversation.append( + { + "role": "assistant", + "content": assistant_text, + } + ) + + # Log exchange to CogOS bus (observation channel — Claude can see this) + _log_exchange_to_bus(event.content, assistant_text, self.provider.name) + + # Signal completion + if self._channel_ref: + await self._channel_ref.send_response_complete( + metrics={"llm_ms": round(t_llm, 1), "provider": self.provider.name} + ) + + def _trim_history(self) -> None: + """Keep conversation within MAX_HISTORY messages.""" + if len(self.conversation) > MAX_HISTORY: + self.conversation = self.conversation[-MAX_HISTORY:] diff --git a/channels.py b/channels.py new file mode 100644 index 0000000..d0ab8c8 --- /dev/null +++ b/channels.py @@ -0,0 +1,315 @@ +"""Browser channel — WebSocket adapter for the Mod³ dashboard. + +Wraps a FastAPI WebSocket connection as a ChannelDescriptor on the bus. +Knows the WebSocket protocol (binary PCM / JSON control frames), +knows nothing about LLMs or agent logic. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import time +import uuid +from typing import Any, Awaitable, Callable + +from fastapi import WebSocket, WebSocketDisconnect + +from bus import ModalityBus +from modality import CognitiveEvent, EncodedOutput, ModalityType +from pipeline_state import PipelineState + +logger = logging.getLogger("mod3.channels") + + +class BrowserChannel: + """WebSocket-backed channel for the browser dashboard.""" + + def __init__( + self, + ws: WebSocket, + bus: ModalityBus, + pipeline_state: PipelineState, + loop: asyncio.AbstractEventLoop, + on_event: Callable[[CognitiveEvent], Awaitable[None]] | None = None, + ): + self.ws = ws + self.bus = bus + self.pipeline_state = pipeline_state + self._loop = loop + self._on_event = on_event + self.channel_id = f"browser:{uuid.uuid4().hex[:8]}" + self.config: dict[str, Any] = { + "voice": "bm_lewis", + "speed": 1.25, + "model": "kokoro", + } + self._audio_buffer = bytearray() + self._active = True + + # Register on the bus with a delivery callback + bus.register_channel( + self.channel_id, + modalities=[ModalityType.VOICE, ModalityType.TEXT], + deliver=self._deliver_sync, + ) + logger.info("BrowserChannel registered: %s", self.channel_id) + + # ------------------------------------------------------------------ + # Delivery (bus → browser) + # ------------------------------------------------------------------ + + def _deliver_sync(self, output: EncodedOutput) -> None: + """Called from the sync OutputQueue drain thread. 
Bridges to async.""" + if not self._active: + return + try: + future = asyncio.run_coroutine_threadsafe(self._deliver_async(output), self._loop) + future.result(timeout=10.0) + except (WebSocketDisconnect, RuntimeError, TimeoutError): + logger.debug("deliver failed (client disconnected?), deactivating channel") + self._active = False + + async def _deliver_async(self, output: EncodedOutput) -> None: + """Send encoded output over the WebSocket.""" + import base64 + + logger.info( + "deliver: modality=%s format=%s bytes=%d duration=%.1fs", + output.modality.value if output.modality else "none", + output.format, + len(output.data) if output.data else 0, + output.duration_sec, + ) + + if output.modality == ModalityType.VOICE and output.data: + # Send audio as base64 JSON (avoids binary frame issues) + audio_b64 = base64.b64encode(output.data).decode("ascii") + logger.info("deliver: sending base64 audio JSON (%d chars)", len(audio_b64)) + await self.ws.send_json( + { + "type": "audio", + "data": audio_b64, + "format": output.format or "wav", + "duration_sec": round(output.duration_sec, 2), + "sample_rate": output.metadata.get("sample_rate", 24000), + } + ) + logger.info("deliver: audio sent OK") + elif output.modality == ModalityType.TEXT: + text = output.data.decode("utf-8") if isinstance(output.data, bytes) else str(output.data) + logger.info("deliver: sending text response (%d chars)", len(text)) + await self.ws.send_json({"type": "response_text", "text": text}) + else: + logger.warning("deliver: unhandled modality %s, dropping", output.modality) + + # ------------------------------------------------------------------ + # Receive loop (browser → server) + # ------------------------------------------------------------------ + + async def run(self) -> None: + """Main receive loop — runs until WebSocket disconnects.""" + try: + while True: + message = await self.ws.receive() + msg_type = message.get("type", "") + if msg_type == "websocket.disconnect": + break + if 
"bytes" in message and message["bytes"]: + self._handle_audio(message["bytes"]) + elif "text" in message and message["text"]: + await self._handle_json(json.loads(message["text"])) + except WebSocketDisconnect: + pass + except Exception as e: + logger.error("BrowserChannel error: %s", e) + finally: + self._cleanup() + + def _handle_audio(self, pcm_bytes: bytes) -> None: + """Binary frame: raw Int16 PCM at 16kHz from browser Silero VAD.""" + self._audio_buffer.extend(pcm_bytes) + + async def _handle_json(self, msg: dict) -> None: + """JSON frame: control message dispatch.""" + msg_type = msg.get("type", "") + logger.info("Received JSON: type=%s", msg_type) + + if msg_type == "end_of_speech": + await self._process_utterance() + elif msg_type == "text_message": + text = msg.get("text", "").strip() + if text: + await self._process_text(text) + elif msg_type == "interrupt": + await self._handle_interrupt() + elif msg_type == "config": + for key in ("model", "voice", "speed"): + if key in msg: + self.config[key] = msg[key] + + # ------------------------------------------------------------------ + # Processing + # ------------------------------------------------------------------ + + async def _process_utterance(self) -> None: + """PCM audio buffer → WhisperDecoder STT → CognitiveEvent → agent loop. + + Skips the server-side VoiceGate (Silero VAD) because the browser + already ran Silero VAD client-side — no need to validate again, + and it avoids the torchaudio dependency for resampling. 
+ """ + pcm_data = bytes(self._audio_buffer) + self._audio_buffer.clear() + + if len(pcm_data) < 6400: # <200ms at 16kHz Int16 + return + + t0 = time.perf_counter() + + # Transcribe via mlx_whisper — needs a temp WAV file + def _transcribe(): + import io + import os + import struct + import tempfile + + import mlx_whisper + import numpy as np + + from vad import is_hallucination + + audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 + + # Skip silence + if len(audio) < 16000 * 0.3: + return None + rms = float(np.sqrt(np.mean(audio**2))) + if rms < 0.005: + return None + + # mlx_whisper needs a file path — write temp WAV + buf = io.BytesIO() + buf.write(b"RIFF") + buf.write(struct.pack(" None: + """Text message → CognitiveEvent → agent loop.""" + event = CognitiveEvent( + modality=ModalityType.TEXT, + content=text, + source_channel=self.channel_id, + confidence=1.0, + ) + await self.ws.send_json( + { + "type": "transcript", + "text": text, + "source": "text", + } + ) + if self._on_event: + await self._on_event(event) + + async def _handle_interrupt(self) -> None: + """Interrupt in-flight speech.""" + if self.pipeline_state.is_speaking: + self.pipeline_state.interrupt(reason="browser_interrupt") + await self.ws.send_json({"type": "interrupted"}) + + # ------------------------------------------------------------------ + # Helper methods (called by agent loop) + # ------------------------------------------------------------------ + + async def send_response_text(self, text: str) -> None: + """Send response text for display in chat panel.""" + if self._active: + try: + logger.info("send_response_text: %s", text[:100]) + await self.ws.send_json({"type": "response_text", "text": text}) + except Exception: + self._active = False + + async def send_response_complete(self, metrics: dict | None = None) -> None: + """Signal response is complete.""" + if self._active: + try: + await self.ws.send_json( + { + "type": "response_complete", + "metrics": 
metrics or {}, + } + ) + except Exception: + self._active = False + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def _cleanup(self) -> None: + """Deactivate channel and cancel pending TTS jobs on disconnect.""" + self._active = False + ch = self.bus._channels.get(self.channel_id) + if ch: + ch.active = False + cancelled = self.bus._queue_manager.cancel_channel(self.channel_id) + logger.info( + "BrowserChannel disconnected: %s (cancelled %d pending jobs)", + self.channel_id, + cancelled, + ) diff --git a/dashboard/index.html b/dashboard/index.html new file mode 100644 index 0000000..6b1f9f8 --- /dev/null +++ b/dashboard/index.html @@ -0,0 +1,696 @@ + + + + + +Mod³ Dashboard + + + + + +
+

Mod³

+
+ + Server +
+
+ + Mic +
+
+ + +
+ + +
+ Connecting... +
+ + + + +
+ + + + + +
+ + +
+ + +
+ + +
+ +
For voice, use headphones. Speak naturally — the system detects when you start and stop. Speak during playback to interrupt.
+
+ + + + + + + + + + + + + + + diff --git a/dashboard/playback.js b/dashboard/playback.js new file mode 100644 index 0000000..82be279 --- /dev/null +++ b/dashboard/playback.js @@ -0,0 +1,112 @@ +/** + * Streaming audio playback engine. + * Receives Int16 PCM chunks and plays them seamlessly via Web Audio API. + */ +class AudioPlayback { + constructor(sampleRate = 24000) { + this.sampleRate = sampleRate; + this.queue = []; + this.isPlaying = false; + this.audioContext = null; + this.currentSource = null; + this.nextStartTime = 0; + this.onPlaybackStart = null; + this.onPlaybackEnd = null; + this.sinkId = undefined; // output device ID + } + + _ensureContext() { + if (!this.audioContext || this.audioContext.state === "closed") { + this.audioContext = new AudioContext({ sampleRate: this.sampleRate }); + // Apply selected output device if set + if (this.sinkId !== undefined && this.audioContext.setSinkId) { + this.audioContext.setSinkId(this.sinkId).catch(() => {}); + } + } + if (this.audioContext.state === "suspended") { + this.audioContext.resume(); + } + } + + async setOutputDevice(deviceId) { + this.sinkId = deviceId; + if (this.audioContext && this.audioContext.setSinkId) { + await this.audioContext.setSinkId(deviceId); + } + } + + /** Enqueue raw Int16 PCM for playback. */ + enqueue(pcmArrayBuffer) { + this._ensureContext(); + + const int16 = new Int16Array(pcmArrayBuffer); + const float32 = new Float32Array(int16.length); + for (let i = 0; i < int16.length; i++) { + float32[i] = int16[i] / 32768; + } + + const buffer = this.audioContext.createBuffer(1, float32.length, this.sampleRate); + buffer.getChannelData(0).set(float32); + this.queue.push(buffer); + + if (!this.isPlaying) this._playNext(); + } + + /** Enqueue a full WAV file (with header) for playback. 
*/ + async enqueueWav(wavArrayBuffer) { + this._ensureContext(); + try { + const audioBuffer = await this.audioContext.decodeAudioData(wavArrayBuffer.slice(0)); + this.queue.push(audioBuffer); + if (!this.isPlaying) this._playNext(); + } catch (err) { + console.error("[AudioPlayback] Failed to decode WAV:", err); + } + } + + _playNext() { + if (this.queue.length === 0) { + this.isPlaying = false; + if (this.onPlaybackEnd) this.onPlaybackEnd(); + return; + } + + if (!this.isPlaying) { + this.isPlaying = true; + this.nextStartTime = this.audioContext.currentTime; + if (this.onPlaybackStart) this.onPlaybackStart(); + } + + const buffer = this.queue.shift(); + const source = this.audioContext.createBufferSource(); + source.buffer = buffer; + source.connect(this.audioContext.destination); + source.onended = () => this._playNext(); + + // Schedule this chunk right after the previous one for gapless playback + const startTime = Math.max(this.nextStartTime, this.audioContext.currentTime); + source.start(startTime); + this.nextStartTime = startTime + buffer.duration; + this.currentSource = source; + } + + flush() { + this.queue = []; + if (this.currentSource) { + try { this.currentSource.stop(); } catch {} + } + this.isPlaying = false; + this.nextStartTime = 0; + } + + setSampleRate(rate) { + if (rate !== this.sampleRate) { + this.sampleRate = rate; + // Close old context so next enqueue creates one with the right rate + if (this.audioContext) { + this.audioContext.close(); + this.audioContext = null; + } + } + } +} diff --git a/dashboard/transport.js b/dashboard/transport.js new file mode 100644 index 0000000..d0d02ad --- /dev/null +++ b/dashboard/transport.js @@ -0,0 +1,112 @@ +/** + * WebSocket transport for voice chat. + * Binary frames = audio, text frames = JSON control messages. 
+ */ +class VoiceTransport { + constructor(url, handlers) { + this.url = url; + this.handlers = handlers; // { onAudio, onTranscript, onResponseText, onResponseComplete, onInterrupted, onMetrics, onError, onOpen, onClose } + this.ws = null; + this.reconnectAttempts = 0; + this.maxReconnects = 3; + } + + connect() { + this.ws = new WebSocket(this.url); + this.ws.binaryType = "arraybuffer"; + + this.ws.onopen = () => { + this.reconnectAttempts = 0; + if (this.handlers.onOpen) this.handlers.onOpen(); + }; + + this.ws.onmessage = (event) => { + if (event.data instanceof ArrayBuffer) { + console.log(`[WS] Binary frame: ${event.data.byteLength} bytes`); + if (this.handlers.onAudio) this.handlers.onAudio(event.data); + } else if (event.data instanceof Blob) { + console.log(`[WS] Blob frame: ${event.data.size} bytes (converting)`); + event.data.arrayBuffer().then(buf => { + if (this.handlers.onAudio) this.handlers.onAudio(buf); + }); + } else { + try { + const msg = JSON.parse(event.data); + console.log(`[WS] JSON: ${msg.type}`, msg.type === 'response_text' ? 
msg.text?.substring(0, 80) : ''); + this._dispatch(msg); + } catch (e) { + console.error("Failed to parse WS message:", e, "data:", typeof event.data, event.data?.substring?.(0, 100)); + } + } + }; + + this.ws.onclose = (event) => { + if (this.handlers.onClose) this.handlers.onClose(event); + }; + + this.ws.onerror = (error) => { + console.error("WebSocket error:", error); + if (this.handlers.onError) this.handlers.onError(error); + }; + } + + _dispatch(msg) { + // Handle base64 audio message — decode and route to onAudio + if (msg.type === "audio" && msg.data) { + const binary = atob(msg.data); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i); + if (this.handlers.onAudio) this.handlers.onAudio(bytes.buffer); + if (this.handlers.onMetrics) this.handlers.onMetrics(msg); + return; + } + + const handlerMap = { + transcript: "onTranscript", + response_text: "onResponseText", + response_complete: "onResponseComplete", + interrupted: "onInterrupted", + metrics: "onMetrics", + error: "onError", + }; + const handler = handlerMap[msg.type]; + if (handler && this.handlers[handler]) { + this.handlers[handler](msg); + } + } + + sendAudio(pcmBuffer) { + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + this.ws.send(pcmBuffer); + } + } + + sendControl(msg) { + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + this.ws.send(JSON.stringify(msg)); + } + } + + endSpeech() { + this.sendControl({ type: "end_of_speech" }); + } + + interrupt() { + this.sendControl({ type: "interrupt" }); + } + + setConfig(config) { + this.sendControl({ type: "config", ...config }); + } + + disconnect() { + if (this.ws) { + this.ws.close(); + this.ws = null; + } + } + + get connected() { + return this.ws && this.ws.readyState === WebSocket.OPEN; + } +} diff --git a/dashboard/vad/bundle.min.js b/dashboard/vad/bundle.min.js new file mode 100644 index 0000000..77366c0 --- /dev/null +++ b/dashboard/vad/bundle.min.js @@ 
-0,0 +1,2 @@ +/*! For license information please see bundle.min.js.LICENSE.txt */ +!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{var t={647:(e,t,r)=>{"use strict";var o=(()=>{var e,t,o,n,s,a,i,l,u,d,c,p,f,h,m,w,g,y,b,v,S,A,E,T,_,O,P,M,C,R,F,x=Object.defineProperty,D=Object.getOwnPropertyDescriptor,U=Object.getOwnPropertyNames,L=Object.prototype.hasOwnProperty,I=r(936),B=(e,t)=>()=>(e&&(t=e(e=0)),t),N=(e,t)=>{for(var r in t)x(e,r,{get:t[r],enumerable:!0})},k=e=>((e,t,r,o)=>{if(t&&"object"==typeof t||"function"==typeof t)for(let r of U(t))!L.call(e,r)&&undefined!==r&&x(e,r,{get:()=>t[r],enumerable:!(o=D(t,r))||o.enumerable});return e})(x({},"__esModule",{value:!0}),e),$=B((()=>{e=new Map,t=[],o=(r,o,n)=>{if(!o||"function"!=typeof o.init||"function"!=typeof o.createInferenceSessionHandler)throw new TypeError("not a valid backend");{let s=e.get(r);if(void 0===s)e.set(r,{backend:o,priority:n});else{if(s.priority>n)return;if(s.priority===n&&s.backend!==o)throw new Error(`cannot register backend "${r}" using priority ${n}`)}if(n>=0){let o=t.indexOf(r);-1!==o&&t.splice(o,1);for(let o=0;o{let r=e.get(t);if(!r)return"backend not found.";if(r.initialized)return r.backend;if(r.aborted)return r.error;{let e=!!r.initPromise;try{return e||(r.initPromise=r.backend.init(t)),await r.initPromise,r.initialized=!0,r.backend}catch(t){return e||(r.error=`${t}`,r.aborted=!0),r.error}finally{delete r.initPromise}}},s=async e=>{let r,o=e.executionProviders||[],s=o.map((e=>"string"==typeof e?e:e.name)),a=0===s.length?t:s,i=[],l=new Set;for(let e of a){let t=await n(e);"string"==typeof t?i.push({name:e,err:t}):(r||(r=t),r===t&&l.add(e))}if(!r)throw new Error(`no available backend found. 
ERR: ${i.map((e=>`[${e.name}] ${e.err}`)).join(", ")}`);for(let{name:e,err:t}of i)s.includes(e)&&console.warn(`removing requested execution provider "${e}" from session options because it is not available: ${t}`);let u=o.filter((e=>l.has("string"==typeof e?e:e.name)));return[r,new Proxy(e,{get:(e,t)=>"executionProviders"===t?u:Reflect.get(e,t)})]}})),j=B((()=>{$()})),V=B((()=>{a="1.22.0"})),G=B((()=>{V(),i="warning",l={wasm:{},webgl:{},webgpu:{},versions:{common:a},set logLevel(e){if(void 0!==e){if("string"!=typeof e||-1===["verbose","info","warning","error","fatal"].indexOf(e))throw new Error(`Unsupported logging level: ${e}`);i=e}},get logLevel(){return i}},Object.defineProperty(l,"logLevel",{enumerable:!0})})),W=B((()=>{G(),u=l})),z=B((()=>{d=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=e.dims[3],r.height=e.dims[2];let o=r.getContext("2d");if(null!=o){let n,s;void 0!==t?.tensorLayout&&"NHWC"===t.tensorLayout?(n=e.dims[2],s=e.dims[3]):(n=e.dims[3],s=e.dims[2]);let a,i,l=void 0!==t?.format?t.format:"RGB",u=t?.norm;void 0===u||void 0===u.mean?a=[255,255,255,255]:"number"==typeof u.mean?a=[u.mean,u.mean,u.mean,u.mean]:(a=[u.mean[0],u.mean[1],u.mean[2],0],void 0!==u.mean[3]&&(a[3]=u.mean[3])),void 0===u||void 0===u.bias?i=[0,0,0,0]:"number"==typeof u.bias?i=[u.bias,u.bias,u.bias,u.bias]:(i=[u.bias[0],u.bias[1],u.bias[2],0],void 0!==u.bias[3]&&(i[3]=u.bias[3]));let d=s*n,c=0,p=d,f=2*d,h=-1;"RGBA"===l?(c=0,p=d,f=2*d,h=3*d):"RGB"===l?(c=0,p=d,f=2*d):"RBG"===l&&(c=0,f=d,p=2*d);for(let t=0;t{let r,o=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d");if(null==o)throw new Error("Can not access image data");{let n,s,a;void 0!==t?.tensorLayout&&"NHWC"===t.tensorLayout?(n=e.dims[2],s=e.dims[1],a=e.dims[3]):(n=e.dims[3],s=e.dims[2],a=e.dims[1]);let i,l,u=void 0!==t&&void 0!==t.format?t.format:"RGB",d=t?.norm;void 0===d||void 
0===d.mean?i=[255,255,255,255]:"number"==typeof d.mean?i=[d.mean,d.mean,d.mean,d.mean]:(i=[d.mean[0],d.mean[1],d.mean[2],255],void 0!==d.mean[3]&&(i[3]=d.mean[3])),void 0===d||void 0===d.bias?l=[0,0,0,0]:"number"==typeof d.bias?l=[d.bias,d.bias,d.bias,d.bias]:(l=[d.bias[0],d.bias[1],d.bias[2],0],void 0!==d.bias[3]&&(l[3]=d.bias[3]));let c=s*n;if(void 0!==t&&(void 0!==t.format&&4===a&&"RGBA"!==t.format||3===a&&"RGB"!==t.format&&"BGR"!==t.format))throw new Error("Tensor format doesn't match input tensor dims");let p=4,f=0,h=1,m=2,w=3,g=0,y=c,b=2*c,v=-1;"RGBA"===u?(g=0,y=c,b=2*c,v=3*c):"RGB"===u?(g=0,y=c,b=2*c):"RBG"===u&&(g=0,b=c,y=2*c),r=o.createImageData(n,s);for(let t=0;t{J(),p=(e,t)=>{if(void 0===e)throw new Error("Image buffer must be defined");if(void 0===t.height||void 0===t.width)throw new Error("Image height and width must be defined");if("NHWC"===t.tensorLayout)throw new Error("NHWC Tensor layout is not supported yet");let r,o,{height:n,width:s}=t,a=t.norm??{mean:255,bias:0};r="number"==typeof a.mean?[a.mean,a.mean,a.mean,a.mean]:[a.mean[0],a.mean[1],a.mean[2],a.mean[3]??255],o="number"==typeof a.bias?[a.bias,a.bias,a.bias,a.bias]:[a.bias[0],a.bias[1],a.bias[2],a.bias[3]??0];let i=void 0!==t.format?t.format:"RGBA",l=void 0!==t.tensorFormat&&void 0!==t.tensorFormat?t.tensorFormat:"RGB",u=n*s,d="RGBA"===l?new Float32Array(4*u):new Float32Array(3*u),c=4,p=0,f=1,h=2,m=3,w=0,g=u,y=2*u,b=-1;"RGB"===i&&(c=3,p=0,f=1,h=2,m=-1),"RGBA"===l?b=3*u:"RBG"===l?(w=0,y=u,g=2*u):"BGR"===l&&(y=0,g=u,w=2*u);for(let t=0;t{let r,o=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,n=typeof ImageData<"u"&&e instanceof ImageData,s=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,a="string"==typeof e,i=t??{},l=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},u=e=>typeof HTMLCanvasElement<"u"&&e instanceof HTMLCanvasElement||e instanceof 
OffscreenCanvas?e.getContext("2d"):null;if(o){let o=l();o.width=e.width,o.height=e.height;let n=u(o);if(null==n)throw new Error("Can not access image data");{let o=e.height,s=e.width;if(void 0!==t&&void 0!==t.resizedHeight&&void 0!==t.resizedWidth&&(o=t.resizedHeight,s=t.resizedWidth),void 0!==t){if(i=t,void 0!==t.tensorFormat)throw new Error("Image input config format must be RGBA for HTMLImageElement");i.tensorFormat="RGBA",i.height=o,i.width=s}else i.tensorFormat="RGBA",i.height=o,i.width=s;n.drawImage(e,0,0),r=n.getImageData(0,0,s,o).data}}else{if(!n){if(s){if(void 0===t)throw new Error("Please provide image config with format for Imagebitmap");let o=l();o.width=e.width,o.height=e.height;let n=u(o);if(null!=n){let t=e.height,o=e.width;return n.drawImage(e,0,0,o,t),r=n.getImageData(0,0,o,t).data,i.height=t,i.width=o,p(r,i)}throw new Error("Can not access image data")}if(a)return new Promise(((t,r)=>{let o=l(),n=u(o);if(!e||!n)return r();let s=new Image;s.crossOrigin="Anonymous",s.src=e,s.onload=()=>{o.width=s.width,o.height=s.height,n.drawImage(s,0,0,o.width,o.height);let e=n.getImageData(0,0,o.width,o.height);i.height=o.height,i.width=o.width,t(p(e.data,i))}}));throw new Error("Input data provided is not supported - aborted tensor creation")}{let o,n;if(void 0!==t&&void 0!==t.resizedWidth&&void 0!==t.resizedHeight?(o=t.resizedHeight,n=t.resizedWidth):(o=e.height,n=e.width),void 0!==t&&(i=t),i.format="RGBA",i.height=o,i.width=n,void 0!==t){let t=l();t.width=n,t.height=o;let s=u(t);if(null==s)throw new Error("Can not access image data");s.putImageData(e,0,0),r=s.getImageData(0,0,n,o).data}else r=e.data}}if(void 0!==r)return p(r,i);throw new Error("Input data provided is not supported - aborted tensor creation")},h=(e,t)=>{let{width:r,height:o,download:n,dispose:s}=t;return new T({location:"texture",type:"float32",texture:e,dims:[1,o,r,4],download:n,dispose:s})},m=(e,t)=>{let{dataType:r,dims:o,download:n,dispose:s}=t;return new 
T({location:"gpu-buffer",type:r??"float32",gpuBuffer:e,dims:o,download:n,dispose:s})},w=(e,t)=>{let{dataType:r,dims:o,download:n,dispose:s}=t;return new T({location:"ml-tensor",type:r??"float32",mlTensor:e,dims:o,download:n,dispose:s})},g=(e,t,r)=>new T({location:"cpu-pinned",type:e,data:t,dims:r??[t.length]})})),Z=B((()=>{y=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),b=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),v=!1,S=()=>{if(!v){v=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,r=globalThis.Float16Array,o=typeof r<"u"&&r.from;e&&(y.set("int64",BigInt64Array),b.set(BigInt64Array,"int64")),t&&(y.set("uint64",BigUint64Array),b.set(BigUint64Array,"uint64")),o?(y.set("float16",r),b.set(r,"float16")):y.set("float16",Uint16Array)}}})),q=B((()=>{J(),A=e=>{let t=1;for(let r=0;r{switch(e.location){case"cpu":return new T(e.type,e.data,t);case"cpu-pinned":return new T({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new T({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new T({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new T({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}})),J=B((()=>{z(),H(),Z(),q(),T=class{constructor(e,t,r){let o,n;if(S(),"object"==typeof e&&"location"in e)switch(this.dataLocation=e.location,o=e.type,n=e.dims,e.location){case"cpu-pinned":{let t=y.get(o);if(!t)throw new TypeError(`unsupported type "${o}" to create tensor from pinned 
buffer`);if(!(e.data instanceof t))throw new TypeError(`buffer should be of type ${t.name}`);this.cpuData=e.data;break}case"texture":if("float32"!==o)throw new TypeError(`unsupported type "${o}" to create tensor from texture`);this.gpuTextureData=e.texture,this.downloader=e.download,this.disposer=e.dispose;break;case"gpu-buffer":if("float32"!==o&&"float16"!==o&&"int32"!==o&&"int64"!==o&&"uint32"!==o&&"uint8"!==o&&"bool"!==o&&"uint4"!==o&&"int4"!==o)throw new TypeError(`unsupported type "${o}" to create tensor from gpu buffer`);this.gpuBufferData=e.gpuBuffer,this.downloader=e.download,this.disposer=e.dispose;break;case"ml-tensor":if("float32"!==o&&"float16"!==o&&"int32"!==o&&"int64"!==o&&"uint32"!==o&&"uint64"!==o&&"int8"!==o&&"uint8"!==o&&"bool"!==o&&"uint4"!==o&&"int4"!==o)throw new TypeError(`unsupported type "${o}" to create tensor from MLTensor`);this.mlTensorData=e.mlTensor,this.downloader=e.download,this.disposer=e.dispose;break;default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let s,a;if("string"==typeof e)if(o=e,a=r,"string"===e){if(!Array.isArray(t))throw new TypeError("A string tensor's data must be a string array.");s=t}else{let r=y.get(e);if(void 0===r)throw new TypeError(`Unsupported tensor type: ${e}.`);if(Array.isArray(t)){if("float16"===e&&r===Uint16Array||"uint4"===e||"int4"===e)throw new TypeError(`Creating a ${e} tensor from number array is not supported. 
Please use ${r.name} as data.`);s="uint64"===e||"int64"===e?r.from(t,BigInt):r.from(t)}else if(t instanceof r)s=t;else if(t instanceof Uint8ClampedArray){if("uint8"!==e)throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");s=Uint8Array.from(t)}else{if(!("float16"===e&&t instanceof Uint16Array&&r!==Uint16Array))throw new TypeError(`A ${o} tensor's data must be type of ${r}`);s=new globalThis.Float16Array(t.buffer,t.byteOffset,t.length)}}else if(a=t,Array.isArray(e)){if(0===e.length)throw new TypeError("Tensor type cannot be inferred from an empty array.");let t=typeof e[0];if("string"===t)o="string",s=e;else{if("boolean"!==t)throw new TypeError(`Invalid element type of data array: ${t}.`);o="bool",s=Uint8Array.from(e)}}else if(e instanceof Uint8ClampedArray)o="uint8",s=Uint8Array.from(e);else{let t=b.get(e.constructor);if(void 0===t)throw new TypeError(`Unsupported type for tensor data: ${e.constructor}.`);o=t,s=e}if(void 0===a)a=[s.length];else if(!Array.isArray(a))throw new TypeError("A tensor's dims must be a number array");n=a,this.cpuData=s,this.dataLocation="cpu"}let s=A(n);if(this.cpuData&&s!==this.cpuData.length&&("uint4"!==o&&"int4"!==o||Math.ceil(s/2)!==this.cpuData.length))throw new Error(`Tensor's size(${s}) does not match data length(${this.cpuData.length}).`);this.type=o,this.dims=n,this.size=s}static async fromImage(e,t){return f(e,t)}static fromTexture(e,t){return h(e,t)}static fromGpuBuffer(e,t){return m(e,t)}static fromMLTensor(e,t){return w(e,t)}static fromPinnedBuffer(e,t,r){return g(e,t,r)}toDataURL(e){return d(this,e)}toImageData(e){return c(this,e)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. 
Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(e){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let t=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=t,e&&this.disposer&&(this.disposer(),this.disposer=void 0),t}finally{this.isDownloading=!1}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if("none"===this.dataLocation)throw new Error("The tensor is disposed.")}reshape(e){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return E(this,e)}}})),K=B((()=>{J(),_=T})),Q=B((()=>{G(),O=(e,t)=>{(typeof l.trace>"u"?!l.wasm.trace:!l.trace)||console.timeStamp(`${e}::ORT::${t}`)},P=(e,t)=>{let r=(new 
Error).stack?.split(/\r\n|\r|\n/g)||[],o=!1;for(let n=0;n{(typeof l.trace>"u"?!l.wasm.trace:!l.trace)||P("BEGIN",e)},C=e=>{(typeof l.trace>"u"?!l.wasm.trace:!l.trace)||P("END",e)}})),X=B((()=>{$(),K(),Q(),R=class e{constructor(e){this.handler=e}async run(e,t,r){M();let o={},n={};if("object"!=typeof e||null===e||e instanceof _||Array.isArray(e))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let s=!0;if("object"==typeof t){if(null===t)throw new TypeError("Unexpected argument[1]: cannot be null.");if(t instanceof _)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(t)){if(0===t.length)throw new TypeError("'fetches' cannot be an empty array.");s=!1;for(let e of t){if("string"!=typeof e)throw new TypeError("'fetches' must be a string array or an object.");if(-1===this.outputNames.indexOf(e))throw new RangeError(`'fetches' contains invalid output name: ${e}.`);o[e]=null}if("object"==typeof r&&null!==r)n=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else{let e=!1,a=Object.getOwnPropertyNames(t);for(let r of this.outputNames)if(-1!==a.indexOf(r)){let n=t[r];(null===n||n instanceof _)&&(e=!0,s=!1,o[r]=n)}if(e){if("object"==typeof r&&null!==r)n=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else n=t}}else if(typeof t<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let t of this.inputNames)if(typeof e[t]>"u")throw new Error(`input '${t}' is missing in 'feeds'.`);if(s)for(let e of this.outputNames)o[e]=null;let a=await this.handler.run(e,o,n),i={};for(let e in a)if(Object.hasOwnProperty.call(a,e)){let t=a[e];i[e]=t instanceof _?t:new _(t.type,t.data,t.dims)}return C(),i}async release(){return this.handler.dispose()}static async create(t,r,o,n){M();let a,i={};if("string"==typeof t){if(a=t,"object"==typeof r&&null!==r)i=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else 
if(t instanceof Uint8Array){if(a=t,"object"==typeof r&&null!==r)i=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else{if(!(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer))throw new TypeError("Unexpected argument[0]: must be 'path' or 'buffer'.");{let e=t,s=0,l=t.byteLength;if("object"==typeof r&&null!==r)i=r;else if("number"==typeof r){if(s=r,!Number.isSafeInteger(s))throw new RangeError("'byteOffset' must be an integer.");if(s<0||s>=e.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${e.byteLength}).`);if(l=t.byteLength-s,"number"==typeof o){if(l=o,!Number.isSafeInteger(l))throw new RangeError("'byteLength' must be an integer.");if(l<=0||s+l>e.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${e.byteLength-s}].`);if("object"==typeof n&&null!==n)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else if(typeof o<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof r<"u")throw new TypeError("'options' must be an object.");a=new Uint8Array(e,s,l)}}let[l,u]=await s(i),d=await l.createInferenceSessionHandler(a,u);return C(),new e(d)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}get inputMetadata(){return this.handler.inputMetadata}get outputMetadata(){return this.handler.outputMetadata}}})),Y=B((()=>{X(),F=R})),ee=B((()=>{})),te=B((()=>{})),re=B((()=>{})),oe=B((()=>{})),ne={};N(ne,{InferenceSession:()=>F,TRACE:()=>O,TRACE_FUNC_BEGIN:()=>M,TRACE_FUNC_END:()=>C,Tensor:()=>_,env:()=>u,registerBackend:()=>o});var se=B((()=>{j(),W(),Y(),K(),ee(),te(),Q(),re(),oe()})),ae=B((()=>{})),ie={};N(ie,{default:()=>de});var 
le,ue,de,ce,pe,fe,he,me,we,ge,ye,be,ve,Se,Ae,Ee,Te,_e,Oe,Pe,Me,Ce,Re,Fe,xe,De,Ue,Le,Ie,Be,Ne,ke,$e,je,Ve,Ge,We,ze,He,Ze,qe,Je,Ke,Qe,Xe,Ye,et,tt,rt,ot,nt,st,at,it,lt,ut,dt,ct,pt,ft,ht,mt,wt,gt,yt,bt,vt,St,At,Et,Tt,_t,Ot,Pt,Mt,Ct,Rt=B((()=>{Nt(),xt(),Ft(),le="ort-wasm-proxy-worker",(ue=globalThis.self?.name===le)&&(self.onmessage=e=>{let{type:t,in:r}=e.data;try{switch(t){case"init-wasm":Ce(r.wasm).then((()=>{Qe(r).then((()=>{postMessage({type:t})}),(e=>{postMessage({type:t,err:e})}))}),(e=>{postMessage({type:t,err:e})}));break;case"init-ep":{let{epName:e,env:o}=r;Xe(o,e).then((()=>{postMessage({type:t})}),(e=>{postMessage({type:t,err:e})}));break}case"copy-from":{let{buffer:e}=r,o=rt(e);postMessage({type:t,out:o});break}case"create":{let{model:e,options:o}=r;ot(e,o).then((e=>{postMessage({type:t,out:e})}),(e=>{postMessage({type:t,err:e})}));break}case"release":nt(r),postMessage({type:t});break;case"run":{let{sessionId:e,inputIndices:o,inputs:n,outputIndices:s,options:a}=r;at(e,o,n,s,new Array(s.length).fill(null),a).then((e=>{e.some((e=>"cpu"!==e[3]))?postMessage({type:t,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:t,out:e},lt([...n,...e]))}),(e=>{postMessage({type:t,err:e})}));break}case"end-profiling":it(r),postMessage({type:t})}}catch(e){postMessage({type:t,err:e})}}),de=ue?null:e=>new Worker(e??pe,{type:"classic",name:le})})),Ft=B((()=>{ae(),ce=typeof location>"u"?void 0:location.origin,pe=typeof document<"u"?document.currentScript?.src:typeof self<"u"?self.location?.href:void 0,fe=()=>{if(pe&&!pe.startsWith("blob:"))return pe.substring(0,pe.lastIndexOf("/")+1)},he=(e,t)=>{try{let r=t??pe;return(r?new URL(e,r):new URL(e)).origin===ce}catch{return!1}},me=(e,t)=>{let r=t??pe;try{return(r?new URL(e,r):new URL(e)).href}catch{return}},we=(e,t)=>`${t??"./"}${e}`,ge=async e=>{let t=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(t)},ye=async e=>(await 
import(e)).default,be=(Rt(),k(ie)).default,ve=async()=>{if(!pe)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if(he(pe))return[void 0,be()];let e=await ge(pe);return[e,be(e)]},Se=async(e,t,r)=>{{let o="ort-wasm-simd-threaded.mjs",n=e??me(o,t),s=r&&n&&!he(n,t),a=s?await ge(n):n??we(o,t);return[s?a:void 0,await ye(a)]}}})),xt=B((()=>{Ft(),Ee=!1,Te=!1,_e=!1,Oe=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&(new MessageChannel).port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},Pe=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},Me=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,19,1,17,0,65,1,253,15,65,2,253,15,65,3,253,15,253,147,2,11]))}catch{return!1}},Ce=async e=>{if(Ee)return Promise.resolve();if(Te)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(_e)throw new Error("previous call to 'initializeWebAssembly()' failed.");Te=!0;let t=e.initTimeout,r=e.numThreads;if(!1!==e.simd)if("relaxed"===e.simd){if(!Me())throw new Error("Relaxed WebAssembly SIMD is not supported in the current environment.")}else if(!Pe())throw new Error("WebAssembly SIMD is not supported in the current environment.");let o=Oe();r>1&&!o&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+r+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. 
Falling back to single-threading."),e.numThreads=r=1);let n=e.wasmPaths,s="string"==typeof n?n:void 0,a=n?.mjs,i=a?.href??a,l=n?.wasm,u=l?.href??l,d=e.wasmBinary,[c,p]=await Se(i,s,r>1),f=!1,h=[];if(t>0&&h.push(new Promise((e=>{setTimeout((()=>{f=!0,e()}),t)}))),h.push(new Promise(((e,t)=>{let o={numThreads:r};if(d)o.wasmBinary=d;else if(u||s)o.locateFile=e=>u??s+e;else if(i&&0!==i.indexOf("blob:"))o.locateFile=e=>new URL(e,i).href;else if(c){let e=fe();e&&(o.locateFile=t=>e+t)}p(o).then((t=>{Te=!1,Ee=!0,Ae=t,e(),c&&URL.revokeObjectURL(c)}),(e=>{Te=!1,_e=!0,t(e)}))}))),await Promise.race(h),f)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},Re=()=>{if(Ee&&Ae)return Ae;throw new Error("WebAssembly is not initialized yet.")}})),Dt=B((()=>{xt(),Fe=(e,t)=>{let r=Re(),o=r.lengthBytesUTF8(e)+1,n=r._malloc(o);return r.stringToUTF8(e,n,o),t.push(n),n},xe=(e,t,r,o)=>{if("object"==typeof e&&null!==e){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach((([e,n])=>{let s=t?t+e:e;if("object"==typeof n)xe(n,s+".",r,o);else if("string"==typeof n||"number"==typeof n)o(s,n.toString());else{if("boolean"!=typeof n)throw new Error("Can't handle extra config type: "+typeof n);o(s,n?"1":"0")}}))},De=e=>{let t=Re(),r=t.stackSave();try{let r=t.PTR_SIZE,o=t.stackAlloc(2*r);t._OrtGetLastError(o,o+r);let n=Number(t.getValue(o,4===r?"i32":"i64")),s=t.getValue(o+r,"*"),a=s?t.UTF8ToString(s):"";throw new Error(`${e} ERROR_CODE: ${n}, ERROR_MESSAGE: ${a}`)}finally{t.stackRestore(r)}}})),Ut=B((()=>{xt(),Dt(),Ue=e=>{let t=Re(),r=0,o=[],n=e||{};try{if(void 0===e?.logSeverityLevel)n.logSeverityLevel=2;else if("number"!=typeof e.logSeverityLevel||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(void 0===e?.logVerbosityLevel)n.logVerbosityLevel=0;else if("number"!=typeof 
e.logVerbosityLevel||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);void 0===e?.terminate&&(n.terminate=!1);let s=0;return void 0!==e?.tag&&(s=Fe(e.tag,o)),r=t._OrtCreateRunOptions(n.logSeverityLevel,n.logVerbosityLevel,!!n.terminate,s),0===r&&De("Can't create run options."),void 0!==e?.extra&&xe(e.extra,"",new WeakSet,((e,n)=>{let s=Fe(e,o),a=Fe(n,o);0!==t._OrtAddRunConfigEntry(r,s,a)&&De(`Can't set a run config entry: ${e} - ${n}.`)})),[r,o]}catch(e){throw 0!==r&&t._OrtReleaseRunOptions(r),o.forEach((e=>t._free(e))),e}}})),Lt=B((()=>{xt(),Dt(),Le=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},Ie=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}},Be=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some((e=>"webgpu"===("string"==typeof e?e:e.name)))&&(e.enableMemPattern=!1)},Ne=(e,t,r,o)=>{let n=Fe(t,o),s=Fe(r,o);0!==Re()._OrtAddSessionConfigEntry(e,n,s)&&De(`Can't set a session config entry: ${t} - ${r}.`)},ke=async(e,t,r)=>{for(let o of t){let t="string"==typeof o?o:o.name,n=[];switch(t){case"webnn":if(t="WEBNN","string"!=typeof o){let t=o?.deviceType;t&&Ne(e,"deviceType",t,r)}break;case"webgpu":if(t="JS","string"!=typeof o){let t=o;if(t?.preferredLayout){if("NCHW"!==t.preferredLayout&&"NHWC"!==t.preferredLayout)throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${t.preferredLayout}`);Ne(e,"preferredLayout",t.preferredLayout,r)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${t}`)}let 
s=Fe(t,r),a=n.length,i=0,l=0;if(a>0){i=Re()._malloc(a*Re().PTR_SIZE),r.push(i),l=Re()._malloc(a*Re().PTR_SIZE),r.push(l);for(let e=0;e{let t=Re(),r=0,o=[],n=e||{};Be(n);try{let e=Le(n.graphOptimizationLevel??"all"),s=Ie(n.executionMode??"sequential"),a="string"==typeof n.logId?Fe(n.logId,o):0,i=n.logSeverityLevel??2;if(!Number.isInteger(i)||i<0||i>4)throw new Error(`log serverity level is not valid: ${i}`);let l=n.logVerbosityLevel??0;if(!Number.isInteger(l)||l<0||l>4)throw new Error(`log verbosity level is not valid: ${l}`);let u="string"==typeof n.optimizedModelFilePath?Fe(n.optimizedModelFilePath,o):0;if(r=t._OrtCreateSessionOptions(e,!!n.enableCpuMemArena,!!n.enableMemPattern,s,!!n.enableProfiling,0,a,i,l,u),0===r&&De("Can't create session options."),n.executionProviders&&await ke(r,n.executionProviders,o),void 0!==n.enableGraphCapture){if("boolean"!=typeof n.enableGraphCapture)throw new Error(`enableGraphCapture must be a boolean value: ${n.enableGraphCapture}`);Ne(r,"enableGraphCapture",n.enableGraphCapture.toString(),o)}if(n.freeDimensionOverrides)for(let[e,s]of Object.entries(n.freeDimensionOverrides)){if("string"!=typeof e)throw new Error(`free dimension override name must be a string: ${e}`);if("number"!=typeof s||!Number.isInteger(s)||s<0)throw new Error(`free dimension override value must be a non-negative integer: ${s}`);let n=Fe(e,o);0!==t._OrtAddFreeDimensionOverride(r,n,s)&&De(`Can't set a free dimension override: ${e} - ${s}.`)}return void 0!==n.extra&&xe(n.extra,"",new WeakSet,((e,t)=>{Ne(r,e,t,o)})),[r,o]}catch(e){throw 0!==r&&0!==t._OrtReleaseSessionOptions(r)&&De("Can't release session options."),o.forEach((e=>t._free(e))),e}}})),It=B((()=>{je=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 
13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},Ve=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},Ge=(e,t)=>{let r=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],o="number"==typeof t?t:t.reduce(((e,t)=>e*t),1);return r>0?Math.ceil(o*r):void 0},We=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":case"bool":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},ze=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},He=e=>"float32"===e||"float16"===e||"int32"===e||"int64"===e||"uint32"===e||"uint8"===e||"bool"===e||"uint4"===e||"int4"===e,Ze=e=>"float32"===e||"float16"===e||"int32"===e||"int64"===e||"uint32"===e||"uint64"===e||"int8"===e||"uint8"===e||"bool"===e||"uint4"===e||"int4"===e,qe=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}})),Bt=B((()=>{ae(),Je=async e=>{if("string"==typeof e){let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let 
r=t.headers.get("Content-Length"),o=r?parseInt(r,10):0;if(o<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let r,n=t.body.getReader();try{r=new ArrayBuffer(o)}catch(e){if(!(e instanceof RangeError))throw e;{let e=Math.ceil(o/65536);r=new WebAssembly.Memory({initial:e,maximum:e}).buffer}}let s=0;for(;;){let{done:e,value:t}=await n.read();if(e)break;let o=t.byteLength;new Uint8Array(r,s,o).set(t),s+=o}return new Uint8Array(r,0,o)}}return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new Uint8Array(e)}})),Nt=B((()=>{Ut(),Lt(),It(),xt(),Dt(),Bt(),Ke=(e,t)=>{0!==Re()._OrtInit(e,t)&&De("Can't initialize onnxruntime.")},Qe=async e=>{Ke(e.wasm.numThreads,ze(e.logLevel))},Xe=async(e,t)=>{Re().asyncInit?.()},Ye=new Map,et=e=>{let t=Re(),r=t.stackSave();try{let r=t.PTR_SIZE,o=t.stackAlloc(2*r);0!==t._OrtGetInputOutputCount(e,o,o+r)&&De("Can't get session input/output count.");let n=4===r?"i32":"i64";return[Number(t.getValue(o,n)),Number(t.getValue(o+r,n))]}finally{t.stackRestore(r)}},tt=(e,t)=>{let r=Re(),o=r.stackSave(),n=0;try{let o=r.PTR_SIZE,s=r.stackAlloc(2*o);0!==r._OrtGetInputOutputMetadata(e,t,s,s+o)&&De("Can't get session input/output metadata.");let a=Number(r.getValue(s,"*"));n=Number(r.getValue(s+o,"*"));let i=r.HEAP32[n/4];if(0===i)return[a,0];let l=r.HEAPU32[n/4+1],u=[];for(let e=0;e{let t=Re(),r=t._malloc(e.byteLength);if(0===r)throw new Error(`Can't create a session. 
failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,r),[r,e.byteLength]},ot=async(e,t)=>{let r,o,n=Re();Array.isArray(e)?[r,o]=e:e.buffer===n.HEAPU8.buffer?[r,o]=[e.byteOffset,e.byteLength]:[r,o]=rt(e);let s=0,a=0,i=[],l=[],u=[];try{if([a,i]=await $e(t),t?.externalData&&n.mountExternalData){let e=[];for(let r of t.externalData){let t="string"==typeof r?r:r.path;e.push(Je("string"==typeof r?r:r.data).then((e=>{n.mountExternalData(t,e)})))}await Promise.all(e)}for(let e of t?.executionProviders??[])if("webnn"===("string"==typeof e?e:e.name)){if(n.shouldTransferToMLTensor=!1,"string"!=typeof e){let t=e,r=t?.context,o=t?.gpuDevice,s=t?.deviceType,a=t?.powerPreference;n.currentContext=r||(o?await n.webnnCreateMLContext(o):await n.webnnCreateMLContext({deviceType:s,powerPreference:a}))}else n.currentContext=await n.webnnCreateMLContext();break}s=await n._OrtCreateSession(r,o,a),n.webgpuOnCreateSession?.(s),0===s&&De("Can't create a session."),n.jsepOnCreateSession?.(),n.currentContext&&(n.webnnRegisterMLContext(s,n.currentContext),n.currentContext=void 0,n.shouldTransferToMLTensor=!0);let[e,d]=et(s),c=!!t?.enableGraphCapture,p=[],f=[],h=[],m=[];for(let t=0;tn._OrtFree(e))),u.forEach((e=>n._OrtFree(e))),0!==s&&0!==n._OrtReleaseSession(s)&&De("Can't release session."),e}finally{n._free(r),0!==a&&0!==n._OrtReleaseSessionOptions(a)&&De("Can't release session options."),i.forEach((e=>n._free(e))),n.unmountExternalData?.()}},nt=e=>{let t=Re(),r=Ye.get(e);if(!r)throw new Error(`cannot release session. 
invalid session id: ${e}`);let[o,n,s,a,i]=r;a&&(i&&0!==t._OrtClearBoundOutputs(a.handle)&&De("Can't clear bound outputs."),0!==t._OrtReleaseBinding(a.handle)&&De("Can't release IO binding.")),t.jsepOnReleaseSession?.(e),t.webnnOnReleaseSession?.(e),t.webgpuOnReleaseSession?.(e),n.forEach((e=>t._OrtFree(e))),s.forEach((e=>t._OrtFree(e))),0!==t._OrtReleaseSession(o)&&De("Can't release session."),Ye.delete(e)},st=async(e,t,r,o,n,s,a=!1)=>{if(!e)return void t.push(0);let i,l,u=Re(),d=u.PTR_SIZE,c=e[0],p=e[1],f=e[3],h=f;if("string"===c&&("gpu-buffer"===f||"ml-tensor"===f))throw new Error("String tensor is not supported on GPU.");if(a&&"gpu-buffer"!==f)throw new Error(`External buffer must be provided for input/output index ${s} when enableGraphCapture is true.`);if("gpu-buffer"===f){let t=e[2].gpuBuffer;l=Ge(je(c),p);{let e=u.jsepRegisterBuffer;if(!e)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');i=e(o,s,t,l)}}else if("ml-tensor"===f){let t=e[2].mlTensor;l=Ge(je(c),p);let r=u.webnnRegisterMLTensor;if(!r)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');i=r(o,t,je(c),p)}else{let t=e[2];if(Array.isArray(t)){l=d*t.length,i=u._malloc(l),r.push(i);for(let e=0;eu.setValue(w+t*d,e,4===d?"i32":"i64")));let e=u._OrtCreateTensor(je(c),i,l,w,p.length,qe(h));0===e&&De(`Can't create tensor for input/output. session=${o}, index=${s}.`),t.push(e)}finally{u.stackRestore(m)}},at=async(e,t,r,o,n,s)=>{let a=Re(),i=a.PTR_SIZE,l=Ye.get(e);if(!l)throw new Error(`cannot run inference. 
invalid session id: ${e}`);let u=l[0],d=l[1],c=l[2],p=l[3],f=l[4],h=(l[5],t.length),m=o.length,w=0,g=[],y=[],b=[],v=[],S=a.stackSave(),A=a.stackAlloc(h*i),E=a.stackAlloc(h*i),T=a.stackAlloc(m*i),_=a.stackAlloc(m*i);try{[w,g]=Ue(s);for(let o=0;oe*t),1);s=Ve(l);let g=p?.outputPreferredLocations[o[t]];if("string"===s){if("gpu-buffer"===g||"ml-tensor"===g)throw new Error("String tensor is not supported on GPU.");let e=[];for(let t=0;t0){let e=a.jsepGetBuffer;if(!e)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let t=e(c),o=Ge(l,w);if(void 0===o||!He(s))throw new Error(`Unsupported data type: ${s}`);d=!0,S.push([s,m,{gpuBuffer:t,download:a.jsepCreateDownloader(t,o,s),dispose:()=>{0!==a._OrtReleaseTensor(r)&&De("Can't release tensor.")}},"gpu-buffer"])}else if("ml-tensor"===g&&w>0){let t=a.webnnEnsureTensor,o=a.webnnIsGraphInputOutputTypeSupported;if(!t||!o)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(void 0===Ge(l,w)||!Ze(s))throw new Error(`Unsupported data type: ${s}`);if(!o(e,s,!1))throw new Error(`preferredLocation "ml-tensor" for ${s} output is not supported by current WebNN Context.`);let n=await t(e,c,l,m,!1);d=!0,S.push([s,m,{mlTensor:n,download:a.webnnCreateMLTensorDownloader(c,s),dispose:()=>{a.webnnReleaseTensorId(c),a._OrtReleaseTensor(r)}},"ml-tensor"])}else if("ml-tensor-cpu-output"===g&&w>0){let e=a.webnnCreateMLTensorDownloader(c,s)(),t=S.length;d=!0,O.push((async()=>{let o=[t,await e];return a.webnnReleaseTensorId(c),a._OrtReleaseTensor(r),o})()),S.push([s,m,[],"cpu"])}else{let e=new(We(s))(w);new Uint8Array(e.buffer,e.byteOffset,e.byteLength).set(a.HEAPU8.subarray(c,c+e.byteLength)),S.push([s,m,e,"cpu"])}}finally{a.stackRestore(l),"string"===s&&c&&a._free(c),d||a._OrtReleaseTensor(r)}}p&&!f&&(0!==a._OrtClearBoundOutputs(p.handle)&&De("Can't clear bound outputs."),Ye.set(e,[u,d,c,p,f,!1]));for(let[e,t]of await Promise.all(O))S[e][2]=t;return 
S}finally{a.webnnOnRunEnd?.(u),a.stackRestore(S),y.forEach((e=>a._OrtReleaseTensor(e))),b.forEach((e=>a._OrtReleaseTensor(e))),v.forEach((e=>a._free(e))),0!==w&&a._OrtReleaseRunOptions(w),g.forEach((e=>a._free(e)))}},it=e=>{let t=Re(),r=Ye.get(e);if(!r)throw new Error("invalid session id");let o=r[0],n=t._OrtEndProfiling(o);0===n&&De("Can't get an profile file name."),t._OrtFree(n)},lt=e=>{let t=[];for(let r of e){let e=r[2];!Array.isArray(e)&&"buffer"in e&&t.push(e.buffer)}return t}})),kt=B((()=>{se(),Nt(),xt(),Ft(),ut=()=>!!u.wasm.proxy&&typeof document<"u",ct=!1,pt=!1,ft=!1,wt=new Map,gt=(e,t)=>{let r=wt.get(e);r?r.push(t):wt.set(e,[t])},yt=()=>{if(ct||!pt||ft||!dt)throw new Error("worker not ready")},bt=e=>{switch(e.data.type){case"init-wasm":ct=!1,e.data.err?(ft=!0,mt[1](e.data.err)):(pt=!0,mt[0]()),ht&&(URL.revokeObjectURL(ht),ht=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=wt.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}}},vt=async()=>{if(!pt){if(ct)throw new Error("multiple calls to 'initWasm()' detected.");if(ft)throw new Error("previous call to 'initWasm()' failed.");if(ct=!0,ut())return new Promise(((e,t)=>{dt?.terminate(),ve().then((([r,o])=>{try{(dt=o).onerror=e=>t(e),dt.onmessage=bt,mt=[e,t];let n={type:"init-wasm",in:u};if(!n.in.wasm.wasmPaths&&r){let e=fe();e&&(n.in.wasm.wasmPaths=e)}dt.postMessage(n),ht=r}catch(e){t(e)}}),t)}));try{await Ce(u.wasm),await Qe(u),pt=!0}catch(e){throw ft=!0,e}finally{ct=!1}}},St=async e=>{if(ut())return yt(),new Promise(((t,r)=>{gt("init-ep",[t,r]);let o={type:"init-ep",in:{epName:e,env:u}};dt.postMessage(o)}));await Xe(u,e)},At=async e=>ut()?(yt(),new Promise(((t,r)=>{gt("copy-from",[t,r]);let o={type:"copy-from",in:{buffer:e}};dt.postMessage(o,[e.buffer])}))):rt(e),Et=async(e,t)=>{if(ut()){if(t?.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return 
yt(),new Promise(((r,o)=>{gt("create",[r,o]);let n={type:"create",in:{model:e,options:{...t}}},s=[];e instanceof Uint8Array&&s.push(e.buffer),dt.postMessage(n,s)}))}return ot(e,t)},Tt=async e=>{if(ut())return yt(),new Promise(((t,r)=>{gt("release",[t,r]);let o={type:"release",in:e};dt.postMessage(o)}));nt(e)},_t=async(e,t,r,o,n,s)=>{if(ut()){if(r.some((e=>"cpu"!==e[3])))throw new Error("input tensor on GPU is not supported for proxy.");if(n.some((e=>e)))throw new Error("pre-allocated output tensor is not supported for proxy.");return yt(),new Promise(((n,a)=>{gt("run",[n,a]);let i=r,l={type:"run",in:{sessionId:e,inputIndices:t,inputs:i,outputIndices:o,options:s}};dt.postMessage(l,lt(i))}))}return at(e,t,r,o,n,s)},Ot=async e=>{if(ut())return yt(),new Promise(((t,r)=>{gt("end-profiling",[t,r]);let o={type:"end-profiling",in:e};dt.postMessage(o)}));it(e)}})),$t=B((()=>{se(),kt(),It(),ae(),Bt(),Pt=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},Mt=e=>{switch(e[3]){case"cpu":return new _(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!He(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:r,download:o,dispose:n}=e[2];return _.fromGpuBuffer(r,{dataType:t,dims:e[1],download:o,dispose:n})}case"ml-tensor":{let t=e[0];if(!Ze(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:r,download:o,dispose:n}=e[2];return _.fromMLTensor(r,{dataType:t,dims:e[1],download:o,dispose:n})}default:throw new Error(`invalid data location: ${e[3]}`)}},Ct=class{async fetchModelAndCopyToWasmMemory(e){return At(await Je(e))}async loadModel(e,t){let r;M(),r="string"==typeof e?await 
this.fetchModelAndCopyToWasmMemory(e):e,[this.sessionId,this.inputNames,this.outputNames,this.inputMetadata,this.outputMetadata]=await Et(r,t),C()}async dispose(){return Tt(this.sessionId)}async run(e,t,r){M();let o=[],n=[];Object.entries(e).forEach((e=>{let t=e[0],r=e[1],s=this.inputNames.indexOf(t);if(-1===s)throw new Error(`invalid input '${t}'`);o.push(r),n.push(s)}));let s=[],a=[];Object.entries(t).forEach((e=>{let t=e[0],r=e[1],o=this.outputNames.indexOf(t);if(-1===o)throw new Error(`invalid output '${t}'`);s.push(r),a.push(o)}));let i=o.map(((e,t)=>Pt(e,(()=>`input "${this.inputNames[n[t]]}"`)))),l=s.map(((e,t)=>e?Pt(e,(()=>`output "${this.outputNames[a[t]]}"`)):null)),u=await _t(this.sessionId,n,i,a,l,r),d={};for(let e=0;eGt,initializeFlags:()=>Vt,wasmBackend:()=>Wt});var Vt,Gt,Wt,zt=B((()=>{se(),kt(),$t(),Vt=()=>{("number"!=typeof u.wasm.initTimeout||u.wasm.initTimeout<0)&&(u.wasm.initTimeout=0);let e=u.wasm.simd;if("boolean"!=typeof e&&void 0!==e&&"fixed"!==e&&"relaxed"!==e&&(console.warn(`Property "env.wasm.simd" is set to unknown value "${e}". 
Reset it to \`false\` and ignore SIMD feature checking.`),u.wasm.simd=!1),"boolean"!=typeof u.wasm.proxy&&(u.wasm.proxy=!1),"boolean"!=typeof u.wasm.trace&&(u.wasm.trace=!1),"number"!=typeof u.wasm.numThreads||!Number.isInteger(u.wasm.numThreads)||u.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)u.wasm.numThreads=1;else{let e=typeof navigator>"u"?I("node:os").cpus().length:navigator.hardwareConcurrency;u.wasm.numThreads=Math.min(4,Math.ceil((e||1)/2))}},Wt=new(Gt=class{async init(e){Vt(),await vt(),await St(e)}async createInferenceSessionHandler(e,t){let r=new Ct;return await r.loadModel(e,t),r}})})),Ht={};N(Ht,{InferenceSession:()=>F,TRACE:()=>O,TRACE_FUNC_BEGIN:()=>M,TRACE_FUNC_END:()=>C,Tensor:()=>_,default:()=>Zt,env:()=>u,registerBackend:()=>o}),se(),se(),se();var Zt=ne;{let e=(zt(),k(jt)).wasmBackend;o("cpu",e,10),o("wasm",e,10)}return Object.defineProperty(u.versions,"web",{value:"1.22.0",enumerable:!0}),k(Ht)})();e.exports=o},936:e=>{function t(e){var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}t.keys=()=>[],t.resolve=t,t.id=936,e.exports=t},485:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.baseAssetPath=void 0;const r="undefined"!=typeof window&&void 0!==window.document?window.document.currentScript:null;let o="/";r&&(o=r.src.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^/]+$/,"/")),t.baseAssetPath=o},973:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.defaultModelFetcher=void 0,t.defaultModelFetcher=e=>fetch(e).then((e=>e.arrayBuffer()))},362:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultFrameProcessorOptions=void 0;const 
o=r(710),n=r(954);t.defaultFrameProcessorOptions={positiveSpeechThreshold:.3,negativeSpeechThreshold:.25,preSpeechPadMs:800,redemptionMs:1400,minSpeechMs:400,submitUserSpeechOnPause:!1},t.validateOptions=function(e){(e.positiveSpeechThreshold<0||e.positiveSpeechThreshold>1)&&o.log.error("positiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&o.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold"),e.preSpeechPadMs<0&&o.log.error("preSpeechPadMs should be positive"),e.redemptionMs<0&&o.log.error("redemptionMs should be positive"),e.minSpeechMs<0&&o.log.error("minSpeechMs should be positive")};const s=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),r=new Float32Array(t.at(-1));return e.forEach(((e,o)=>{const n=t[o];r.set(e,n)})),r};function a(e,t){return{redemptionFrames:Math.floor(e.redemptionMs/t),preSpeechPadFrames:Math.floor(e.preSpeechPadMs/t),minSpeechFrames:Math.floor(e.minSpeechMs/t)}}t.FrameProcessor=class{constructor(e,t,r,o){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=r,this.msPerFrame=o,this.speaking=!1,this.redemptionCounter=0,this.speechFrameCount=0,this.active=!1,this.speechRealStartFired=!1,this.setOptions=e=>{this.options={...this.options,...e};const{redemptionFrames:t,preSpeechPadFrames:r,minSpeechFrames:o}=a(this.options,this.msPerFrame);this.redemptionFrames=t,this.preSpeechPadFrames=r,this.minSpeechFrames=o},this.reset=()=>{this.speaking=!1,this.speechRealStartFired=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0,this.speechFrameCount=0},this.pause=e=>{this.active=!1,this.options.submitUserSpeechOnPause?this.endSegment(e):this.reset()},this.resume=()=>{this.active=!0},this.endSegment=e=>{const t=this.audioBuffer;this.audioBuffer=[];const r=this.speaking;if(this.reset(),r)if(t.reduce(((e,t)=>t.isSpeech?e+1:e),0)>=this.minSpeechFrames){const 
r=s(t.map((e=>e.frame)));e({msg:n.Message.SpeechEnd,audio:r})}else e({msg:n.Message.VADMisfire});return{}},this.process=async(e,t)=>{if(!this.active)return;const r=await this.modelProcessFunc(e),o=r.isSpeech>=this.options.positiveSpeechThreshold;if(t({probs:r,msg:n.Message.FrameProcessed,frame:e}),this.audioBuffer.push({frame:e,isSpeech:o}),o&&(this.speechFrameCount++,this.redemptionCounter=0),o&&!this.speaking&&(this.speaking=!0,t({msg:n.Message.SpeechStart})),this.speaking&&this.speechFrameCount===this.minSpeechFrames&&!this.speechRealStartFired&&(this.speechRealStartFired=!0,t({msg:n.Message.SpeechRealStart})),r.isSpeech=this.redemptionFrames){this.redemptionCounter=0,this.speechFrameCount=0,this.speaking=!1,this.speechRealStartFired=!1;const e=this.audioBuffer;if(this.audioBuffer=[],e.reduce(((e,t)=>t.isSpeech?e+1:e),0)>=this.minSpeechFrames){const r=s(e.map((e=>e.frame)));t({msg:n.Message.SpeechEnd,audio:r})}else t({msg:n.Message.VADMisfire})}if(!this.speaking){for(;this.audioBuffer.length>this.preSpeechPadFrames;)this.audioBuffer.shift();this.speechFrameCount=0}},this.audioBuffer=[];const{redemptionFrames:i,preSpeechPadFrames:l,minSpeechFrames:u}=a(this.options,this.msPerFrame);this.redemptionFrames=i,this.preSpeechPadFrames=l,this.minSpeechFrames=u,this.reset()}}},710:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.log=void 0;const r=e=>t=>{console.log(`VAD | ${e} >`,t)};t.log={error:r("error"),debug:r("debug"),warn:r("warn")}},954:(e,t)=>{"use strict";var r;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP",e.SpeechRealStart="SPEECH_REAL_START",e.FrameProcessed="FRAME_PROCESSED"}(r||(t.Message=r={}))},650:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0})},559:function(e,t,r){"use strict";var 
o=this&&this.__createBinding||(Object.create?function(e,t,r,o){void 0===o&&(o=r);var n=Object.getOwnPropertyDescriptor(t,r);n&&!("get"in n?!t.__esModule:n.writable||n.configurable)||(n={enumerable:!0,get:function(){return t[r]}}),Object.defineProperty(e,o,n)}:function(e,t,r,o){void 0===o&&(o=r),e[o]=t[r]}),n=this&&this.__exportStar||function(e,t){for(var r in e)"default"===r||Object.prototype.hasOwnProperty.call(t,r)||o(t,e,r)};Object.defineProperty(t,"__esModule",{value:!0}),t.SileroV5=t.SileroLegacy=void 0,n(r(650),t);var s=r(143);Object.defineProperty(t,"SileroLegacy",{enumerable:!0,get:function(){return s.SileroLegacy}});var a=r(508);Object.defineProperty(t,"SileroV5",{enumerable:!0,get:function(){return a.SileroV5}})},143:(e,t,r)=>{"use strict";var o;Object.defineProperty(t,"__esModule",{value:!0}),t.SileroLegacy=void 0;const n=r(710);class s{constructor(e,t,r,o,n){this.ortInstance=e,this._session=t,this._h=r,this._c=o,this._sr=n,this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ortInstance.Tensor("float32",e,[2,1,64]),this._c=new this.ortInstance.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ortInstance.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},r=await this._session.run(t);this._h=r.hn,this._c=r.cn;const[o]=r.output?.data;return{notSpeech:1-o,isSpeech:o}},this.release=async()=>{await this._session.release(),this._h.dispose(),this._c.dispose(),this._sr.dispose()}}}t.SileroLegacy=s,o=s,s.new=async(e,t)=>{n.log.debug("initializing vad");const r=await t(),s=await e.InferenceSession.create(r),a=new e.Tensor("int64",[16000n]),i=Array(128).fill(0),l=new e.Tensor("float32",i,[2,1,64]),u=new e.Tensor("float32",i,[2,1,64]);return n.log.debug("vad is initialized"),new o(e,s,l,u,a)}},508:(e,t,r)=>{"use strict";var o;Object.defineProperty(t,"__esModule",{value:!0}),t.SileroV5=void 0;const n=r(710);function s(e){const t=Array(256).fill(0);return new e.Tensor("float32",t,[2,1,128])}class 
a{constructor(e,t,r,o){this._session=e,this._state=t,this._sr=r,this.ortInstance=o,this.reset_state=()=>{this._state=s(this.ortInstance)},this.process=async e=>{const t={input:new this.ortInstance.Tensor("float32",e,[1,e.length]),state:this._state,sr:this._sr},r=await this._session.run(t);if(!r.stateN)throw new Error("No state from model");if(this._state=r.stateN,!r.output?.data)throw new Error("No output from model");const o=r.output.data[0];if("number"!=typeof o)throw new Error("Weird output data");return{notSpeech:1-o,isSpeech:o}},this.release=async()=>{await this._session.release(),this._state.dispose(),this._sr.dispose()}}}t.SileroV5=a,o=a,a.new=async(e,t)=>{n.log.debug("Loading VAD...");const r=await t(),a=await e.InferenceSession.create(r),i=new e.Tensor("int64",[16000n]),l=s(e);return n.log.debug("...finished loading VAD"),new o(a,l,i,e)}},202:function(e,t,r){"use strict";var o=this&&this.__createBinding||(Object.create?function(e,t,r,o){void 0===o&&(o=r);var n=Object.getOwnPropertyDescriptor(t,r);n&&!("get"in n?!t.__esModule:n.writable||n.configurable)||(n={enumerable:!0,get:function(){return t[r]}}),Object.defineProperty(e,o,n)}:function(e,t,r,o){void 0===o&&(o=r),e[o]=t[r]}),n=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),s=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var r in e)"default"!==r&&Object.prototype.hasOwnProperty.call(e,r)&&o(t,e,r);return n(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.NonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const a=s(r(656)),i=r(485),l=r(973),u=r(362),d=r(954),c=r(559),p=r(825);t.defaultNonRealTimeVADOptions={...u.defaultFrameProcessorOptions,modelURL:i.baseAssetPath+"silero_vad_legacy.onnx",modelFetcher:l.defaultModelFetcher},t.NonRealTimeVAD=class{static async new(e={}){const r={...t.defaultNonRealTimeVADOptions,...e};(0,u.validateOptions)(r),void 
0!==r.ortConfig&&r.ortConfig(a);const o=()=>r.modelFetcher(r.modelURL),n=await c.SileroLegacy.new(a,o),s=new u.FrameProcessor(n.process,n.reset_state,{positiveSpeechThreshold:r.positiveSpeechThreshold,negativeSpeechThreshold:r.negativeSpeechThreshold,redemptionMs:r.redemptionMs,preSpeechPadMs:r.preSpeechPadMs,minSpeechMs:r.minSpeechMs,submitUserSpeechOnPause:r.submitUserSpeechOnPause},96);return s.resume(),new this(o,a,r,s)}constructor(e,t,r,o){this.modelFetcher=e,this.ort=t,this.options=r,this.frameProcessor=o,this.frameSamples=1536}async*run(e,t){const r={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.frameSamples},o=new p.Resampler(r);let n=0,s=0,a=0;for await(const t of o.stream(e)){const e=[];await this.frameProcessor.process(t,(t=>{e.push(t)}));for(const t of e)switch(t.msg){case d.Message.SpeechStart:n=a*this.frameSamples/16;break;case d.Message.SpeechEnd:s=(a+1)*this.frameSamples/16,yield{audio:t.audio,start:n,end:s}}a++}const i=[];this.frameProcessor.endSegment((e=>{i.push(e)}));for(const e of i)e.msg===d.Message.SpeechEnd&&(yield{audio:e.audio,start:n,end:a*this.frameSamples/16})}}},746:function(e,t,r){"use strict";var o=this&&this.__createBinding||(Object.create?function(e,t,r,o){void 0===o&&(o=r);var n=Object.getOwnPropertyDescriptor(t,r);n&&!("get"in n?!t.__esModule:n.writable||n.configurable)||(n={enumerable:!0,get:function(){return t[r]}}),Object.defineProperty(e,o,n)}:function(e,t,r,o){void 0===o&&(o=r),e[o]=t[r]}),n=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),s=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var r in e)"default"!==r&&Object.prototype.hasOwnProperty.call(e,r)&&o(t,e,r);return n(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.MicVAD=t.getDefaultRealTimeVADOptions=t.ort=t.DEFAULT_MODEL=void 0;const 
a=s(r(647)),i=r(973),l=r(362),u=r(710),d=r(954),c=r(559),p=r(825);t.DEFAULT_MODEL="legacy",t.ort=a,t.getDefaultRealTimeVADOptions=e=>({...l.defaultFrameProcessorOptions,onFrameProcessed:()=>{},onVADMisfire:()=>{u.log.debug("VAD misfire")},onSpeechStart:()=>{u.log.debug("Detected speech start")},onSpeechEnd:()=>{u.log.debug("Detected speech end")},onSpeechRealStart:()=>{u.log.debug("Detected real speech start")},baseAssetPath:"./",onnxWASMBasePath:"./",model:e,workletOptions:{},getStream:async()=>await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}),pauseStream:async e=>{e.getTracks().forEach((e=>{e.stop()}))},resumeStream:async()=>await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}),ortConfig:e=>{e.env.logLevel="error"},startOnLoad:!0,processorType:"auto"});class f{constructor(e,t,r,o,n=!1,s=null,a=null,i=null,l=null,c=null,f=null,h="uninitialized",m=!1){this.options=e,this.frameProcessor=t,this.model=r,this.frameSamples=o,this.listening=n,this.errored=s,this._stream=a,this._audioContext=i,this._vadNode=l,this._mediaStreamAudioSourceNode=c,this._audioProcessorAdapterType=f,this.initializationState=h,this.ownsAudioContext=m,this.getAudioInstances=()=>{if(null===this._stream||null===this._audioContext||null==this._vadNode||null==this._mediaStreamAudioSourceNode)throw new Error("MicVAD has null stream, audio context, or processor adapter");return{stream:this._stream,audioContext:this._audioContext,vadNode:this._vadNode,mediaStreamAudioSourceNode:this._mediaStreamAudioSourceNode}},this.setErrored=e=>{this.initializationState="errored",this.errored=e},this.start=async()=>{switch(this.initializationState){case"uninitialized":u.log.debug("initializing micVAD"),this.initializationState="initializing",this.frameProcessor.resume();try{this._stream=await this.options.getStream()}catch(e){throw e instanceof 
Error?this.setErrored(e.message):this.setErrored(String(e)),e}if(this.options.audioContext?(console.log("using custom audio context"),this._audioContext=this.options.audioContext):(console.log("using default audio context"),this._audioContext=new AudioContext,this.ownsAudioContext=!0),!this._audioContext)throw this.setErrored("Audio context is null"),Error("Audio context is null");switch(this._audioProcessorAdapterType="auto"==this.options.processorType?"audioWorklet"in this._audioContext&&"function"==typeof AudioWorkletNode?"AudioWorklet":"ScriptProcessor":this.options.processorType,this._audioProcessorAdapterType){case"AudioWorklet":this._vadNode=await async function(e,t,r,o,n){await r.audioWorklet.addModule(e),t.processorOptions={...t.processorOptions??{},frameSamples:o};const s=new AudioWorkletNode(r,"vad-helper-worklet",t);return s.port.onmessage=async e=>{const t=e.data;if("object"==typeof t&&t&&"message"in t)switch(t.message){case d.Message.AudioFrame:{if(!("data"in t&&t.data instanceof ArrayBuffer))return void console.log("Audio frame message has no data");const e=new Float32Array(t.data);await n(e);break}}else console.error("Invalid message event",t)},s}(this.options.baseAssetPath+"vad.worklet.bundle.min.js",this.options.workletOptions,this._audioContext,this.frameSamples,this.processFrame);break;case"ScriptProcessor":this._vadNode=await async function(e,t,r){const o=new p.Resampler({nativeSampleRate:e.sampleRate,targetSampleRate:16e3,targetFrameSize:t});u.log.debug("using script processor");const n=e.createScriptProcessor(4096,1,1);let s=!1;return n.onaudioprocess=async e=>{if(!s){s=!0;try{const t=e.inputBuffer.getChannelData(0);e.outputBuffer.getChannelData(0).fill(0);const n=o.process(t);for(const e of n)await r(e)}catch(e){console.error("Error processing audio:",e)}finally{s=!1}}},n.connect(e.destination),n}(this._audioContext,this.frameSamples,this.processFrame);break;default:throw new Error(`Unsupported audio processor adapter type: 
${this._audioProcessorAdapterType}`)}this._mediaStreamAudioSourceNode=new MediaStreamAudioSourceNode(this._audioContext,{mediaStream:this._stream}),this._mediaStreamAudioSourceNode.connect(this._vadNode),u.log.debug("started micVAD"),this.listening=!0,this.initializationState="initialized";break;case"initializing":u.log.warn("start called while initializing");break;case"initialized":{if(this.listening)return;this.listening=!0,this.frameProcessor.resume();const{stream:e,audioContext:t,vadNode:r}=this.getAudioInstances();this._stream=await this.options.resumeStream(e);const o=new MediaStreamAudioSourceNode(t,{mediaStream:this._stream});this._mediaStreamAudioSourceNode=o,o.connect(r);break}case"destroyed":u.log.warn("start called after destroyed");break;case"errored":u.log.error("start called after errored");break;default:u.log.warn("weird initialization state")}},this.pause=async()=>{if(!this.listening)return;this.listening=!1;const{stream:e,mediaStreamAudioSourceNode:t}=this.getAudioInstances();await this.options.pauseStream(e),t.disconnect(),this.frameProcessor.pause(this.handleFrameProcessorEvent)},this.destroy=async()=>{u.log.debug("destroy called"),this.initializationState="destroyed";const{vadNode:e}=this.getAudioInstances();e instanceof AudioWorkletNode&&e.port.postMessage(d.Message.SpeechStop),this.listening&&await this.pause(),await this.model.release(),this.ownsAudioContext&&await(this._audioContext?.close())},this.setOptions=e=>{this.frameProcessor.setOptions(e)},this.processFrame=async e=>{await this.frameProcessor.process(e,this.handleFrameProcessorEvent)},this.handleFrameProcessorEvent=e=>{switch(e.msg){case d.Message.FrameProcessed:this.options.onFrameProcessed(e.probs,e.frame);break;case d.Message.SpeechStart:this.options.onSpeechStart();break;case d.Message.SpeechRealStart:this.options.onSpeechRealStart();break;case d.Message.VADMisfire:this.options.onVADMisfire();break;case d.Message.SpeechEnd:this.options.onSpeechEnd(e.audio)}}}static async 
new(e={}){const r={...(0,t.getDefaultRealTimeVADOptions)(e.model??t.DEFAULT_MODEL),...e};(0,l.validateOptions)(r),t.ort.env.wasm.wasmPaths=r.onnxWASMBasePath,void 0!==r.ortConfig&&r.ortConfig(t.ort);const o="v5"===r.model?"silero_vad_v5.onnx":"silero_vad_legacy.onnx",n=r.baseAssetPath+o,s="v5"===r.model?c.SileroV5.new:c.SileroLegacy.new;let a;try{a=await s(t.ort,(()=>(0,i.defaultModelFetcher)(n)))}catch(e){throw console.error(`Encountered an error while loading model file ${n}`),e}const u="v5"===r.model?512:1536,d=u/16,p=new l.FrameProcessor(a.process,a.reset_state,{positiveSpeechThreshold:r.positiveSpeechThreshold,negativeSpeechThreshold:r.negativeSpeechThreshold,redemptionMs:r.redemptionMs,preSpeechPadMs:r.preSpeechPadMs,minSpeechMs:r.minSpeechMs,submitUserSpeechOnPause:r.submitUserSpeechOnPause},d),h=new f(r,p,a,u);if(r.startOnLoad)try{await h.start()}catch(e){throw console.error("Error starting micVad",e),e}return h}}t.MicVAD=f},825:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const o=r(710);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const r of e)for(this.inputBuffer.push(r);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();t.push(e)}return t},e.nativeSampleRate<16e3&&o.log.error("nativeSampleRate is too low. 
Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}async*stream(e){for(const t of e)for(this.inputBuffer.push(t);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();yield e}}hasEnoughDataForFrame(){return this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>=this.options.targetFrameSize}generateOutputFrame(){const e=new Float32Array(this.options.targetFrameSize);let t=0,r=0;for(;t{"use strict";function r(e,t,r){for(let o=0;o{r.addEventListener("loadend",(()=>{const e=r.result;t.decodeAudioData(e,(e=>{o=e,t.startRendering().then((()=>{console.log("Rendering completed successfully"),n()})).catch((e=>{console.error("Rendering failed: ",e)}))}),(e=>{console.log("Error with decoding audio data: ",e)}))})),r.readAsArrayBuffer(e)})),null===o)throw Error("some shit");const n=o,s=new Float32Array(n.length);for(let e=0;e{"use strict";t.exports=e}},r={};function o(e){var n=r[e];if(void 0!==n)return n.exports;var s=r[e]={exports:{}};return t[e].call(s.exports,s,s.exports,o),s.exports}o.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t);var n={};return(()=>{"use strict";var e=n;Object.defineProperty(e,"__esModule",{value:!0}),e.getDefaultRealTimeVADOptions=e.MicVAD=e.DEFAULT_MODEL=e.utils=e.NonRealTimeVAD=e.Message=e.FrameProcessor=e.defaultModelFetcher=e.baseAssetPath=void 0;var t=o(485);Object.defineProperty(e,"baseAssetPath",{enumerable:!0,get:function(){return t.baseAssetPath}});var r=o(973);Object.defineProperty(e,"defaultModelFetcher",{enumerable:!0,get:function(){return r.defaultModelFetcher}});var s=o(362);Object.defineProperty(e,"FrameProcessor",{enumerable:!0,get:function(){return s.FrameProcessor}});var a=o(954);Object.defineProperty(e,"Message",{enumerable:!0,get:function(){return a.Message}});var i=o(202);Object.defineProperty(e,"NonRealTimeVAD",{enumerable:!0,get:function(){return i.NonRealTimeVAD}});const 
l=o(787);e.utils={audioFileToArray:l.audioFileToArray,minFramesForTargetMS:l.minFramesForTargetMS,arrayBufferToBase64:l.arrayBufferToBase64,encodeWAV:l.encodeWAV};var u=o(746);Object.defineProperty(e,"DEFAULT_MODEL",{enumerable:!0,get:function(){return u.DEFAULT_MODEL}}),Object.defineProperty(e,"MicVAD",{enumerable:!0,get:function(){return u.MicVAD}}),Object.defineProperty(e,"getDefaultRealTimeVADOptions",{enumerable:!0,get:function(){return u.getDefaultRealTimeVADOptions}})})(),n})())); \ No newline at end of file diff --git a/dashboard/vad/silero_vad_legacy.onnx b/dashboard/vad/silero_vad_legacy.onnx new file mode 100644 index 0000000..e6db48d Binary files /dev/null and b/dashboard/vad/silero_vad_legacy.onnx differ diff --git a/dashboard/vad/silero_vad_v5.onnx b/dashboard/vad/silero_vad_v5.onnx new file mode 100644 index 0000000..b3e3a90 Binary files /dev/null and b/dashboard/vad/silero_vad_v5.onnx differ diff --git a/dashboard/vad/vad.worklet.bundle.min.js b/dashboard/vad/vad.worklet.bundle.min.js new file mode 100644 index 0000000..dfa2c2b --- /dev/null +++ b/dashboard/vad/vad.worklet.bundle.min.js @@ -0,0 +1 @@ +(()=>{"use strict";var e={710:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=void 0;const s=e=>t=>{console.log(`VAD | ${e} >`,t)};t.log={error:s("error"),debug:s("debug"),warn:s("warn")}},954:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP",e.SpeechRealStart="SPEECH_REAL_START",e.FrameProcessed="FRAME_PROCESSED"}(s||(t.Message=s={}))},825:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const r=s(710);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const s of e)for(this.inputBuffer.push(s);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();t.push(e)}return 
t},e.nativeSampleRate<16e3&&r.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}async*stream(e){for(const t of e)for(this.inputBuffer.push(t);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();yield e}}hasEnoughDataForFrame(){return this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>=this.options.targetFrameSize}generateOutputFrame(){const e=new Float32Array(this.options.targetFrameSize);let t=0,s=0;for(;t{const e=s(710),t=s(954),r=s(825);class o extends AudioWorkletProcessor{constructor(s){super(),this._stopProcessing=!1,this.options=s.processorOptions,this.port.onmessage=s=>{s.data===t.Message.SpeechStop&&(e.log.debug("Worklet received speech stop message"),this._stopProcessing=!0)},this.resampler=new r.Resampler({nativeSampleRate:sampleRate,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples})}process(e){if(this._stopProcessing)return!1;const s=e[0];if(void 0===s)return!0;const r=s[0];if(void 0===r)return!0;const o=this.resampler.process(r);for(const e of o)this.port.postMessage({message:t.Message.AudioFrame,data:e.buffer},[e.buffer]);return!0}}registerProcessor("vad-helper-worklet",o)})()})(); \ No newline at end of file diff --git a/http_api.py b/http_api.py index 420568a..45a7fb8 100644 --- a/http_api.py +++ b/http_api.py @@ -1,4 +1,4 @@ -"""Mod³ HTTP API — REST interface for TTS synthesis and VAD. +"""Mod³ HTTP API — REST interface for TTS synthesis, VAD, and dashboard. 
Endpoints: POST /v1/synthesize — text → audio bytes (WAV/PCM) + structured metrics @@ -9,18 +9,29 @@ GET /v1/jobs — list recent generation jobs with full metrics GET /v1/jobs/{id} — get a specific job's metrics GET /health — server health check + POST /shutdown — graceful server shutdown (kernel lifecycle) + GET /capabilities — machine-readable capability manifest + WS /ws/chat — dashboard voice/text chat + GET /dashboard — dashboard UI """ +import asyncio import io +import logging +import os +import signal import struct import time import uuid import wave from collections import OrderedDict +from pathlib import Path from threading import Lock +from typing import Optional -from fastapi import FastAPI, Response, UploadFile -from fastapi.responses import JSONResponse +from fastapi import FastAPI, Request, Response, UploadFile, WebSocket +from fastapi.responses import FileResponse, JSONResponse +from fastapi.staticfiles import StaticFiles from pydantic import BaseModel, Field from bus import ModalityBus @@ -33,6 +44,29 @@ app = FastAPI(title="Mod³", description="Local multi-model TTS on Apple Silicon") +logger = logging.getLogger("mod3.http") + +_server_start_time = time.time() +_shutting_down = False + + +@app.on_event("startup") +async def _warmup_kokoro(): + """Pre-load Kokoro TTS engine in background to avoid ~60s cold start on first request.""" + import threading + + def _do_warmup(): + try: + from engine import get_model + + get_model("kokoro") + logger.info("Kokoro TTS engine pre-warmed successfully") + except Exception as e: + logger.warning("Kokoro pre-warm failed (will lazy-load on first request): %s", e) + + threading.Thread(target=_do_warmup, daemon=True, name="kokoro-warmup").start() + + try: from server import _bus as _shared_bus except Exception: @@ -170,6 +204,29 @@ class SpeechRequest(BaseModel): speed: float = Field(default=1.0) +class ShutdownRequest(BaseModel): + """Graceful shutdown request from the kernel.""" + + timeout_sec: float = 
Field(default=5.0, ge=0, le=60) + reason: str = Field(default="shutdown-requested") + + +# --------------------------------------------------------------------------- +# Shutdown middleware — reject new requests once shutdown is initiated +# --------------------------------------------------------------------------- + + +@app.middleware("http") +async def _reject_during_shutdown(request: Request, call_next): + """Return 503 for new requests once graceful shutdown has been initiated.""" + if _shutting_down and request.url.path != "/health": + return JSONResponse( + status_code=503, + content={"error": "server is shutting down"}, + ) + return await call_next(request) + + # --------------------------------------------------------------------------- # Endpoints # --------------------------------------------------------------------------- @@ -505,24 +562,190 @@ def voices(): return {"engines": engines} +@app.post("/v1/stop") +def stop_speech(job_id: str = ""): + """Stop current speech and/or cancel queued items. + + If job_id is provided, cancels that specific job. + If empty, interrupts current playback and clears the queue. + Returns interruption context for barge-in support. 
+ """ + try: + from server import _speech_queue, pipeline_state + + if job_id: + cancelled = _speech_queue.cancel(job_id) + return {"status": "ok", "message": f"Cancelled {job_id}" if cancelled else f"Job {job_id} not found"} + else: + # Get interrupt info before stopping + interrupt_info = None + if pipeline_state.is_speaking: + info = pipeline_state.interrupt(reason="http_barge_in") + if info: + interrupt_info = { + "spoken_pct": info.spoken_pct, + "delivered_text": info.delivered_text, + "full_text": info.full_text, + "reason": info.reason, + } + cancelled_count = _speech_queue.cancel_all_queued() + return { + "status": "ok", + "message": f"Interrupted playback; cancelled {cancelled_count} queued items", + "interrupted": interrupt_info, + } + except ImportError: + return JSONResponse(status_code=503, content={"error": "Speech queue not available in HTTP-only mode"}) + + @app.get("/health") def health(): - """Health check with summary stats.""" - with _jobs_lock: - total = len(_jobs) - active = sum(1 for j in _jobs.values() if j.get("status") in ("generating", "processing")) - by_type = {} - for j in _jobs.values(): - t = j.get("type", "unknown") - by_type[t] = by_type.get(t, 0) + 1 + """Health check — standardized CogOS service format.""" + try: + loaded = get_loaded_engines() + + # Engine status: loaded/unloaded for each registered engine + engines = {} + for engine_name in MODELS: + engines[engine_name] = "loaded" if engine_name in loaded else "unloaded" + + # Modality availability + modalities = { + "tts": len(loaded) > 0, + "stt": False, # STT not yet implemented as a server modality + "vad": vad_loaded(), + } + + # Queue state from job ledger + with _jobs_lock: + total = len(_jobs) + active = sum(1 for j in _jobs.values() if j.get("status") in ("generating", "processing")) + + # Overall status: ok if at least one TTS engine loaded, degraded if none + status = "ok" if loaded else "degraded" + + return { + "status": status, + "service": "mod3", + "version": 
"0.3.0", + "uptime_sec": round(time.time() - _server_start_time, 1), + "engines": engines, + "modalities": modalities, + "queue": { + "depth": total, + "active_jobs": active, + }, + } + except Exception as e: + return JSONResponse( + status_code=500, + content={ + "status": "error", + "service": "mod3", + "version": "0.3.0", + "error": str(e), + }, + ) + + +@app.post("/shutdown") +async def shutdown(req: Optional[ShutdownRequest] = None): + """Initiate graceful server shutdown. + + Called by the CogOS kernel for lifecycle management. Returns immediately + with confirmation, then drains active jobs and exits. + + Body (optional): {"timeout_sec": 5, "reason": "kernel-restart"} + """ + global _shutting_down + + if _shutting_down: + return JSONResponse( + status_code=409, + content={"status": "already_shutting_down"}, + ) + + if req is None: + req = ShutdownRequest() + + timeout_sec = req.timeout_sec + reason = req.reason + + _shutting_down = True + logger.info("Shutdown requested: reason=%s timeout=%.1fs", reason, timeout_sec) + + async def _graceful_exit(): + """Wait for active jobs to drain, then signal the process to stop.""" + deadline = time.time() + timeout_sec + while time.time() < deadline: + with _jobs_lock: + active = sum(1 for j in _jobs.values() if j.get("status") in ("generating", "processing")) + if active == 0: + break + await asyncio.sleep(0.25) + + with _jobs_lock: + remaining = sum(1 for j in _jobs.values() if j.get("status") in ("generating", "processing")) + + if remaining: + logger.warning("Shutdown timeout reached with %d active jobs — forcing exit", remaining) + else: + logger.info("All jobs drained — exiting cleanly") + + # Send SIGINT to our own process, which uvicorn handles gracefully + os.kill(os.getpid(), signal.SIGINT) + + # Fire-and-forget: schedule the shutdown coroutine on the running loop + asyncio.ensure_future(_graceful_exit()) + return { - "status": "ok", - "engines_loaded": get_loaded_engines(), - "vad_loaded": vad_loaded(), - 
"jobs": { - "total": total, - "active": active, - "by_type": by_type, + "status": "shutting_down", + "reason": reason, + "timeout_sec": timeout_sec, + } + + +@app.get("/capabilities") +def capabilities(): + """Machine-readable capability manifest for service discovery.""" + voices = {name: cfg["voices"] for name, cfg in MODELS.items()} + return { + "service": "mod3", + "version": "0.3.0", + "description": "Model Modality Modulator — local TTS, STT, and VAD on Apple Silicon", + "modalities": ["voice"], + "capabilities": { + "tts": { + "engines": list(MODELS.keys()), + "default_voice": "bm_lewis", + "default_speed": 1.25, + "endpoint": "/v1/synthesize", + }, + "stt": { + "engine": "mlx_whisper", + "model": "mlx-community/whisper-large-v3-turbo", + "languages": ["en"], + "endpoint": None, + }, + "vad": { + "engine": "silero_v5", + "endpoint": "/v1/vad", + }, + }, + "voices": voices, + "endpoints": { + "synthesize": "POST /v1/synthesize", + "speech": "POST /v1/audio/speech", + "vad": "POST /v1/vad", + "voices": "GET /v1/voices", + "health": "GET /health", + "shutdown": "POST /shutdown", + "capabilities": "GET /capabilities", + }, + "protocols": { + "mcp": True, + "http": True, + "websocket": True, }, } @@ -626,6 +849,91 @@ def get_bus() -> ModalityBus: return _bus +# --------------------------------------------------------------------------- +# Dashboard — voice/text chat via WebSocket +# --------------------------------------------------------------------------- + +_logger = logging.getLogger("mod3.dashboard") + +_dashboard_dir = Path(__file__).parent / "dashboard" + + +@app.get("/dashboard") +async def dashboard_page(): + """Serve the dashboard UI.""" + index = _dashboard_dir / "index.html" + if index.exists(): + return FileResponse(str(index)) + return JSONResponse({"error": "dashboard not found"}, status_code=404) + + +@app.websocket("/ws/chat") +async def ws_chat(websocket: WebSocket): + """Dashboard voice/text chat — one session per connection.""" + await 
websocket.accept() + + loop = asyncio.get_running_loop() + + from agent_loop import AgentLoop + from channels import BrowserChannel + from pipeline_state import PipelineState + from providers import auto_detect_provider + + provider = auto_detect_provider() + ps = PipelineState() + + agent = AgentLoop( + bus=_bus, + provider=provider, + pipeline_state=ps, + ) + + channel = BrowserChannel( + ws=websocket, + bus=_bus, + pipeline_state=ps, + loop=loop, + on_event=agent.handle_event, + ) + + agent.channel_id = channel.channel_id + agent._channel_ref = channel + + _logger.info("Dashboard session started: %s (provider: %s)", channel.channel_id, provider.name) + + try: + await channel.run() + finally: + _logger.info("Dashboard session ended: %s", channel.channel_id) + + +# Mount dashboard static files (after explicit routes so they don't shadow /v1/*) +if _dashboard_dir.exists(): + # VAD assets need their own mount (ONNX workers request from this path) + _vad_dir = _dashboard_dir / "vad" + if _vad_dir.exists(): + app.mount("/dashboard/vad", StaticFiles(directory=str(_vad_dir)), name="dashboard_vad") + app.mount("/dashboard", StaticFiles(directory=str(_dashboard_dir)), name="dashboard_static") + + +# ONNX Runtime WASM workers request .wasm and .onnx files at the root path. +# These catch-all routes serve them from dashboard/vad/. 
+@app.get("/{filename:path}.wasm")
+async def serve_wasm(filename: str):
+    wasm_path = _dashboard_dir / "vad" / f"{filename}.wasm"
+    if wasm_path.exists():
+        return FileResponse(str(wasm_path), media_type="application/wasm")
+    return JSONResponse({"detail": "Not Found"}, status_code=404)
+
+
+@app.get("/{filename:path}.onnx")
+async def serve_onnx(filename: str):
+    onnx_path = _dashboard_dir / "vad" / f"{filename}.onnx"
+    if onnx_path.exists():
+        return FileResponse(str(onnx_path), media_type="application/octet-stream")
+    return JSONResponse({"detail": "Not Found"}, status_code=404)
+
+
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
diff --git a/integrations/bargein-producer.py b/integrations/bargein-producer.py
new file mode 100755
index 0000000..2b9fa51
--- /dev/null
+++ b/integrations/bargein-producer.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+"""
+Barge-in signal producer — detects SuperWhisper recording and writes
+the signal file that Mod3's barge-in consumer watches.
+
+Detection method:
+    SuperWhisper creates a timestamped directory in its recordings folder
+    the instant recording begins (the dir is empty). When recording finishes,
+    it writes output.wav and meta.json into that directory. We poll for new
+    empty directories to detect start, and for the appearance of output.wav
+    to detect end. 
+ +Signal file: /tmp/mod3-barge-in.json + Start: {"event": "user_speaking_start", "timestamp": "...", "source": "superwhisper"} + End: {"event": "user_speaking_end", "timestamp": "...", "source": "superwhisper"} + +Usage: + python3 bargein-producer.py # foreground (logs to stderr) + python3 bargein-producer.py & # background + launchctl load com.cogos.bargein-producer.plist # launchd + +Environment variables: + BARGEIN_SIGNAL — override signal file path (default: /tmp/mod3-barge-in.json) + BARGEIN_POLL_MS — poll interval in ms (default: 150) + SW_RECORDINGS_DIR — override recordings path +""" + +import json +import logging +import os +import sys +import time +from datetime import datetime, timezone +from pathlib import Path + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +SIGNAL_FILE = os.environ.get("BARGEIN_SIGNAL", "/tmp/mod3-barge-in.json") +POLL_INTERVAL = int(os.environ.get("BARGEIN_POLL_MS", "150")) / 1000.0 + +# SuperWhisper recordings directory +_default_rec_dir = os.path.expanduser("~/Documents/superwhisper/recordings") +RECORDINGS_DIR = os.environ.get("SW_RECORDINGS_DIR", _default_rec_dir) + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- + +logging.basicConfig( + stream=sys.stderr, + level=logging.INFO, + format="%(asctime)s [bargein] %(levelname)s %(message)s", + datefmt="%H:%M:%S", +) +log = logging.getLogger("bargein-producer") + +# --------------------------------------------------------------------------- +# State +# --------------------------------------------------------------------------- + + +class State: + """Tracks whether a recording is currently active.""" + + def __init__(self): + self.recording = False + # The folder name (timestamp) of the active recording + self.active_folder: str | None = 
None + # Set of known-completed folder names (avoid re-triggering) + self.known_folders: set[str] = set() + + def start(self, folder: str): + if self.recording and self.active_folder == folder: + return # already tracking + self.recording = True + self.active_folder = folder + _write_signal("user_speaking_start") + log.info("Recording started (folder=%s)", folder) + + def end(self): + if not self.recording: + return + folder = self.active_folder + self.recording = False + if self.active_folder: + self.known_folders.add(self.active_folder) + self.active_folder = None + _write_signal("user_speaking_end") + log.info("Recording finished (folder=%s)", folder) + + +# --------------------------------------------------------------------------- +# Signal file +# --------------------------------------------------------------------------- + + +def _write_signal(event: str): + """Atomically write the barge-in signal file.""" + payload = { + "event": event, + "timestamp": datetime.now(timezone.utc).isoformat(), + "source": "superwhisper", + } + tmp = SIGNAL_FILE + ".tmp" + try: + with open(tmp, "w") as f: + json.dump(payload, f) + os.replace(tmp, SIGNAL_FILE) + except OSError as e: + log.error("Failed to write signal file: %s", e) + + +# --------------------------------------------------------------------------- +# Detection logic +# --------------------------------------------------------------------------- + + +def _is_empty_dir(path: Path) -> bool: + """True if path is a directory with no children (except . and ..).""" + try: + return path.is_dir() and not any(path.iterdir()) + except OSError: + return False + + +def _has_output(path: Path) -> bool: + """True if the recording directory has output.wav (recording complete).""" + return (path / "output.wav").exists() or (path / "meta.json").exists() + + +_last_dir_mtime: float = 0.0 + + +def _scan(state: State, rec_dir: Path): + """One poll cycle: scan the recordings directory for state changes. 
+ + Optimisation: stat() the recordings dir first (~1us). Only do a full + iterdir() when the directory mtime has changed (new folder created) + OR when we're actively tracking a recording (need to check for output.wav). + """ + global _last_dir_mtime + + # Fast path: if we're tracking an active recording, just check that folder + if state.recording and state.active_folder: + active_path = rec_dir / state.active_folder + if _has_output(active_path): + state.end() + return + + # Check if directory changed since last scan + try: + dir_mtime = os.stat(rec_dir).st_mtime + except OSError: + return + if dir_mtime == _last_dir_mtime: + return # nothing changed — skip expensive iterdir() + _last_dir_mtime = dir_mtime + + # Directory changed — scan for new entries + try: + candidates: list[Path] = [] + for entry in rec_dir.iterdir(): + if entry.is_dir() and entry.name.isdigit() and entry.name not in state.known_folders: + candidates.append(entry) + except OSError: + return + + candidates.sort(key=lambda p: p.name, reverse=True) + for entry in candidates[:5]: + name = entry.name + + if _is_empty_dir(entry): + # New empty directory = recording just started + state.start(name) + return + + # Non-empty, not tracked — it's a completed recording we haven't seen + state.known_folders.add(name) + + +# --------------------------------------------------------------------------- +# Staleness guard +# --------------------------------------------------------------------------- + +_STALE_TIMEOUT = 120 # 2 minutes — if a recording folder stays empty this long, +# assume SuperWhisper cancelled/crashed and clear state + + +def _check_stale(state: State, rec_dir: Path): + """Clear recording state if the active folder has been empty too long.""" + if not state.recording or not state.active_folder: + return + folder = rec_dir / state.active_folder + try: + # Use folder creation time as the start timestamp + ctime = folder.stat().st_birthtime + except (OSError, AttributeError): + return + if 
time.time() - ctime > _STALE_TIMEOUT: + log.warning("Stale recording detected (>%ds), clearing state", _STALE_TIMEOUT) + state.end() + + +# --------------------------------------------------------------------------- +# Main loop +# --------------------------------------------------------------------------- + + +def main(): + rec_dir = Path(RECORDINGS_DIR) + if not rec_dir.is_dir(): + log.error("Recordings directory not found: %s", rec_dir) + log.error("Is SuperWhisper installed and configured?") + sys.exit(1) + + state = State() + + # Pre-populate known folders so we don't false-trigger on startup. + # Only treat an empty folder as "currently recording" if it was created + # within the last 30 seconds — older empties are stale/cancelled. + _STARTUP_FRESH_SECS = 30 + now = time.time() + newest_empty: tuple[str, float] | None = None + try: + for entry in rec_dir.iterdir(): + if entry.is_dir() and entry.name.isdigit(): + if _has_output(entry): + state.known_folders.add(entry.name) + elif _is_empty_dir(entry): + try: + age = now - entry.stat().st_birthtime + except (OSError, AttributeError): + age = float("inf") + if age < _STARTUP_FRESH_SECS: + # Possibly active — track the newest one + if newest_empty is None or entry.name > newest_empty[0]: + newest_empty = (entry.name, age) + else: + # Stale empty folder — ignore it + state.known_folders.add(entry.name) + log.debug("Ignoring stale empty folder: %s (age=%.0fs)", entry.name, age) + except OSError as e: + log.warning("Error during startup scan: %s", e) + + if newest_empty: + state.start(newest_empty[0]) + log.info("Detected in-progress recording on startup (age=%.1fs)", newest_empty[1]) + + log.info( + "Barge-in producer started (poll=%dms, signal=%s, recordings=%s, known=%d)", + POLL_INTERVAL * 1000, + SIGNAL_FILE, + RECORDINGS_DIR, + len(state.known_folders), + ) + + stale_counter = 0 + try: + while True: + _scan(state, rec_dir) + + # Check for stale recordings every ~5 seconds + stale_counter += 1 + if 
stale_counter >= int(5.0 / POLL_INTERVAL): + _check_stale(state, rec_dir) + stale_counter = 0 + + time.sleep(POLL_INTERVAL) + except KeyboardInterrupt: + log.info("Shutting down") + # Clean up signal file on exit + if state.recording: + state.end() + + +if __name__ == "__main__": + main() diff --git a/output_queue.py b/output_queue.py index 02d84f9..31d733f 100644 --- a/output_queue.py +++ b/output_queue.py @@ -102,6 +102,16 @@ def submit(self, channel_id: str, fn: Callable, **metadata) -> QueuedJob: """Submit a job to a channel's queue. Non-blocking.""" return self.get_queue(channel_id).submit(fn, **metadata) + def cancel_channel(self, channel_id: str) -> int: + """Cancel all pending jobs for a channel. Returns number of jobs cancelled.""" + queue = self._queues.get(channel_id) + if not queue: + return 0 + with queue._lock: + cancelled = len(queue._queue) + queue._queue.clear() + return cancelled + def status(self) -> dict[str, Any]: """Snapshot of all channel queues.""" return { diff --git a/providers.py b/providers.py new file mode 100644 index 0000000..cc551c4 --- /dev/null +++ b/providers.py @@ -0,0 +1,439 @@ +"""Inference providers for the Mod³ agent loop. + +Abstracts LLM backends behind an InferenceProvider protocol. +Each provider returns structured ToolCall responses, not streaming text. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import re +from dataclasses import dataclass, field +from typing import Any, Protocol, runtime_checkable + +import httpx + +logger = logging.getLogger("mod3.providers") + +# --------------------------------------------------------------------------- +# Types +# --------------------------------------------------------------------------- + + +@dataclass +class ToolCall: + """A single tool invocation from the LLM.""" + + name: str + arguments: dict[str, Any] + + +@dataclass +class ProviderResponse: + """Structured response from an inference provider.""" + + tool_calls: list[ToolCall] = field(default_factory=list) + text: str = "" # fallback plain text (no tool call) + raw: dict | None = None + + +# --------------------------------------------------------------------------- +# Protocol +# --------------------------------------------------------------------------- + + +@runtime_checkable +class InferenceProvider(Protocol): + @property + def name(self) -> str: ... + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + system: str = "", + ) -> ProviderResponse: ... + + +# --------------------------------------------------------------------------- +# Tool definitions +# --------------------------------------------------------------------------- + +AGENT_TOOLS: list[dict] = [ + { + "type": "function", + "function": { + "name": "speak", + "description": "Speak text aloud to the user via TTS. Use for conversational responses.", + "parameters": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "The text to speak aloud", + } + }, + "required": ["text"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "send_text", + "description": "Send text to the chat panel (no speech). 
Use for code, lists, links, or anything better read than heard.", + "parameters": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "The text to display in chat", + } + }, + "required": ["text"], + }, + }, + }, +] + + +# --------------------------------------------------------------------------- +# MLX provider (in-process, Apple Silicon) +# --------------------------------------------------------------------------- + + +def _format_tools_for_prompt(tools: list[dict]) -> str: + """Format tool definitions into a text block for Gemma's system prompt.""" + lines = ["You have access to the following tools:\n"] + for tool in tools: + fn = tool.get("function", tool) + name = fn.get("name", "") + desc = fn.get("description", "") + params = fn.get("parameters", {}) + lines.append(f"- **{name}**: {desc}") + props = params.get("properties", {}) + required = set(params.get("required", [])) + if props: + lines.append(" Parameters:") + for pname, pinfo in props.items(): + req_marker = " (required)" if pname in required else "" + lines.append( + f" - {pname} ({pinfo.get('type', 'string')}): {pinfo.get('description', '')}{req_marker}" + ) + lines.append( + "\nTo call a tool, output exactly:\n" + "\n" + '{"name": "", "arguments": {}}\n' + "\n" + "\nYou may make multiple tool calls. Every tool call must be wrapped " + "in its own block." 
+ ) + return "\n".join(lines) + + +_TOOL_CALL_RE = re.compile(r"\s*(\{.*?\})\s*", re.DOTALL) + + +def _parse_tool_calls(text: str) -> list[ToolCall]: + """Extract JSON blocks from model output.""" + calls: list[ToolCall] = [] + for match in _TOOL_CALL_RE.finditer(text): + try: + obj = json.loads(match.group(1)) + name = obj.get("name", "") + args = obj.get("arguments", {}) + if isinstance(args, str): + try: + args = json.loads(args) + except Exception: + args = {"text": args} + if name: + calls.append(ToolCall(name=name, arguments=args)) + except json.JSONDecodeError: + logger.warning("mlx: failed to parse tool_call JSON: %s", match.group(1)) + return calls + + +class MlxProvider: + """In-process Gemma inference on Apple Silicon via mlx-lm. + + Lazy-loads the model on first call. The model stays resident in memory + for the lifetime of the process — no cold-start on subsequent calls. + """ + + def __init__(self, model_id: str | None = None): + self._model_id = model_id or os.environ.get("MLX_MODEL", "mlx-community/gemma-3-4b-it-4bit") + self._model = None + self._tokenizer = None + + @property + def name(self) -> str: + return f"mlx/{self._model_id}" + + def _ensure_loaded(self) -> None: + """Load model + tokenizer on first use (synchronous, called from thread).""" + if self._model is not None: + return + from mlx_lm import load + + logger.info("mlx: loading model %s (first call, this may take a moment)", self._model_id) + self._model, self._tokenizer = load(self._model_id) + logger.info("mlx: model loaded successfully") + + def _generate_sync( + self, + messages: list[dict], + tools: list[dict] | None, + system: str, + ) -> ProviderResponse: + """Run generation synchronously (meant to be called via asyncio.to_thread).""" + from mlx_lm import generate + + self._ensure_loaded() + + # Build messages list with system prompt + msgs = list(messages) + system_parts: list[str] = [] + if system: + system_parts.append(system) + if tools: + 
system_parts.append(_format_tools_for_prompt(tools)) + if system_parts: + msgs = [{"role": "system", "content": "\n\n".join(system_parts)}] + msgs + + # Apply chat template + prompt = self._tokenizer.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False) + + max_tokens = int(os.environ.get("MLX_MAX_TOKENS", "512")) + raw_output = generate( + self._model, + self._tokenizer, + prompt=prompt, + max_tokens=max_tokens, + ) + + # Parse tool calls from output + tool_calls = _parse_tool_calls(raw_output) + + # Strip tool_call blocks from the text to get any remaining content + text = _TOOL_CALL_RE.sub("", raw_output).strip() + + return ProviderResponse( + tool_calls=tool_calls, + text=text, + raw={"model": self._model_id, "output": raw_output}, + ) + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + system: str = "", + ) -> ProviderResponse: + return await asyncio.to_thread(self._generate_sync, messages, tools, system) + + +# --------------------------------------------------------------------------- +# Ollama provider +# --------------------------------------------------------------------------- + + +class OllamaProvider: + """Ollama inference with native tool calling (validated with Gemma 4 E4B).""" + + def __init__( + self, + endpoint: str | None = None, + model: str | None = None, + ): + self._endpoint = endpoint or os.environ.get("OLLAMA_ENDPOINT", "http://localhost:11434") + self._model = model or os.environ.get("OLLAMA_MODEL", "gemma4:e4b") + + @property + def name(self) -> str: + return f"ollama/{self._model}" + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + system: str = "", + ) -> ProviderResponse: + msgs = list(messages) + if system: + msgs = [{"role": "system", "content": system}] + msgs + + body: dict[str, Any] = { + "model": self._model, + "messages": msgs, + "stream": False, + "think": False, + } + if tools: + body["tools"] = tools + + async with 
httpx.AsyncClient(timeout=300.0) as client: + resp = await client.post(f"{self._endpoint}/api/chat", json=body) + resp.raise_for_status() + data = resp.json() + + msg = data.get("message", {}) + raw_tool_calls = msg.get("tool_calls", []) + content = msg.get("content", "") + + tool_calls = [] + for tc in raw_tool_calls: + fn = tc.get("function", {}) + name = fn.get("name", "") + args = fn.get("arguments", {}) + if isinstance(args, str): + import json + + try: + args = json.loads(args) + except Exception: + args = {"text": args} + if name: + tool_calls.append(ToolCall(name=name, arguments=args)) + + return ProviderResponse(tool_calls=tool_calls, text=content, raw=data) + + +# --------------------------------------------------------------------------- +# CogOS provider (OpenAI-compatible SSE) +# --------------------------------------------------------------------------- + + +class CogOSProvider: + """CogOS kernel — OpenAI-compatible chat/completions with tool support.""" + + def __init__(self, endpoint: str | None = None): + self._endpoint = endpoint or os.environ.get("COGOS_ENDPOINT", "http://localhost:5100") + + @property + def name(self) -> str: + return "cogos" + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + system: str = "", + ) -> ProviderResponse: + msgs = list(messages) + if system: + msgs = [{"role": "system", "content": system}] + msgs + + body: dict[str, Any] = { + "model": "cogos/auto", + "messages": msgs, + "stream": False, + } + if tools: + body["tools"] = tools + + headers = { + "X-UCP-Identity": '{"name":"cog"}', + "X-Session-ID": "mod3-dashboard", + "X-Origin": "mod3-dashboard", + } + + async with httpx.AsyncClient(timeout=60.0) as client: + resp = await client.post( + f"{self._endpoint}/v1/chat/completions", + json=body, + headers=headers, + ) + resp.raise_for_status() + data = resp.json() + + choice = data.get("choices", [{}])[0] + msg = choice.get("message", {}) + content = msg.get("content", "") + 
raw_tool_calls = msg.get("tool_calls", []) + + tool_calls = [] + for tc in raw_tool_calls: + fn = tc.get("function", {}) + name = fn.get("name", "") + args = fn.get("arguments", {}) + if isinstance(args, str): + import json + + try: + args = json.loads(args) + except Exception: + args = {"text": args} + if name: + tool_calls.append(ToolCall(name=name, arguments=args)) + + return ProviderResponse(tool_calls=tool_calls, text=content, raw=data) + + +# --------------------------------------------------------------------------- +# Auto-detection +# --------------------------------------------------------------------------- + + +async def _probe(url: str, timeout: float = 2.0) -> bool: + try: + async with httpx.AsyncClient(timeout=timeout) as client: + r = await client.get(url) + return r.status_code < 500 + except Exception: + return False + + +def _mlx_available() -> bool: + """Check whether mlx-lm is importable (Apple Silicon with mlx installed).""" + try: + import mlx_lm # noqa: F401 + + return True + except ImportError: + return False + + +async def auto_detect_provider_async() -> InferenceProvider: + """Probe available backends: MLX > CogOS > Ollama.""" + # MLX — in-process, fastest, no network overhead + if _mlx_available(): + logger.info("auto-detect: using MLX (in-process)") + return MlxProvider() + + # CogOS — local kernel with OpenAI-compat API + cogos_endpoint = os.environ.get("COGOS_ENDPOINT", "http://localhost:5100") + if await _probe(f"{cogos_endpoint}/health"): + logger.info("auto-detect: using CogOS at %s", cogos_endpoint) + return CogOSProvider(endpoint=cogos_endpoint) + + # Ollama — local daemon + ollama_endpoint = os.environ.get("OLLAMA_ENDPOINT", "http://localhost:11434") + if await _probe(f"{ollama_endpoint}/api/tags"): + logger.info("auto-detect: using Ollama at %s", ollama_endpoint) + return OllamaProvider(endpoint=ollama_endpoint) + + logger.warning("auto-detect: no provider found, defaulting to Ollama") + return OllamaProvider() + + +def 
auto_detect_provider() -> InferenceProvider: + """Synchronous wrapper for auto-detection.""" + import asyncio + + # Fast path: MLX doesn't need async probing + if _mlx_available(): + logger.info("auto-detect: using MLX (in-process)") + return MlxProvider() + + try: + asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(auto_detect_provider_async()) + + # If called from an async context, can't use asyncio.run — default to Ollama + logger.info("auto-detect: async context, defaulting to Ollama") + return OllamaProvider() diff --git a/requirements.txt b/requirements.txt index 772b8ea..b927da7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ espeakng-loader>=0.2.0 setuptools fastapi>=0.115.0 uvicorn>=0.30.0 +httpx>=0.24.0 diff --git a/server.py b/server.py index 690156d..4d18002 100644 --- a/server.py +++ b/server.py @@ -20,6 +20,7 @@ import json import logging +import os import threading import time import uuid @@ -359,6 +360,53 @@ async def _filter_read_stream(): pipeline_state = PipelineState() +# --------------------------------------------------------------------------- +# Barge-in file watcher — monitors /tmp/mod3-barge-in.json for pause signals +# --------------------------------------------------------------------------- + +_BARGEIN_SIGNAL = "/tmp/mod3-barge-in.json" +_bargein_last_mtime: float = 0.0 + + +def _bargein_watcher(): + """Background thread that watches for barge-in signal file changes.""" + global _bargein_last_mtime + import json as _json + + while True: + try: + import os + + if os.path.exists(_BARGEIN_SIGNAL): + mtime = os.path.getmtime(_BARGEIN_SIGNAL) + if mtime > _bargein_last_mtime: + _bargein_last_mtime = mtime + with open(_BARGEIN_SIGNAL) as f: + signal = _json.load(f) + if signal.get("event") == "user_speaking_start": + if pipeline_state.is_speaking: + info = pipeline_state.interrupt(reason="barge_in") + if info: + # Write interrupt context back to signal file + signal["interrupted"] = { + 
"spoken_pct": info.spoken_pct, + "delivered_text": info.delivered_text, + "full_text": info.full_text, + } + with open(_BARGEIN_SIGNAL, "w") as f: + _json.dump(signal, f, indent=2) + logging.info( + "Barge-in: paused playback (%.0f%% delivered)", info.spoken_pct * 100 if info else 0 + ) + except Exception as e: + logging.debug("Barge-in watcher error: %s", e) + time.sleep(0.1) # 100ms poll + + +_bargein_thread = threading.Thread(target=_bargein_watcher, daemon=True) +_bargein_thread.start() + + async def _emit_interruption(info: InterruptInfo): """Emit a channel notification when playback is interrupted. @@ -702,6 +750,34 @@ def speak( if not text.strip(): return json.dumps({"status": "error", "error": "Nothing to say"}) + # Check if user is currently speaking (barge-in signal file) + user_state = "idle" + try: + if os.path.exists(_BARGEIN_SIGNAL): + with open(_BARGEIN_SIGNAL) as _bf: + _bsig = json.load(_bf) + if _bsig.get("event") == "user_speaking_start": + user_state = "recording" + except Exception: + pass # signal file missing or corrupt — assume idle + + # If user is currently recording, don't play — just inform the agent. + # The agent is responsible for re-calling speak() after the user finishes. + # We intentionally do NOT enqueue the job or create a _jobs entry, because + # a "held" job in the queue becomes a zombie: the drain thread tries to play + # it immediately (ignoring the hold), and if anything goes wrong the job + # can't be cleared by stop(). 
+ if user_state == "recording": + est_duration = _estimate_duration_sec(text, speed) + return json.dumps( + { + "status": "held", + "reason": "User is currently speaking — re-send this speak() call after user finishes.", + "user_state": "recording", + "estimated_duration_sec": round(est_duration, 1), + } + ) + try: job_id, position = _start_speech(text, voice, stream=stream, speed=speed, emotion=emotion) except ValueError as e: @@ -714,7 +790,8 @@ def speak( if currently_playing is None or currently_playing["job_id"] == job_id: # Playing immediately (no queue ahead) - return json.dumps({"status": "speaking", "job_id": job_id}) + result = {"status": "speaking", "job_id": job_id} + return json.dumps(result) # Something is already playing — return enriched queue status queue_snapshot = _speech_queue.get_queue_snapshot() @@ -756,6 +833,8 @@ def speak( "To cancel all and speak immediately, call stop() then speak()." ), } + if user_state != "idle": + result["user_state"] = user_state return json.dumps(result) @@ -797,7 +876,7 @@ def speech_status(job_id: str = "", verbose: bool = False) -> str: if entry["job_id"] == job_id: result["queue_position"] = i + 1 break - if job["metrics"]: + if job.get("metrics"): metrics = job["metrics"] if not verbose and "chunks" in metrics: chunks = metrics["chunks"]["per_chunk"] @@ -811,7 +890,7 @@ def speech_status(job_id: str = "", verbose: bool = False) -> str: }, } result["metrics"] = metrics - if job["error"]: + if job.get("error"): result["error"] = job["error"] # Always include queue state @@ -882,9 +961,9 @@ def stop(job_id: str = "") -> str: # No job_id: stop everything — interrupt current + clear queue cleared = _speech_queue.cancel_all_queued() - # Mark all cleared queued jobs + # Mark all cleared queued and held jobs as cancelled for jid, jdata in _jobs.items(): - if jdata["status"] == "queued": + if jdata["status"] in ("queued", "held"): jdata["status"] = "cancelled" with _current_player_lock: @@ -1097,6 +1176,7 @@ def 
_run_http(host: str = "0.0.0.0", port: int = 7860): parser.add_argument("--http", action="store_true", help="Run HTTP API only") parser.add_argument("--all", action="store_true", help="Run both MCP (stdio) and HTTP") parser.add_argument("--channel", action="store_true", help="Run as channel server with voice input") + parser.add_argument("--dashboard", action="store_true", help="Run HTTP API with voice/text dashboard (no MCP)") parser.add_argument("--port", type=int, default=7860, help="HTTP port (default: 7860)") parser.add_argument("--host", type=str, default="0.0.0.0", help="HTTP bind address") args = parser.parse_args() @@ -1112,6 +1192,18 @@ def _run_http(host: str = "0.0.0.0", port: int = 7860): ) http_thread.start() mcp.run() + elif args.dashboard: + # Dashboard mode: HTTP server with WebSocket voice/text chat + # Swap PlaceholderDecoder → WhisperDecoder for real STT + from modules.text import TextModule + from modules.voice import VoiceModule, WhisperDecoder + + _bus._modules.clear() + _bus.register(VoiceModule(decoder=WhisperDecoder())) + _bus.register(TextModule()) + logging.basicConfig(level=logging.INFO) + logger.info("Starting dashboard mode (WhisperDecoder enabled)") + _run_http(host=args.host, port=args.port) elif args.channel: # Channel mode: MCP on stdio + inbound voice pipeline from bus import ModalityBus