Skip to content

Commit a0f92d3

Browse files
committed
fix(prod): long-running qwen-search sidecar and deploy wiring
- Run qwen-search as an HTTP OpenAI-compatible service on :8790 with a /corpus mount
- Point production QWEN_API_URL at the sidecar; wire API_TOKEN into the deployed .env
- Send the full knowledge-augmented prompt when QWEN_MODE=http (preformattedPrompt)
- Drop docker.sock from mcp-server for the http-only search stack
- Help/unknown messages mention /getToken

Made-with: Cursor
1 parent 5a46c35 commit a0f92d3

10 files changed

Lines changed: 224 additions & 21 deletions

File tree

.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ PORT=3000
77
RATE_LIMIT_RPS=10
88
QWEN_MODE=http
99
# container = run Qwen CLI inside Docker/Podman with knowledge/ mounted :ro (see docker/qwen-search/README.md)
10-
QWEN_API_URL=http://localhost:8080
10+
# In docker-compose.prod.yml, use the sidecar: http://qwen-search:8790
11+
QWEN_API_URL=http://qwen-search:8790
1112
HF_TOKEN=
1213
QWEN_TIMEOUT_MS=60000
1314
QWEN_CODE_COMMAND=qwen
@@ -24,6 +25,8 @@ STATE_FILE=/app/.spawndock/state.json
2425
# Required when QWEN_MODE=container in Compose: host-absolute path to knowledge/ (e.g. /srv/spawndock-api/knowledge)
2526
QWEN_KNOWLEDGE_HOST_PATH=
2627
SPAWNDOCK_BOT_SECRET=replace-with-random-secret
28+
# Shared MCP + dev-tunnel auth (optional in dev; set in production). Bot command /getToken prints this value.
29+
API_TOKEN=
2730

2831
TELEGRAM_BOT_TOKEN=
2932
TELEGRAM_BOT_USERNAME=rustgpt_bot

docker-compose.prod.yml

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,48 @@
11
# Production stack: MCP + control plane + Telegram bot + reverse proxy (Caddy).
22
# External HTTP(S) on ports 80/443 only; mcp-server listens on :3000 inside the Docker network.
33
# Configure Caddy via PUBLIC_HOST in `.env` (see OPERATOR.md).
4-
# QWEN_MODE=container: Docker socket + qwen-search image — see OPERATOR.md.
4+
# qwen-search: long-running OpenAI-compatible HTTP service (Qwen CLI + /corpus).
5+
# mcp-server uses QWEN_MODE=http and QWEN_API_URL=http://qwen-search:8790 (set in .env).
56

67
name: spawndock-api
78

89
services:
9-
# One-shot build so `spawndock/qwen-search:prod` exists before mcp-server runs `docker run`.
1010
qwen-search:
1111
build:
1212
context: .
1313
dockerfile: docker/qwen-search/Dockerfile
1414
image: spawndock/qwen-search:prod
15-
restart: "no"
16-
entrypoint: ["/bin/sh", "-c"]
17-
command: ["exit 0"]
15+
env_file:
16+
- .env
17+
environment:
18+
QWEN_SEARCH_SERVER: "1"
19+
QWEN_HTTP_PORT: "8790"
20+
QWEN_OAUTH: "true"
21+
volumes:
22+
- ./knowledge:/corpus:ro
23+
restart: unless-stopped
24+
healthcheck:
25+
test: ["CMD-SHELL", "node -e \"fetch('http://127.0.0.1:8790/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))\""]
26+
interval: 25s
27+
timeout: 8s
28+
retries: 5
29+
start_period: 90s
1830

1931
mcp-server:
2032
build:
2133
context: .
2234
dockerfile: Dockerfile
23-
# Root allows docker.sock access for QWEN_MODE=container; see OPERATOR.md to harden with DOCKER_GID.
24-
user: "0:0"
2535
env_file:
2636
- .env
2737
environment:
2838
QWEN_CONTAINER_IMAGE: spawndock/qwen-search:prod
29-
# Host-absolute path to this repo's knowledge/ (required for QWEN_MODE=container bind mounts)
3039
QWEN_KNOWLEDGE_HOST_PATH: ${QWEN_KNOWLEDGE_HOST_PATH:-}
3140
volumes:
3241
- ./data/state:/app/.spawndock
33-
# Host knowledge tree (same path must be passed to docker -v for QWEN_MODE=container)
3442
- ./knowledge:/app/knowledge:ro
35-
- /var/run/docker.sock:/var/run/docker.sock
3643
depends_on:
3744
qwen-search:
38-
condition: service_completed_successfully
45+
condition: service_healthy
3946
healthcheck:
4047
test: ["CMD-SHELL", "node -e \"fetch('http://127.0.0.1:3000/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))\""]
4148
interval: 15s

docker/qwen-search/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ WORKDIR /workspace
1515
# Runtime wrapper creates ~/.qwen/oauth_creds.json from env and runs qwen.
1616
COPY docker/qwen-search/entrypoint.sh /usr/local/bin/qwen-entrypoint
1717
COPY docker/qwen-search/qwen-search.sh /usr/local/bin/qwen-search
18-
RUN chmod +x /usr/local/bin/qwen-entrypoint /usr/local/bin/qwen-search
18+
COPY docker/qwen-search/http-server.mjs /opt/qwen-search/http-server.mjs
19+
RUN chmod +x /usr/local/bin/qwen-entrypoint /usr/local/bin/qwen-search /opt/qwen-search/http-server.mjs
1920

2021
# Default corpus mount point (host maps repo/api/knowledge here with :ro)
2122
VOLUME ["/corpus"]

docker/qwen-search/entrypoint.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,9 @@ ensure_oauth_file() {
1313
}
1414

1515
ensure_oauth_file
16+
17+
if [ "${QWEN_SEARCH_SERVER:-}" = "1" ]; then
18+
exec node /opt/qwen-search/http-server.mjs
19+
fi
20+
1621
exec /usr/local/bin/qwen-search "$@"

docker/qwen-search/http-server.mjs

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env node
2+
/**
3+
* OpenAI-compatible HTTP surface for Qwen Code CLI (knowledge corpus at /corpus).
4+
* Used by mcp-server with QWEN_MODE=http and QWEN_API_URL=http://qwen-search:8790
5+
*/
6+
import http from "node:http";
7+
import { spawn } from "node:child_process";
8+
9+
const PORT = parseInt(process.env.QWEN_HTTP_PORT || "8790", 10);
10+
const TIMEOUT_MS = parseInt(process.env.QWEN_TIMEOUT_MS || "120000", 10);
11+
const MAX_STDOUT = parseInt(process.env.QWEN_SEARCH_MAX_STDOUT || "524288", 10);
12+
const LISTEN = process.env.QWEN_HTTP_BIND || "0.0.0.0";
13+
14+
function extractQwenCliResult(stdout) {
15+
const trimmed = stdout.trim();
16+
if (!trimmed) {
17+
throw new Error("Qwen returned empty output");
18+
}
19+
const parsed = JSON.parse(trimmed);
20+
if (!Array.isArray(parsed)) {
21+
throw new Error("Qwen output is not a JSON array");
22+
}
23+
const resultEvent = parsed.find((entry) => entry?.type === "result") ?? null;
24+
if (resultEvent === null || typeof resultEvent.result !== "string") {
25+
throw new Error("Qwen output is missing a final result event");
26+
}
27+
let text = resultEvent.result.trim();
28+
text = text
29+
.replace(/^```json\s*/i, "")
30+
.replace(/^```\s*/i, "")
31+
.replace(/\s*```$/, "")
32+
.trim();
33+
return text;
34+
}
35+
36+
function messagesToPrompt(messages) {
37+
if (!Array.isArray(messages)) {
38+
return "";
39+
}
40+
return messages
41+
.map((m) => {
42+
const role = typeof m.role === "string" ? m.role : "user";
43+
const content = typeof m.content === "string" ? m.content : JSON.stringify(m.content ?? "");
44+
return `${role}:\n${content}`;
45+
})
46+
.join("\n\n---\n\n");
47+
}
48+
49+
function runQwenPrompt(prompt) {
50+
return new Promise((resolve, reject) => {
51+
const child = spawn("qwen", ["--output-format", "json", "--prompt", prompt], {
52+
env: {
53+
...process.env,
54+
QWEN_OAUTH: process.env.QWEN_OAUTH || "true",
55+
},
56+
stdio: ["ignore", "pipe", "pipe"],
57+
});
58+
const chunks = [];
59+
let stderr = "";
60+
let size = 0;
61+
const timer = setTimeout(() => {
62+
try {
63+
child.kill("SIGKILL");
64+
} catch {
65+
/* ignore */
66+
}
67+
reject(new Error("Qwen search timeout"));
68+
}, TIMEOUT_MS);
69+
70+
child.stdout?.on("data", (buf) => {
71+
size += buf.length;
72+
if (size > MAX_STDOUT) {
73+
clearTimeout(timer);
74+
try {
75+
child.kill("SIGKILL");
76+
} catch {
77+
/* ignore */
78+
}
79+
reject(new Error("Qwen stdout exceeded max size"));
80+
return;
81+
}
82+
chunks.push(buf);
83+
});
84+
child.stderr?.on("data", (buf) => {
85+
stderr += buf.toString();
86+
});
87+
child.on("error", (err) => {
88+
clearTimeout(timer);
89+
reject(err);
90+
});
91+
child.on("close", (code) => {
92+
clearTimeout(timer);
93+
if (code !== 0) {
94+
reject(new Error(stderr.trim() || `Qwen exited with status ${code}`));
95+
return;
96+
}
97+
resolve(Buffer.concat(chunks).toString("utf8"));
98+
});
99+
});
100+
}
101+
102+
function openAiChatCompletion(content) {
103+
return JSON.stringify({
104+
id: "qwen-search",
105+
object: "chat.completion",
106+
model: "qwen-search",
107+
choices: [{ message: { role: "assistant", content } }],
108+
});
109+
}
110+
111+
async function handleRequest(req, res) {
112+
const url = req.url ?? "/";
113+
114+
if (req.method === "GET" && url.startsWith("/health")) {
115+
res.writeHead(200, { "content-type": "application/json" });
116+
res.end(JSON.stringify({ status: "ok", service: "qwen-search" }));
117+
return;
118+
}
119+
120+
if (req.method === "POST" && url.startsWith("/v1/chat/completions")) {
121+
let body = "";
122+
for await (const chunk of req) {
123+
body += chunk;
124+
}
125+
try {
126+
const json = JSON.parse(body || "{}");
127+
const prompt = messagesToPrompt(json.messages);
128+
if (!prompt.trim()) {
129+
res.writeHead(400, { "content-type": "application/json" });
130+
res.end(JSON.stringify({ error: "messages required" }));
131+
return;
132+
}
133+
const stdout = await runQwenPrompt(prompt);
134+
const assistantContent = extractQwenCliResult(stdout);
135+
res.writeHead(200, { "content-type": "application/json" });
136+
res.end(openAiChatCompletion(assistantContent));
137+
} catch (err) {
138+
const message = err instanceof Error ? err.message : String(err);
139+
res.writeHead(502, { "content-type": "application/json" });
140+
res.end(JSON.stringify({ error: message }));
141+
}
142+
return;
143+
}
144+
145+
res.writeHead(404, { "content-type": "application/json" });
146+
res.end(JSON.stringify({ error: "not_found" }));
147+
}
148+
149+
http
150+
.createServer((req, res) => {
151+
handleRequest(req, res).catch((err) => {
152+
const message = err instanceof Error ? err.message : String(err);
153+
if (!res.headersSent) {
154+
res.writeHead(500, { "content-type": "application/json" });
155+
}
156+
res.end(JSON.stringify({ error: message }));
157+
});
158+
})
159+
.listen(PORT, LISTEN, () => {
160+
console.error(`qwen-search HTTP listening on http://${LISTEN}:${PORT}`);
161+
});

scripts/deploy-prod.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if [[ -z "$QWEN_OAUTH_CREDS_B64" ]]; then
1919
fi
2020
TARGET_DIR="/srv/spawndock-api"
2121
BOT_SECRET="$(openssl rand -hex 24)"
22+
API_TOKEN_VALUE="$(openssl rand -hex 32)"
2223
BOT_CONTROL_PLANE_URL="http://mcp-server:3000"
2324
PUBLIC_HOST=":80"
2425
TELEGRAM_MINI_APP_SHORT_NAME="tma"
@@ -61,14 +62,15 @@ PORT=3000
6162
PUBLIC_ORIGIN=$PUBLIC_ORIGIN
6263
STATE_FILE=/app/.spawndock/state.json
6364
SPAWNDOCK_BOT_SECRET=$BOT_SECRET
65+
API_TOKEN=$API_TOKEN_VALUE
6466
TELEGRAM_BOT_TOKEN=$TELEGRAM_BOT_TOKEN
6567
TELEGRAM_BOT_USERNAME=$TELEGRAM_BOT_USERNAME
6668
TELEGRAM_MINI_APP_SHORT_NAME=$TELEGRAM_MINI_APP_SHORT_NAME
6769
CONTROL_PLANE_URL=$BOT_CONTROL_PLANE_URL
6870
BOT_POLL_TIMEOUT=25
6971
RATE_LIMIT_RPS=10
7072
QWEN_MODE=http
71-
QWEN_API_URL=https://router.huggingface.co/hf-inference/v1
73+
QWEN_API_URL=http://qwen-search:8790
7274
QWEN_TIMEOUT_MS=60000
7375
QWEN_CODE_COMMAND=qwen
7476
QWEN_CODE_AUTH_TYPE=qwen-oauth

src/bot/i18n.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ export function getTokenMessage(locale: BotLocale, token: string): string {
8484

8585
export function unknownMessage(locale: BotLocale): string {
8686
if (locale === "ru") {
87-
return "Не понял команду.\n\nИспользуй /new <название проекта> или /help.";
87+
return "Не понял команду.\n\nИспользуй /new, /launch, /getToken или /help.";
8888
}
89-
return "I did not understand that command.\n\nUse /new <project title> or /help.";
89+
return "I did not understand that command.\n\nUse /new, /launch, /getToken, or /help.";
9090
}
9191

9292
export function launchUsageMessage(locale: BotLocale): string {

src/mcp.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,15 @@ async function runKnowledgeSearch(query: string): Promise<SearchResult> {
8888
return sanitizeSearchResultSources(parsed, knowledgeRoot);
8989
}
9090

91-
const parsed = await queryQwen(query, {
91+
const fullPrompt = buildQwenCodePrompt(query, matches);
92+
const parsed = await queryQwen(fullPrompt, {
9293
apiUrl: config.openrouterApiKey
9394
? "https://openrouter.ai/api"
9495
: config.qwenApiUrl,
9596
apiKey: config.openrouterApiKey || undefined,
9697
model: config.openrouterApiKey ? config.openrouterModel : undefined,
9798
timeoutMs: config.qwenTimeoutMs,
99+
preformattedPrompt: true,
98100
});
99101
return sanitizeSearchResultSources(parsed, knowledgeRoot);
100102
}

src/qwen/__tests__/client.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,22 @@ describe("queryQwen", () => {
6464
expect(body.model).toBe("Qwen/Qwen3-Coder");
6565
});
6666

67+
it("preformattedPrompt sends a single user message (no duplicate system prompt)", async () => {
68+
(fetch as any).mockResolvedValue({
69+
ok: true,
70+
json: () => Promise.resolve(MOCK_RESPONSE),
71+
});
72+
73+
await queryQwen("full prompt body", {
74+
apiUrl: "http://localhost:8080",
75+
timeoutMs: 5000,
76+
preformattedPrompt: true,
77+
});
78+
79+
const body = JSON.parse((fetch as any).mock.calls[0][1].body);
80+
expect(body.messages).toEqual([{ role: "user", content: "full prompt body" }]);
81+
});
82+
6783
it("throws on non-ok response", async () => {
6884
(fetch as any).mockResolvedValue({
6985
ok: false,

src/qwen/client.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ export interface QwenOptions {
77
timeoutMs: number;
88
apiKey?: string;
99
model?: string;
10+
/** When true, `query` is already a full prompt (e.g. includes system + knowledge excerpts). */
11+
preformattedPrompt?: boolean;
1012
}
1113

1214
export async function queryQwen(query: string, options: QwenOptions): Promise<SearchResult> {
@@ -24,16 +26,20 @@ export async function queryQwen(query: string, options: QwenOptions): Promise<Se
2426
headers["Authorization"] = `Bearer ${resolvedApiKey}`;
2527
}
2628

29+
const messages = options.preformattedPrompt
30+
? [{ role: "user" as const, content: query }]
31+
: [
32+
{ role: "system" as const, content: SYSTEM_PROMPT },
33+
{ role: "user" as const, content: query },
34+
];
35+
2736
try {
2837
const res = await fetch(`${options.apiUrl}/v1/chat/completions`, {
2938
method: "POST",
3039
headers,
3140
body: JSON.stringify({
3241
model: resolvedModel,
33-
messages: [
34-
{ role: "system", content: SYSTEM_PROMPT },
35-
{ role: "user", content: query },
36-
],
42+
messages,
3743
temperature: 0.3,
3844
}),
3945
signal: controller.signal,

0 commit comments

Comments (0)