Skip to content

Commit a0f92d3

Browse files
committed
fix(prod): long-running qwen-search sidecar and deploy wiring
- Run qwen-search as an HTTP OpenAI-compatible service on :8790 with a /corpus mount
- Point production QWEN_API_URL at the sidecar; wire API_TOKEN into the deployed .env
- Send the full knowledge-augmented prompt when QWEN_MODE=http (preformattedPrompt)
- Drop docker.sock from mcp-server for the http-only search stack
- Help/unknown messages mention /getToken

Made-with: Cursor
1 parent 5a46c35 commit a0f92d3

10 files changed

Lines changed: 224 additions & 21 deletions

File tree

.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ PORT=3000
77
RATE_LIMIT_RPS=10
88
QWEN_MODE=http
99
# container = run Qwen CLI inside Docker/Podman with knowledge/ mounted :ro (see docker/qwen-search/README.md)
10-
QWEN_API_URL=http://localhost:8080
10+
# In docker-compose.prod.yml, use the sidecar: http://qwen-search:8790
11+
QWEN_API_URL=http://qwen-search:8790
1112
HF_TOKEN=
1213
QWEN_TIMEOUT_MS=60000
1314
QWEN_CODE_COMMAND=qwen
@@ -24,6 +25,8 @@ STATE_FILE=/app/.spawndock/state.json
2425
# Required when QWEN_MODE=container in Compose: host-absolute path to knowledge/ (e.g. /srv/spawndock-api/knowledge)
2526
QWEN_KNOWLEDGE_HOST_PATH=
2627
SPAWNDOCK_BOT_SECRET=replace-with-random-secret
28+
# Shared MCP + dev-tunnel auth (optional in dev; set in production). Bot command /getToken prints this value.
29+
API_TOKEN=
2730

2831
TELEGRAM_BOT_TOKEN=
2932
TELEGRAM_BOT_USERNAME=rustgpt_bot

docker-compose.prod.yml

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,48 @@
11
# Production stack: MCP + control plane + Telegram bot + reverse proxy (Caddy).
22
# External HTTP(S) on ports 80/443 only; mcp-server listens on :3000 inside the Docker network.
33
# Configure Caddy via PUBLIC_HOST in `.env` (see OPERATOR.md).
4-
# QWEN_MODE=container: Docker socket + qwen-search image — see OPERATOR.md.
4+
# qwen-search: long-running OpenAI-compatible HTTP service (Qwen CLI + /corpus).
5+
# mcp-server uses QWEN_MODE=http and QWEN_API_URL=http://qwen-search:8790 (set in .env).
56

67
name: spawndock-api
78

89
services:
9-
# One-shot build so `spawndock/qwen-search:prod` exists before mcp-server runs `docker run`.
1010
qwen-search:
1111
build:
1212
context: .
1313
dockerfile: docker/qwen-search/Dockerfile
1414
image: spawndock/qwen-search:prod
15-
restart: "no"
16-
entrypoint: ["/bin/sh", "-c"]
17-
command: ["exit 0"]
15+
env_file:
16+
- .env
17+
environment:
18+
QWEN_SEARCH_SERVER: "1"
19+
QWEN_HTTP_PORT: "8790"
20+
QWEN_OAUTH: "true"
21+
volumes:
22+
- ./knowledge:/corpus:ro
23+
restart: unless-stopped
24+
healthcheck:
25+
test: ["CMD-SHELL", "node -e \"fetch('http://127.0.0.1:8790/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))\""]
26+
interval: 25s
27+
timeout: 8s
28+
retries: 5
29+
start_period: 90s
1830

1931
mcp-server:
2032
build:
2133
context: .
2234
dockerfile: Dockerfile
23-
# Root allows docker.sock access for QWEN_MODE=container; see OPERATOR.md to harden with DOCKER_GID.
24-
user: "0:0"
2535
env_file:
2636
- .env
2737
environment:
2838
QWEN_CONTAINER_IMAGE: spawndock/qwen-search:prod
29-
# Host-absolute path to this repo's knowledge/ (required for QWEN_MODE=container bind mounts)
3039
QWEN_KNOWLEDGE_HOST_PATH: ${QWEN_KNOWLEDGE_HOST_PATH:-}
3140
volumes:
3241
- ./data/state:/app/.spawndock
33-
# Host knowledge tree (same path must be passed to docker -v for QWEN_MODE=container)
3442
- ./knowledge:/app/knowledge:ro
35-
- /var/run/docker.sock:/var/run/docker.sock
3643
depends_on:
3744
qwen-search:
38-
condition: service_completed_successfully
45+
condition: service_healthy
3946
healthcheck:
4047
test: ["CMD-SHELL", "node -e \"fetch('http://127.0.0.1:3000/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))\""]
4148
interval: 15s

docker/qwen-search/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ WORKDIR /workspace
1515
# Runtime wrapper creates ~/.qwen/oauth_creds.json from env and runs qwen.
1616
COPY docker/qwen-search/entrypoint.sh /usr/local/bin/qwen-entrypoint
1717
COPY docker/qwen-search/qwen-search.sh /usr/local/bin/qwen-search
18-
RUN chmod +x /usr/local/bin/qwen-entrypoint /usr/local/bin/qwen-search
18+
COPY docker/qwen-search/http-server.mjs /opt/qwen-search/http-server.mjs
19+
RUN chmod +x /usr/local/bin/qwen-entrypoint /usr/local/bin/qwen-search /opt/qwen-search/http-server.mjs
1920

2021
# Default corpus mount point (host maps repo/api/knowledge here with :ro)
2122
VOLUME ["/corpus"]

docker/qwen-search/entrypoint.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,9 @@ ensure_oauth_file() {
1313
}
1414

1515
ensure_oauth_file
16+
17+
if [ "${QWEN_SEARCH_SERVER:-}" = "1" ]; then
18+
exec node /opt/qwen-search/http-server.mjs
19+
fi
20+
1621
exec /usr/local/bin/qwen-search "$@"

docker/qwen-search/http-server.mjs

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env node
2+
/**
3+
* OpenAI-compatible HTTP surface for Qwen Code CLI (knowledge corpus at /corpus).
4+
* Used by mcp-server with QWEN_MODE=http and QWEN_API_URL=http://qwen-search:8790
5+
*/
6+
import http from "node:http";
7+
import { spawn } from "node:child_process";
8+
9+
const PORT = parseInt(process.env.QWEN_HTTP_PORT || "8790", 10);
10+
const TIMEOUT_MS = parseInt(process.env.QWEN_TIMEOUT_MS || "120000", 10);
11+
const MAX_STDOUT = parseInt(process.env.QWEN_SEARCH_MAX_STDOUT || "524288", 10);
12+
const LISTEN = process.env.QWEN_HTTP_BIND || "0.0.0.0";
13+
14+
function extractQwenCliResult(stdout) {
15+
const trimmed = stdout.trim();
16+
if (!trimmed) {
17+
throw new Error("Qwen returned empty output");
18+
}
19+
const parsed = JSON.parse(trimmed);
20+
if (!Array.isArray(parsed)) {
21+
throw new Error("Qwen output is not a JSON array");
22+
}
23+
const resultEvent = parsed.find((entry) => entry?.type === "result") ?? null;
24+
if (resultEvent === null || typeof resultEvent.result !== "string") {
25+
throw new Error("Qwen output is missing a final result event");
26+
}
27+
let text = resultEvent.result.trim();
28+
text = text
29+
.replace(/^```json\s*/i, "")
30+
.replace(/^```\s*/i, "")
31+
.replace(/\s*```$/, "")
32+
.trim();
33+
return text;
34+
}
35+
36+
function messagesToPrompt(messages) {
37+
if (!Array.isArray(messages)) {
38+
return "";
39+
}
40+
return messages
41+
.map((m) => {
42+
const role = typeof m.role === "string" ? m.role : "user";
43+
const content = typeof m.content === "string" ? m.content : JSON.stringify(m.content ?? "");
44+
return `${role}:\n${content}`;
45+
})
46+
.join("\n\n---\n\n");
47+
}
48+
49+
function runQwenPrompt(prompt) {
50+
return new Promise((resolve, reject) => {
51+
const child = spawn("qwen", ["--output-format", "json", "--prompt", prompt], {
52+
env: {
53+
...process.env,
54+
QWEN_OAUTH: process.env.QWEN_OAUTH || "true",
55+
},
56+
stdio: ["ignore", "pipe", "pipe"],
57+
});
58+
const chunks = [];
59+
let stderr = "";
60+
let size = 0;
61+
const timer = setTimeout(() => {
62+
try {
63+
child.kill("SIGKILL");
64+
} catch {
65+
/* ignore */
66+
}
67+
reject(new Error("Qwen search timeout"));
68+
}, TIMEOUT_MS);
69+
70+
child.stdout?.on("data", (buf) => {
71+
size += buf.length;
72+
if (size > MAX_STDOUT) {
73+
clearTimeout(timer);
74+
try {
75+
child.kill("SIGKILL");
76+
} catch {
77+
/* ignore */
78+
}
79+
reject(new Error("Qwen stdout exceeded max size"));
80+
return;
81+
}
82+
chunks.push(buf);
83+
});
84+
child.stderr?.on("data", (buf) => {
85+
stderr += buf.toString();
86+
});
87+
child.on("error", (err) => {
88+
clearTimeout(timer);
89+
reject(err);
90+
});
91+
child.on("close", (code) => {
92+
clearTimeout(timer);
93+
if (code !== 0) {
94+
reject(new Error(stderr.trim() || `Qwen exited with status ${code}`));
95+
return;
96+
}
97+
resolve(Buffer.concat(chunks).toString("utf8"));
98+
});
99+
});
100+
}
101+
102+
function openAiChatCompletion(content) {
103+
return JSON.stringify({
104+
id: "qwen-search",
105+
object: "chat.completion",
106+
model: "qwen-search",
107+
choices: [{ message: { role: "assistant", content } }],
108+
});
109+
}
110+
111+
async function handleRequest(req, res) {
112+
const url = req.url ?? "/";
113+
114+
if (req.method === "GET" && url.startsWith("/health")) {
115+
res.writeHead(200, { "content-type": "application/json" });
116+
res.end(JSON.stringify({ status: "ok", service: "qwen-search" }));
117+
return;
118+
}
119+
120+
if (req.method === "POST" && url.startsWith("/v1/chat/completions")) {
121+
let body = "";
122+
for await (const chunk of req) {
123+
body += chunk;
124+
}
125+
try {
126+
const json = JSON.parse(body || "{}");
127+
const prompt = messagesToPrompt(json.messages);
128+
if (!prompt.trim()) {
129+
res.writeHead(400, { "content-type": "application/json" });
130+
res.end(JSON.stringify({ error: "messages required" }));
131+
return;
132+
}
133+
const stdout = await runQwenPrompt(prompt);
134+
const assistantContent = extractQwenCliResult(stdout);
135+
res.writeHead(200, { "content-type": "application/json" });
136+
res.end(openAiChatCompletion(assistantContent));
137+
} catch (err) {
138+
const message = err instanceof Error ? err.message : String(err);
139+
res.writeHead(502, { "content-type": "application/json" });
140+
res.end(JSON.stringify({ error: message }));
141+
}
142+
return;
143+
}
144+
145+
res.writeHead(404, { "content-type": "application/json" });
146+
res.end(JSON.stringify({ error: "not_found" }));
147+
}
148+
149+
http
150+
.createServer((req, res) => {
151+
handleRequest(req, res).catch((err) => {
152+
const message = err instanceof Error ? err.message : String(err);
153+
if (!res.headersSent) {
154+
res.writeHead(500, { "content-type": "application/json" });
155+
}
156+
res.end(JSON.stringify({ error: message }));
157+
});
158+
})
159+
.listen(PORT, LISTEN, () => {
160+
console.error(`qwen-search HTTP listening on http://${LISTEN}:${PORT}`);
161+
});

scripts/deploy-prod.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if [[ -z "$QWEN_OAUTH_CREDS_B64" ]]; then
1919
fi
2020
TARGET_DIR="/srv/spawndock-api"
2121
BOT_SECRET="$(openssl rand -hex 24)"
22+
API_TOKEN_VALUE="$(openssl rand -hex 32)"
2223
BOT_CONTROL_PLANE_URL="http://mcp-server:3000"
2324
PUBLIC_HOST=":80"
2425
TELEGRAM_MINI_APP_SHORT_NAME="tma"
@@ -61,14 +62,15 @@ PORT=3000
6162
PUBLIC_ORIGIN=$PUBLIC_ORIGIN
6263
STATE_FILE=/app/.spawndock/state.json
6364
SPAWNDOCK_BOT_SECRET=$BOT_SECRET
65+
API_TOKEN=$API_TOKEN_VALUE
6466
TELEGRAM_BOT_TOKEN=$TELEGRAM_BOT_TOKEN
6567
TELEGRAM_BOT_USERNAME=$TELEGRAM_BOT_USERNAME
6668
TELEGRAM_MINI_APP_SHORT_NAME=$TELEGRAM_MINI_APP_SHORT_NAME
6769
CONTROL_PLANE_URL=$BOT_CONTROL_PLANE_URL
6870
BOT_POLL_TIMEOUT=25
6971
RATE_LIMIT_RPS=10
7072
QWEN_MODE=http
71-
QWEN_API_URL=https://router.huggingface.co/hf-inference/v1
73+
QWEN_API_URL=http://qwen-search:8790
7274
QWEN_TIMEOUT_MS=60000
7375
QWEN_CODE_COMMAND=qwen
7476
QWEN_CODE_AUTH_TYPE=qwen-oauth

src/bot/i18n.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ export function getTokenMessage(locale: BotLocale, token: string): string {
8484

8585
export function unknownMessage(locale: BotLocale): string {
8686
if (locale === "ru") {
87-
return "Не понял команду.\n\nИспользуй /new <название проекта> или /help.";
87+
return "Не понял команду.\n\nИспользуй /new, /launch, /getToken или /help.";
8888
}
89-
return "I did not understand that command.\n\nUse /new <project title> or /help.";
89+
return "I did not understand that command.\n\nUse /new, /launch, /getToken, or /help.";
9090
}
9191

9292
export function launchUsageMessage(locale: BotLocale): string {

src/mcp.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,15 @@ async function runKnowledgeSearch(query: string): Promise<SearchResult> {
8888
return sanitizeSearchResultSources(parsed, knowledgeRoot);
8989
}
9090

91-
const parsed = await queryQwen(query, {
91+
const fullPrompt = buildQwenCodePrompt(query, matches);
92+
const parsed = await queryQwen(fullPrompt, {
9293
apiUrl: config.openrouterApiKey
9394
? "https://openrouter.ai/api"
9495
: config.qwenApiUrl,
9596
apiKey: config.openrouterApiKey || undefined,
9697
model: config.openrouterApiKey ? config.openrouterModel : undefined,
9798
timeoutMs: config.qwenTimeoutMs,
99+
preformattedPrompt: true,
98100
});
99101
return sanitizeSearchResultSources(parsed, knowledgeRoot);
100102
}

src/qwen/__tests__/client.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,22 @@ describe("queryQwen", () => {
6464
expect(body.model).toBe("Qwen/Qwen3-Coder");
6565
});
6666

67+
it("preformattedPrompt sends a single user message (no duplicate system prompt)", async () => {
68+
(fetch as any).mockResolvedValue({
69+
ok: true,
70+
json: () => Promise.resolve(MOCK_RESPONSE),
71+
});
72+
73+
await queryQwen("full prompt body", {
74+
apiUrl: "http://localhost:8080",
75+
timeoutMs: 5000,
76+
preformattedPrompt: true,
77+
});
78+
79+
const body = JSON.parse((fetch as any).mock.calls[0][1].body);
80+
expect(body.messages).toEqual([{ role: "user", content: "full prompt body" }]);
81+
});
82+
6783
it("throws on non-ok response", async () => {
6884
(fetch as any).mockResolvedValue({
6985
ok: false,

src/qwen/client.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ export interface QwenOptions {
77
timeoutMs: number;
88
apiKey?: string;
99
model?: string;
10+
/** When true, `query` is already a full prompt (e.g. includes system + knowledge excerpts). */
11+
preformattedPrompt?: boolean;
1012
}
1113

1214
export async function queryQwen(query: string, options: QwenOptions): Promise<SearchResult> {
@@ -24,16 +26,20 @@ export async function queryQwen(query: string, options: QwenOptions): Promise<Se
2426
headers["Authorization"] = `Bearer ${resolvedApiKey}`;
2527
}
2628

29+
const messages = options.preformattedPrompt
30+
? [{ role: "user" as const, content: query }]
31+
: [
32+
{ role: "system" as const, content: SYSTEM_PROMPT },
33+
{ role: "user" as const, content: query },
34+
];
35+
2736
try {
2837
const res = await fetch(`${options.apiUrl}/v1/chat/completions`, {
2938
method: "POST",
3039
headers,
3140
body: JSON.stringify({
3241
model: resolvedModel,
33-
messages: [
34-
{ role: "system", content: SYSTEM_PROMPT },
35-
{ role: "user", content: query },
36-
],
42+
messages,
3743
temperature: 0.3,
3844
}),
3945
signal: controller.signal,

0 commit comments

Comments (0)