COLLECTION_NAME=codebase

# Embeddings
EMBEDDING_MODEL = BAAI/bge-base-en-v1.5
# Optional repo tag attached to each payload
REPO_NAME = workspace

# ---------------------------------------------------------------------------
# Embedding Service Configuration (Shared ONNX for scale)
# ---------------------------------------------------------------------------
# EMBEDDING_PROVIDER: local | remote
#   local  = Use in-process ONNX (default, high memory per worker)
#   remote = Use shared embedding service (recommended for scale)
EMBEDDING_PROVIDER = local

# When EMBEDDING_PROVIDER=remote, calls this service
EMBEDDING_SERVICE_URL = http://embedding:8100
EMBEDDING_SERVICE_TIMEOUT = 60

# Max concurrent ONNX inferences (local mode or in embedding service)
# Prevents memory explosion with parallel workers
EMBED_MAX_CONCURRENT = 2

# Max batch size per embed request
EMBED_MAX_BATCH = 256

# ---------------------------------------------------------------------------
# ONNX CPU Optimizations (for embedding service)
# ---------------------------------------------------------------------------
# ONNX_THREADS: Number of threads for intra-op parallelism
#   0 = auto (1 per physical core), or set explicit count (e.g., 4-6)
ONNX_THREADS = 0

# ONNX_DISABLE_SPINNING: Disable thread spin-wait (saves CPU cycles)
#   0 = spinning enabled (faster, burns CPU), 1 = disabled (power efficient)
ONNX_DISABLE_SPINNING = 0

# EMBED_OPTIMAL_BATCH: Internal batch size for chunking large requests
#   Sweet spot for CPU is 32-64. Too small = overhead, too large = memory pressure
EMBED_OPTIMAL_BATCH = 32

# ---------------------------------------------------------------------------
# Embedding Model Options
# ---------------------------------------------------------------------------
# Model options (changing model requires re-indexing!):
#   BAAI/bge-base-en-v1.5          - Default, solid quality (768 dim, 0.21 GB)
#   nomic-ai/nomic-embed-text-v1.5 - Faster, outperforms BGE on MTEB (768 dim, 0.13 GB)
#   BAAI/bge-large-en-v1.5         - Higher quality, slower (1024 dim, 0.67 GB)
#
# Qwen3-Embedding Feature Flag (optional, experimental)
# Enable to use Qwen3-Embedding-0.6B instead of BGE-base (requires reindex)
# QWEN3_EMBEDDING_ENABLED=0