Skip to content

Commit 3853760

Browse files
authored
Merge pull request #205 from Context-Engine-AI/bubble-bullshit
Bubble bullshit
2 parents 10f5704 + 4379f3b commit 3853760

25 files changed

Lines changed: 1062 additions & 112 deletions

.env.example

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,51 @@ COLLECTION_NAME=codebase
2020

2121
# Embeddings
2222
EMBEDDING_MODEL=BAAI/bge-base-en-v1.5
23-
EMBEDDING_PROVIDER=fastembed
2423
# Optional repo tag attached to each payload
2524
REPO_NAME=workspace
2625

26+
# ---------------------------------------------------------------------------
27+
# Embedding Service Configuration (Shared ONNX for scale)
28+
# ---------------------------------------------------------------------------
29+
# EMBEDDING_PROVIDER: local | remote
30+
# local = Use in-process ONNX (default, high memory per worker)
31+
# remote = Use shared embedding service (recommended for scale)
32+
EMBEDDING_PROVIDER=local
33+
34+
# When EMBEDDING_PROVIDER=remote, embedding requests are sent to this shared service
35+
EMBEDDING_SERVICE_URL=http://embedding:8100
36+
EMBEDDING_SERVICE_TIMEOUT=60
37+
38+
# Max concurrent ONNX inferences (local mode or in embedding service)
39+
# Prevents memory explosion with parallel workers
40+
EMBED_MAX_CONCURRENT=2
41+
42+
# Max batch size per embed request
43+
EMBED_MAX_BATCH=256
44+
45+
# ---------------------------------------------------------------------------
46+
# ONNX CPU Optimizations (for embedding service)
47+
# ---------------------------------------------------------------------------
48+
# ONNX_THREADS: Number of threads for intra-op parallelism
49+
# 0 = auto (1 per physical core), or set explicit count (e.g., 4-6)
50+
ONNX_THREADS=0
51+
52+
# ONNX_DISABLE_SPINNING: Disable thread spin-wait (saves CPU cycles)
53+
# 0 = spinning enabled (faster, burns CPU), 1 = disabled (power efficient)
54+
ONNX_DISABLE_SPINNING=0
55+
56+
# EMBED_OPTIMAL_BATCH: Internal batch size for chunking large requests
57+
# Sweet spot for CPU is 32-64. Too small = overhead, too large = memory pressure
58+
EMBED_OPTIMAL_BATCH=32
59+
60+
# ---------------------------------------------------------------------------
61+
# Embedding Model Options
62+
# ---------------------------------------------------------------------------
63+
# Model options (changing model requires re-indexing!):
64+
# BAAI/bge-base-en-v1.5 - Default, solid quality (768 dim, 0.21 GB)
65+
# nomic-ai/nomic-embed-text-v1.5 - Faster, outperforms BGE on MTEB (768 dim, 0.13 GB)
66+
# BAAI/bge-large-en-v1.5 - Higher quality, slower (1024 dim, 0.67 GB)
67+
#
2768
# Qwen3-Embedding Feature Flag (optional, experimental)
2869
# Enable to use Qwen3-Embedding-0.6B instead of BGE-base (requires reindex)
2970
# QWEN3_EMBEDDING_ENABLED=0

deploy/helm/context-engine/templates/configmap.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ data:
1111
QDRANT_URL: {{ include "context-engine.qdrantUrl" . | quote }}
1212
EMBEDDING_MODEL: {{ .Values.config.embeddingModel | quote }}
1313
EMBEDDING_PROVIDER: {{ .Values.config.embeddingProvider | quote }}
14+
EMBEDDING_SERVICE_URL: {{ .Values.config.embeddingServiceUrl | quote }}
1415
EMBEDDING_WARMUP: "0"
16+
INDEX_WORKERS: {{ .Values.config.indexWorkers | default "4" | quote }}
1517

1618
FASTMCP_HOST: {{ .Values.config.fastmcp.host | quote }}
1719
FASTMCP_PORT: {{ .Values.config.fastmcp.port | quote }}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
{{- if eq .Values.config.embeddingProvider "remote" }}
2+
apiVersion: apps/v1
3+
kind: Deployment
4+
metadata:
5+
name: {{ include "context-engine.fullname" . }}-embedding
6+
labels:
7+
{{- include "context-engine.labels" . | nindent 4 }}
8+
app.kubernetes.io/component: embedding
9+
spec:
10+
replicas: {{ .Values.embedding.replicas | default 2 }}
11+
selector:
12+
matchLabels:
13+
{{- include "context-engine.selectorLabels" . | nindent 6 }}
14+
app.kubernetes.io/component: embedding
15+
template:
16+
metadata:
17+
labels:
18+
{{- include "context-engine.selectorLabels" . | nindent 8 }}
19+
app.kubernetes.io/component: embedding
20+
spec:
21+
containers:
22+
- name: embedding
23+
image: "{{ .Values.embedding.image.repository | default "context-engine-embedding" }}:{{ .Values.embedding.image.tag | default "latest" }}"
24+
ports:
25+
- containerPort: 8100
26+
env:
27+
- name: EMBEDDING_MODEL
28+
value: {{ .Values.config.embeddingModel | quote }}
29+
- name: EMBED_MAX_CONCURRENT
30+
value: {{ .Values.embedding.maxConcurrent | default "2" | quote }}
31+
- name: EMBED_OPTIMAL_BATCH
32+
value: {{ .Values.embedding.optimalBatch | default "32" | quote }}
33+
- name: ONNX_THREADS
34+
value: {{ .Values.embedding.onnxThreads | default "4" | quote }}
35+
- name: ONNX_DISABLE_SPINNING
36+
value: "1"
37+
- name: OMP_NUM_THREADS
38+
value: {{ .Values.embedding.onnxThreads | default "4" | quote }}
39+
- name: MKL_NUM_THREADS
40+
value: {{ .Values.embedding.onnxThreads | default "4" | quote }}
41+
resources:
42+
{{- toYaml .Values.embedding.resources | nindent 12 }}
43+
readinessProbe:
44+
httpGet:
45+
path: /health
46+
port: 8100
47+
initialDelaySeconds: 30
48+
periodSeconds: 10
49+
livenessProbe:
50+
httpGet:
51+
path: /health
52+
port: 8100
53+
initialDelaySeconds: 60
54+
periodSeconds: 30
55+
---
56+
apiVersion: v1
57+
kind: Service
58+
metadata:
59+
name: embedding
60+
labels:
61+
{{- include "context-engine.labels" . | nindent 4 }}
62+
spec:
63+
selector:
64+
{{- include "context-engine.selectorLabels" . | nindent 4 }}
65+
app.kubernetes.io/component: embedding
66+
ports:
67+
- port: 8100
68+
targetPort: 8100
69+
---
70+
{{- if .Values.embedding.autoscaling.enabled }}
71+
apiVersion: autoscaling/v2
72+
kind: HorizontalPodAutoscaler
73+
metadata:
74+
name: {{ include "context-engine.fullname" . }}-embedding
75+
spec:
76+
scaleTargetRef:
77+
apiVersion: apps/v1
78+
kind: Deployment
79+
name: {{ include "context-engine.fullname" . }}-embedding
80+
minReplicas: {{ .Values.embedding.autoscaling.minReplicas | default 2 }}
81+
maxReplicas: {{ .Values.embedding.autoscaling.maxReplicas | default 10 }}
82+
metrics:
83+
- type: Resource
84+
resource:
85+
name: cpu
86+
target:
87+
type: Utilization
88+
averageUtilization: {{ .Values.embedding.autoscaling.targetCPU | default 70 }}
89+
{{- end }}
90+
{{- end }}
91+

deploy/helm/context-engine/values.yaml

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,37 @@ qdrant:
110110
initialDelaySeconds: 5
111111
periodSeconds: 5
112112

113+
# -----------------------------------------------------------------------------
114+
# Embedding Service Configuration (shared ONNX model)
115+
# -----------------------------------------------------------------------------
116+
embedding:
117+
  # -- Number of replicas (the HPA adjusts this count when autoscaling.enabled is true)
118+
replicas: 2
119+
# -- Image configuration
120+
image:
121+
repository: context-engine-embedding
122+
tag: latest
123+
# -- Max concurrent embeddings per replica
124+
maxConcurrent: 2
125+
# -- Optimal batch size for CPU cache
126+
optimalBatch: 32
127+
# -- ONNX threads per replica
128+
onnxThreads: 4
129+
# -- Resource requests and limits
130+
resources:
131+
requests:
132+
cpu: "2"
133+
memory: 4Gi
134+
limits:
135+
cpu: "4"
136+
memory: 6Gi
137+
# -- Autoscaling configuration
138+
autoscaling:
139+
enabled: true
140+
minReplicas: 2
141+
maxReplicas: 10
142+
targetCPU: 70
143+
113144
# -----------------------------------------------------------------------------
114145
# MCP Indexer HTTP Configuration
115146
# -----------------------------------------------------------------------------
@@ -463,10 +494,14 @@ config:
463494
# -- Qdrant URL (auto-generated if not set)
464495
qdrantUrl: ""
465496
# -- Embedding model
466-
embeddingModel: BAAI/bge-base-en-v1.5
467-
# -- Embedding provider
468-
embeddingProvider: fastembed
469-
497+
embeddingModel: nomic-ai/nomic-embed-text-v1.5
498+
  # -- Embedding provider (remote = shared embedding service, local = in-process ONNX)
499+
embeddingProvider: remote
500+
# -- Embedding service URL (when provider=remote)
501+
embeddingServiceUrl: http://embedding:8100
502+
# -- Index workers (parallel file processing)
503+
indexWorkers: 4
504+
470505
# -- FastMCP settings
471506
fastmcp:
472507
host: "0.0.0.0"

deploy/kubernetes/configmap.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ data:
1515
CTX_SUMMARY_CHARS: '0'
1616
CURRENT_REPO: ''
1717
DECODER_MAX_TOKENS: '4000'
18-
EMBEDDING_MODEL: BAAI/bge-base-en-v1.5
19-
EMBEDDING_PROVIDER: fastembed
18+
EMBEDDING_MODEL: nomic-ai/nomic-embed-text-v1.5
19+
EMBEDDING_PROVIDER: remote
20+
EMBEDDING_SERVICE_URL: http://embedding:8100
21+
INDEX_WORKERS: "4"
2022
EMBEDDING_WARMUP: '0'
2123
FASTMCP_HOST: 0.0.0.0
2224
FASTMCP_HTTP_HEALTH_PORT: '18002'
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: embedding-service
5+
namespace: context-engine
6+
labels:
7+
app: embedding-service
8+
spec:
9+
replicas: 2
10+
selector:
11+
matchLabels:
12+
app: embedding-service
13+
template:
14+
metadata:
15+
labels:
16+
app: embedding-service
17+
spec:
18+
containers:
19+
- name: embedding
20+
image: context-engine-embedding:latest
21+
ports:
22+
- containerPort: 8100
23+
env:
24+
- name: EMBEDDING_MODEL
25+
valueFrom:
26+
configMapKeyRef:
27+
name: context-engine-config
28+
key: EMBEDDING_MODEL
29+
- name: EMBED_MAX_CONCURRENT
30+
value: "2"
31+
- name: EMBED_MAX_BATCH
32+
value: "256"
33+
- name: EMBED_OPTIMAL_BATCH
34+
value: "32"
35+
- name: ONNX_THREADS
36+
value: "4"
37+
- name: ONNX_DISABLE_SPINNING
38+
value: "1"
39+
- name: OMP_NUM_THREADS
40+
value: "4"
41+
- name: MKL_NUM_THREADS
42+
value: "4"
43+
- name: HF_HOME
44+
value: /tmp/huggingface
45+
- name: FASTEMBED_CACHE_PATH
46+
value: /tmp/fastembed
47+
resources:
48+
requests:
49+
memory: "4Gi"
50+
cpu: "2"
51+
limits:
52+
memory: "6Gi"
53+
cpu: "4"
54+
readinessProbe:
55+
httpGet:
56+
path: /health
57+
port: 8100
58+
initialDelaySeconds: 30
59+
periodSeconds: 10
60+
livenessProbe:
61+
httpGet:
62+
path: /health
63+
port: 8100
64+
initialDelaySeconds: 60
65+
periodSeconds: 30
66+
volumeMounts:
67+
- name: embedding-cache
68+
mountPath: /tmp/huggingface
69+
volumes:
70+
- name: embedding-cache
71+
emptyDir:
72+
sizeLimit: 2Gi
73+
---
74+
apiVersion: v1
75+
kind: Service
76+
metadata:
77+
name: embedding
78+
namespace: context-engine
79+
spec:
80+
selector:
81+
app: embedding-service
82+
ports:
83+
- port: 8100
84+
targetPort: 8100
85+
type: ClusterIP
86+
---
87+
apiVersion: autoscaling/v2
88+
kind: HorizontalPodAutoscaler
89+
metadata:
90+
name: embedding-service-hpa
91+
namespace: context-engine
92+
spec:
93+
scaleTargetRef:
94+
apiVersion: apps/v1
95+
kind: Deployment
96+
name: embedding-service
97+
minReplicas: 2
98+
maxReplicas: 10
99+
metrics:
100+
- type: Resource
101+
resource:
102+
name: cpu
103+
target:
104+
type: Utilization
105+
averageUtilization: 70
106+

deploy/kubernetes/indexer-services.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,21 @@ spec:
7171
configMapKeyRef:
7272
name: context-engine-config
7373
key: EMBEDDING_MODEL
74+
- name: EMBEDDING_PROVIDER
75+
valueFrom:
76+
configMapKeyRef:
77+
name: context-engine-config
78+
key: EMBEDDING_PROVIDER
79+
- name: EMBEDDING_SERVICE_URL
80+
valueFrom:
81+
configMapKeyRef:
82+
name: context-engine-config
83+
key: EMBEDDING_SERVICE_URL
84+
- name: INDEX_WORKERS
85+
valueFrom:
86+
configMapKeyRef:
87+
name: context-engine-config
88+
key: INDEX_WORKERS
7489
- name: HF_HOME
7590
value: /work/models/hf-cache
7691
- name: XDG_CACHE_HOME
@@ -209,6 +224,21 @@ spec:
209224
configMapKeyRef:
210225
name: context-engine-config
211226
key: EMBEDDING_MODEL
227+
- name: EMBEDDING_PROVIDER
228+
valueFrom:
229+
configMapKeyRef:
230+
name: context-engine-config
231+
key: EMBEDDING_PROVIDER
232+
- name: EMBEDDING_SERVICE_URL
233+
valueFrom:
234+
configMapKeyRef:
235+
name: context-engine-config
236+
key: EMBEDDING_SERVICE_URL
237+
- name: INDEX_WORKERS
238+
valueFrom:
239+
configMapKeyRef:
240+
name: context-engine-config
241+
key: INDEX_WORKERS
212242
- name: HF_HOME
213243
value: /work/models/hf-cache
214244
- name: XDG_CACHE_HOME

0 commit comments

Comments
 (0)