Skip to content

Commit a87916b

Browse files
authored
Update app
1 parent 8658e12 commit a87916b

4 files changed

Lines changed: 507 additions & 69 deletions

File tree

src/components/GpuCalculator.astro

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const iTarget = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none" str
2222
<label for="sel-model">Model Preset <span class="info-icon" data-tooltip="Pre-fills the architecture details below based on popular open-source models." set:html={iInfo}></span></label>
2323
<select id="sel-model">
2424
{models.map((m, i) => (
25-
<option value={i}>{m.name}{m.params > 0 ? ` (${m.params}B)` : ''}</option>
25+
<option value={i}>{m.name}</option>
2626
))}
2727
</select>
2828
</div>
@@ -364,13 +364,14 @@ const iTarget = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none" str
364364
const m = models[parseInt(target.value)];
365365
if (m && m.params > 0) {
366366
(getEl('inp-params') as HTMLInputElement).value = String(m.params);
367-
(getEl('inp-active-params') as HTMLInputElement).value = String(m.experts > 0 ? Math.round(m.params * (0.1 + 0.9 * (m.activeExperts/m.experts))) : m.params);
367+
(getEl('inp-active-params') as HTMLInputElement).value = String(m.activeParams);
368368
(getEl('inp-layers') as HTMLInputElement).value = String(m.layers);
369369
(getEl('inp-hidden') as HTMLInputElement).value = String(m.hidden);
370370
(getEl('inp-intermediate') as HTMLInputElement).value = String(m.intermediate);
371371
(getEl('inp-num-heads') as HTMLInputElement).value = String(m.numHeads);
372372
(getEl('inp-kv-heads') as HTMLInputElement).value = String(m.numKvHeads);
373373
(getEl('inp-head-dim') as HTMLInputElement).value = String(m.headDim);
374+
(getEl('sel-modality') as HTMLSelectElement).value = m.modality ? String(m.modality) : "1.0";
374375
}
375376
updateAttnInfo();
376377
calculate();

src/components/VectorDbPlanner.astro

Lines changed: 96 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ const iClipboard = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none"
5858
<label for="vdb-metadata">Avg Metadata/Text per Vector (KB) <span class="info-icon" data-tooltip="Size of additional JSON metadata and raw text stored alongside each vector." set:html={iInfo}></span></label>
5959
<input type="number" id="vdb-metadata" min="0" step="0.5" value="2.0" />
6060
</div>
61+
<div class="field">
62+
<label for="vdb-index">Index Type <span class="info-icon" data-tooltip="Algorithm used to structure vectors. HNSW is fast but uses more RAM. FLAT uses no extra RAM but is slow." set:html={iInfo}></span></label>
63+
<select id="vdb-index">
64+
<option value="HNSW" selected>HNSW (High Speed, High RAM)</option>
65+
<option value="FLAT">FLAT / Exact (No RAM overhead, Slow)</option>
66+
<option value="IVF_FLAT">IVF_FLAT (Low RAM overhead)</option>
67+
<option value="IVF_PQ">IVF_PQ (High Compression, Low RAM)</option>
68+
</select>
69+
</div>
6170
</div>
6271
</div>
6372

@@ -172,7 +181,7 @@ const iClipboard = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none"
172181
</div>
173182
</div>
174183

175-
<style>
184+
<style is:global>
176185
.info-icon {
177186
display: inline-flex;
178187
align-items: center;
@@ -234,9 +243,9 @@ const iClipboard = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none"
234243

235244
<script>
236245
const dbs = JSON.parse(document.getElementById('__vdb_data')!.textContent!);
237-
238246
const svgCheck = `<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="var(--accent2)" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg>`;
239247
const svgX = `<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="var(--accent3)" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>`;
248+
const svgInfo = `<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"></circle><line x1="12" y1="16" x2="12" y2="12"></line><line x1="12" y1="8" x2="12.01" y2="8"></line></svg>`;
240249

241250
(window as any).selectVdb = function(dbName: string, ramNeeded: number, coresNeeded: number, diskNeeded: number) {
242251
const plan = JSON.parse(localStorage.getItem('ai_infra_plan') || '{}');
@@ -259,7 +268,6 @@ const iClipboard = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none"
259268
toast.style.opacity = '1';
260269
toast.style.transform = 'translateY(0)';
261270
});
262-
263271
setTimeout(() => {
264272
window.location.href = import.meta.env.BASE_URL + '/base-machine';
265273
}, 1200);
@@ -275,52 +283,105 @@ const iClipboard = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none"
275283
const ha = parseInt((document.getElementById('vdb-ha') as HTMLSelectElement).value);
276284
const bytesPerDim = parseInt((document.getElementById('vdb-quant') as HTMLSelectElement).value);
277285
const metaKb = parseFloat((document.getElementById('vdb-metadata') as HTMLInputElement).value);
286+
const indexType = (document.getElementById('vdb-index') as HTMLSelectElement).value;
278287

279288
const haFactor = ha ? 2 : 1;
280289
const container = document.getElementById('vdb-results')!;
281290
container.innerHTML = '';
282291

283292
for (const db of dbs) {
284-
285-
const actualRawBytesPerM = dims * bytesPerDim * 1e6;
286-
const actualRawGibPerM = actualRawBytesPerM / (1024 ** 3);
287-
const metaGibPerM = (metaKb * 1024 * 1e6) / (1024 ** 3);
288-
const base768Gib = (768 * 4 * 1e6) / (1024 ** 3);
289-
const dbRamOverheadGibPerM = Math.max(0, db.ramPerMillionVecs768Gb - base768Gib);
290-
291-
const ramNeeded = ((actualRawGibPerM + dbRamOverheadGibPerM + (metaGibPerM * 0.1)) * millions * replicas * haFactor).toFixed(1);
292-
293-
const actualRawGbPerM = actualRawBytesPerM / 1e9;
294-
const metaGbPerM = (metaKb * 1024 * 1e6) / 1e9;
295-
const base768Gb = (768 * 4 * 1e6) / 1e9;
296-
const dbDiskOverheadGbPerM = Math.max(0, db.diskPerMillionVecs768Gb - base768Gb);
297-
298-
const diskNeeded = ((actualRawGbPerM + dbDiskOverheadGbPerM + metaGbPerM) * millions * replicas * haFactor).toFixed(1);
299-
300-
const coresNeeded = Math.max(db.minCores, Math.ceil(db.recCores * (qps / 200) * replicas * haFactor));
301-
const minRam = Math.max(db.minRamGb, Math.ceil(parseFloat(ramNeeded) * 1.15));
302-
293+
// 1. Calculate Raw Vector Size (Dims * Precision * Vector Count)
294+
const rawVectorBytesPerM = dims * bytesPerDim * 1e6;
295+
let rawVectorGbPerM = rawVectorBytesPerM / (1024 ** 3);
296+
297+
// 2. Extract the HNSW Graph Overhead from the baseline in vectordbs.ts
298+
// (Baseline assumes 768 dims at FP32)
299+
const base768RawGb = (768 * 4 * 1e6) / (1024 ** 3);
300+
let dbRamOverheadGbPerM = Math.max(0, db.ramPerMillionVecs768Gb - base768RawGb);
301+
let dbDiskOverheadGbPerM = Math.max(0, db.diskPerMillionVecs768Gb - base768RawGb);
302+
303+
// 3. Apply Index Strategy Modifiers
304+
let isSupported = true;
305+
306+
switch(indexType) {
307+
case 'HNSW':
308+
isSupported = db.indexTypes.some((t: string) => t.includes('HNSW'));
309+
break;
310+
case 'FLAT':
311+
// FLAT search requires zero index overhead
312+
dbRamOverheadGbPerM = 0;
313+
dbDiskOverheadGbPerM = 0;
314+
// Most DBs support exact/flat search natively or via M=0
315+
isSupported = db.indexTypes.some((t: string) => t.includes('FLAT')) || ['Qdrant', 'Chroma', 'pgvector'].includes(db.name);
316+
break;
317+
case 'IVF_FLAT':
318+
// IVF drastically reduces memory overhead (just storing cluster centroids)
319+
dbRamOverheadGbPerM *= 0.05;
320+
dbDiskOverheadGbPerM *= 0.05;
321+
isSupported = db.indexTypes.some((t: string) => t.includes('IVF_FLAT'));
322+
break;
323+
case 'IVF_PQ':
324+
// Product Quantization shrinks index overhead AND compresses raw vectors
325+
dbRamOverheadGbPerM *= 0.05;
326+
dbDiskOverheadGbPerM *= 0.05;
327+
rawVectorGbPerM *= 0.25; // Roughly 4x compression of the vector payloads
328+
isSupported = db.indexTypes.some((t: string) => t.includes('IVF_PQ'));
329+
break;
330+
}
331+
332+
// 4. Calculate Metadata (Fixing previous unit conversion logic)
333+
// KB -> bytes (x1024) -> GiB (divide by 1024^3); the "Gb" variable names below hold binary gigabytes
334+
const metaGbPerM = (metaKb * 1024 * 1e6) / (1024 ** 3);
335+
336+
// 5. Final Footprint Totals
337+
const ramNeededRaw = (rawVectorGbPerM + dbRamOverheadGbPerM + metaGbPerM) * millions * replicas * haFactor;
338+
const ramNeeded = Math.max(0, ramNeededRaw).toFixed(1);
339+
340+
const diskNeededRaw = (rawVectorGbPerM + dbDiskOverheadGbPerM + metaGbPerM) * millions * replicas * haFactor;
341+
const diskNeeded = Math.max(0, diskNeededRaw).toFixed(1);
342+
343+
const coresNeeded = Math.max(db.minCores, Math.ceil(db.recCores * (qps / 250) * replicas * haFactor));
344+
const minRam = Math.max(db.minRamGb, Math.ceil(parseFloat(ramNeeded) * 1.15));
345+
346+
// 6. Suitability & UI Flags
303347
let suitClass = 'badge-green';
304348
let suitText = 'Recommended';
305-
306-
if (millions > 100 && !db.distributed) { suitClass = 'badge-red'; suitText = 'Not distributed'; }
307-
else if (millions > 50 && db.name === 'Chroma') { suitClass = 'badge-red'; suitText = 'Over capacity'; }
308-
else if (qps > 500 && !db.distributed) { suitClass = 'badge-purple'; suitText = 'May struggle'; }
349+
let disableBtn = false;
350+
351+
if (!isSupported) {
352+
suitClass = 'badge-red';
353+
suitText = 'Index Not Supported';
354+
disableBtn = true;
355+
} else if (millions > 100 && !db.distributed) {
356+
suitClass = 'badge-red';
357+
suitText = 'Not Recommended';
358+
} else if (millions > 50 && db.name === 'Chroma') {
359+
suitClass = 'badge-red';
360+
suitText = 'Over Capacity';
361+
} else if (qps > 500 && !db.distributed) {
362+
suitClass = 'badge-purple';
363+
suitText = 'May Struggle';
364+
}
309365

310366
const pct = Math.min(100, (parseFloat(ramNeeded) / minRam) * 100);
311367
const barColor = pct > 85 ? 'var(--accent3)' : 'var(--accent2)';
312368

369+
// Render Card
313370
const card = document.createElement('div');
314371
card.className = 'card';
372+
card.style.opacity = isSupported ? '1' : '0.5'; // Dim card if index isn't supported
373+
315374
card.innerHTML = `
316375
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:.8rem">
317376
<h3>${db.name}</h3>
318377
<span class="badge ${suitClass}">${suitText}</span>
319378
</div>
320-
<div class="stat"><span class="label">RAM Needed</span><span class="value">${minRam} GB</span></div>
321-
<div class="stat"><span class="label">Index + Data on Disk</span><span class="value">${diskNeeded} GB</span></div>
322-
<div class="stat"><span class="label">CPU Cores</span><span class="value">${coresNeeded} vCPU</span></div>
323-
<div class="stat"><span class="label">RAM for Vectors</span><span class="value">${ramNeeded} GB</span></div>
379+
380+
<div class="stat"><span class="label">RAM Needed <span class="info-icon" data-tooltip="Total recommended system RAM, including an extra 15% safety buffer for OS and database operations.">${svgInfo}</span></span><span class="value">${minRam} GB</span></div>
381+
<div class="stat"><span class="label">Index + Data on Disk <span class="info-icon" data-tooltip="Storage space required for the raw vectors, metadata, and the structural index on disk.">${svgInfo}</span></span><span class="value">${diskNeeded} GB</span></div>
382+
<div class="stat"><span class="label">CPU Cores <span class="info-icon" data-tooltip="Estimated vCPUs needed to handle the target QPS (Queries Per Second) and indexing workload.">${svgInfo}</span></span><span class="value">${coresNeeded} vCPU</span></div>
383+
<div class="stat"><span class="label">RAM for Vectors (Raw) <span class="info-icon" data-tooltip="Exact memory footprint required just for the vectors, metadata, and the active index structure in memory.">${svgInfo}</span></span><span class="value">${ramNeeded} GB</span></div>
384+
324385
<div style="margin-top:.8rem">
325386
<div style="display:flex;justify-content:space-between;font-size:.82rem;color:var(--muted)">
326387
<span>RAM utilization</span><span>${ramNeeded} / ${minRam} GB</span>
@@ -329,10 +390,13 @@ const iClipboard = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none"
329390
<div class="progress-fill" style="width: ${pct}%; background: ${barColor}"></div>
330391
</div>
331392
</div>
393+
332394
<p style="margin-top:.8rem;font-size:.82rem;color:var(--muted)">${db.notes}</p>
333-
<button class="btn btn-primary" style="margin-top:1rem; width:100%; justify-content:center;"
395+
396+
<button class="btn btn-primary" style="margin-top:1rem; width:100%; justify-content:center;"
397+
${disableBtn ? 'disabled style="background:var(--border);color:var(--muted);cursor:not-allowed;"' : ''}
334398
onclick="window.selectVdb('${db.name}', ${minRam}, ${coresNeeded}, ${diskNeeded})">
335-
Add to Plan
399+
${disableBtn ? 'Unavailable' : 'Add to Plan'}
336400
</button>
337401
`;
338402
container.appendChild(card);

0 commit comments

Comments
 (0)