getagentseal · NihalJain · May 20, 2026
diff --git a/scripts/bundle-litellm.mjs b/scripts/bundle-litellm.mjs
@@ -9,6 +9,9 @@ const outPath = join(__dirname, '..', 'src', 'data', 'litellm-snapshot.json')
 const MANUAL_ENTRIES = {
   'MiniMax-M2.7':           [0.3e-6, 1.2e-6, 0.375e-6, 0.06e-6],
   'MiniMax-M2.7-highspeed': [0.6e-6, 2.4e-6, 0.375e-6, 0.06e-6],
+  // LiteLLM PR #27056 (not yet merged). Prices: https://api-docs.deepseek.com/quick_start/pricing
+  'deepseek-v4-flash':      [1.4e-7, 2.8e-7, 0, 2.8e-9],
+  'deepseek-v4-pro':        [1.74e-6, 3.48e-6, 0, 1.45e-8],
 }
 
 const res = await fetch(LITELLM_URL)

diff --git a/src/data/litellm-snapshot.json b/src/data/litellm-snapshot.json
diff --git a/src/models.ts b/src/models.ts
@@ -132,15 +132,27 @@ async function loadCachedPricing(): Promise<Map<string, ModelCosts> | null> {
 }
 
 export async function loadPricing(): Promise<void> {
+  // The bundled snapshot contains MANUAL_ENTRIES for models not yet in LiteLLM.
+  // These must survive regardless of which code path populates pricingCache below,
+  // so we load the snapshot once here and use it to fill in any gaps afterwards.
+  const snap = loadSnapshot()
+
   const cached = await loadCachedPricing()
   if (cached) {
+    for (const [k, v] of snap) {
+      if (!cached.has(k)) cached.set(k, v)
+    }
     pricingCache = cached
     sortedPricingKeys = null
     return
   }
 
   try {
-    pricingCache = await fetchAndCachePricing()
+    const fetched = await fetchAndCachePricing()
+    for (const [k, v] of snap) {
+      if (!fetched.has(k)) fetched.set(k, v)
+    }
+    pricingCache = fetched
     sortedPricingKeys = null
   } catch {
     // snapshot already loaded at init; nothing more to do
@@ -421,6 +433,8 @@ const SHORT_NAMES: Record<string, string> = {
   'kimi-k2': 'Kimi K2',
   'kimi-latest': 'Kimi Latest',
   'moonshot-v1': 'Moonshot v1',
+  'deepseek-v4-pro': 'DeepSeek v4 Pro',
+  'deepseek-v4-flash': 'DeepSeek v4 Flash',
   'deepseek-coder-max': 'DeepSeek Coder Max',
   'deepseek-coder': 'DeepSeek Coder',
   'deepseek-r1': 'DeepSeek R1',

diff --git a/src/providers/claude.ts b/src/providers/claude.ts
@@ -17,6 +17,8 @@ const shortNames: Record<string, string> = {
   'claude-3-5-sonnet': 'Sonnet 3.5',
   'claude-haiku-4-5': 'Haiku 4.5',
   'claude-3-5-haiku': 'Haiku 3.5',
+  'deepseek-v4-pro': 'DeepSeek v4 Pro',
+  'deepseek-v4-flash': 'DeepSeek v4 Flash',
 }
 
 function expandHome(p: string): string {

diff --git a/tests/models.test.ts b/tests/models.test.ts
@@ -260,3 +260,60 @@ describe('Cursor model variants resolve to pricing', () => {
     })
   }
 })
+
+// Regression: DeepSeek v4 models were priced at $0 because (a) the bundled
+// snapshot lacked entries for them and (b) loadPricing() unconditionally
+// replaced pricingCache with the 24-hour runtime cache file, which dropped the
+// snapshot's MANUAL_ENTRIES. The fix merges snapshot entries back in after any
+// cache load or fetch, so manually added models survive regardless of cache state.
+describe('DeepSeek v4 models resolve to correct pricing', () => {
+  it('deepseek-v4-pro has non-zero pricing', () => {
+    const costs = getModelCosts('deepseek-v4-pro')
+    expect(costs).not.toBeNull()
+    expect(costs!.inputCostPerToken).toBe(1.74e-6)
+    expect(costs!.outputCostPerToken).toBe(3.48e-6)
+    expect(costs!.cacheReadCostPerToken).toBe(1.45e-8)
+    expect(costs!.cacheWriteCostPerToken).toBe(0)
+  })
+
+  it('deepseek-v4-flash has non-zero pricing', () => {
+    const costs = getModelCosts('deepseek-v4-flash')
+    expect(costs).not.toBeNull()
+    expect(costs!.inputCostPerToken).toBe(1.4e-7)
+    expect(costs!.outputCostPerToken).toBe(2.8e-7)
+    expect(costs!.cacheReadCostPerToken).toBe(2.8e-9)
+    expect(costs!.cacheWriteCostPerToken).toBe(0)
+  })
+
+  it('deepseek/deepseek-v4-pro (provider-prefixed) resolves to same pricing', () => {
+    expect(getModelCosts('deepseek/deepseek-v4-pro')).toEqual(getModelCosts('deepseek-v4-pro'))
+  })
+
+  it('deepseek/deepseek-v4-flash (provider-prefixed) resolves to same pricing', () => {
+    expect(getModelCosts('deepseek/deepseek-v4-flash')).toEqual(getModelCosts('deepseek-v4-flash'))
+  })
+
+  it('calculateCost is non-zero for deepseek-v4-pro with observed token counts', () => {
+    // Observed from production: ~2.5M input, ~763K output, ~259M cache-read tokens
+    const cost = calculateCost('deepseek-v4-pro', 2_477_914, 762_994, 0, 258_556_928, 0)
+    // Expected: ~$4.31 input + ~$2.66 output + ~$3.75 cache-read ≈ $10.72
+    expect(cost).toBeCloseTo(10.72, 0)
+  })
+
+  it('calculateCost is non-zero for deepseek-v4-flash with observed token counts', () => {
+    // Observed from production: 1.6M input, 354K output, 48M cache-reads
+    const cost = calculateCost('deepseek-v4-flash', 1_552_573, 353_914, 0, 48_388_608, 0)
+    // Expected: ~$0.22 input + ~$0.10 output + ~$0.14 cache-read ≈ $0.45
+    expect(cost).toBeCloseTo(0.45, 1)
+  })
+})
+
+describe('DeepSeek v4 display names', () => {
+  it('deepseek-v4-pro -> DeepSeek v4 Pro', () => {
+    expect(getShortModelName('deepseek-v4-pro')).toBe('DeepSeek v4 Pro')
+  })
+
+  it('deepseek-v4-flash -> DeepSeek v4 Flash', () => {
+    expect(getShortModelName('deepseek-v4-flash')).toBe('DeepSeek v4 Flash')
+  })
+})