Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/bundle-litellm.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ const outPath = join(__dirname, '..', 'src', 'data', 'litellm-snapshot.json')
// Hand-maintained prices for models that the upstream LiteLLM data does not
// list yet; they are keyed by model id.
// NOTE(review): judging by the expectations in tests/models.test.ts, each tuple
// appears to be [input, output, cache-write, cache-read] in USD per token —
// confirm against the code that consumes MANUAL_ENTRIES before relying on it.
const MANUAL_ENTRIES = {
'MiniMax-M2.7': [0.3e-6, 1.2e-6, 0.375e-6, 0.06e-6],
'MiniMax-M2.7-highspeed': [0.6e-6, 2.4e-6, 0.375e-6, 0.06e-6],
// LiteLLM PR #27056 (not yet merged). Prices: https://api-docs.deepseek.com/quick_start/pricing
// Drop these two entries once that PR lands upstream and the fetched data covers them.
'deepseek-v4-flash': [1.4e-7, 2.8e-7, 0, 2.8e-9],
'deepseek-v4-pro': [1.74e-6, 3.48e-6, 0, 1.45e-8],
}

const res = await fetch(LITELLM_URL)
Expand Down
2 changes: 1 addition & 1 deletion src/data/litellm-snapshot.json

Large diffs are not rendered by default.

16 changes: 15 additions & 1 deletion src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,27 @@ async function loadCachedPricing(): Promise<Map<string, ModelCosts> | null> {
}

export async function loadPricing(): Promise<void> {
// The bundled snapshot contains MANUAL_ENTRIES for models not yet in LiteLLM.
// These must survive regardless of which code path populates pricingCache below,
// so we load the snapshot once here and use it to fill in any gaps afterwards.
const snap = loadSnapshot()

const cached = await loadCachedPricing()
if (cached) {
for (const [k, v] of snap) {
if (!cached.has(k)) cached.set(k, v)
}
pricingCache = cached
sortedPricingKeys = null
return
}

try {
pricingCache = await fetchAndCachePricing()
const fetched = await fetchAndCachePricing()
for (const [k, v] of snap) {
if (!fetched.has(k)) fetched.set(k, v)
}
pricingCache = fetched
sortedPricingKeys = null
} catch {
// snapshot already loaded at init; nothing more to do
Expand Down Expand Up @@ -421,6 +433,8 @@ const SHORT_NAMES: Record<string, string> = {
'kimi-k2': 'Kimi K2',
'kimi-latest': 'Kimi Latest',
'moonshot-v1': 'Moonshot v1',
'deepseek-v4-pro': 'DeepSeek v4 Pro',
'deepseek-v4-flash': 'DeepSeek v4 Flash',
'deepseek-coder-max': 'DeepSeek Coder Max',
'deepseek-coder': 'DeepSeek Coder',
'deepseek-r1': 'DeepSeek R1',
Expand Down
2 changes: 2 additions & 0 deletions src/providers/claude.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ const shortNames: Record<string, string> = {
'claude-3-5-sonnet': 'Sonnet 3.5',
'claude-haiku-4-5': 'Haiku 4.5',
'claude-3-5-haiku': 'Haiku 3.5',
'deepseek-v4-pro': 'DeepSeek v4 Pro',
'deepseek-v4-flash': 'DeepSeek v4 Flash',
}

function expandHome(p: string): string {
Expand Down
57 changes: 57 additions & 0 deletions tests/models.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,60 @@ describe('Cursor model variants resolve to pricing', () => {
})
}
})

// Regression: DeepSeek v4 models were priced at $0 because (a) the snapshot
// lacked entries and (b) loadPricing() unconditionally replaced pricingCache
// with the 24-hour runtime cache file, dropping MANUAL_ENTRIES from the
// bundled snapshot. The fix merges snapshot entries back after any cache
// load so manually-added models survive regardless of cache state.
describe('DeepSeek v4 models resolve to correct pricing', () => {
  // Direct lookups: each per-token rate must equal the manually bundled value.
  it('deepseek-v4-pro has non-zero pricing', () => {
    const proRates = getModelCosts('deepseek-v4-pro')
    expect(proRates).not.toBeNull()

    const {
      inputCostPerToken,
      outputCostPerToken,
      cacheReadCostPerToken,
      cacheWriteCostPerToken,
    } = proRates!
    expect(inputCostPerToken).toBe(1.74e-6)
    expect(outputCostPerToken).toBe(3.48e-6)
    expect(cacheReadCostPerToken).toBe(1.45e-8)
    expect(cacheWriteCostPerToken).toBe(0)
  })

  it('deepseek-v4-flash has non-zero pricing', () => {
    const flashRates = getModelCosts('deepseek-v4-flash')
    expect(flashRates).not.toBeNull()

    const {
      inputCostPerToken,
      outputCostPerToken,
      cacheReadCostPerToken,
      cacheWriteCostPerToken,
    } = flashRates!
    expect(inputCostPerToken).toBe(1.4e-7)
    expect(outputCostPerToken).toBe(2.8e-7)
    expect(cacheReadCostPerToken).toBe(2.8e-9)
    expect(cacheWriteCostPerToken).toBe(0)
  })

  // Provider-prefixed ids must resolve to the same rates as the bare model id.
  it('deepseek/deepseek-v4-pro (provider-prefixed) resolves to same pricing', () => {
    const prefixed = getModelCosts('deepseek/deepseek-v4-pro')
    const bare = getModelCosts('deepseek-v4-pro')
    expect(prefixed).toEqual(bare)
  })

  it('deepseek/deepseek-v4-flash (provider-prefixed) resolves to same pricing', () => {
    const prefixed = getModelCosts('deepseek/deepseek-v4-flash')
    const bare = getModelCosts('deepseek-v4-flash')
    expect(prefixed).toEqual(bare)
  })

  // End-to-end totals using token counts taken from real usage.
  it('calculateCost is non-zero for deepseek-v4-pro with observed token counts', () => {
    // Production sample: ~2.5M input, ~763K output, ~258M cache-read tokens.
    const inputTokens = 2_477_914
    const outputTokens = 762_994
    const cacheReadTokens = 258_556_928
    const total = calculateCost('deepseek-v4-pro', inputTokens, outputTokens, 0, cacheReadTokens, 0)
    // ~$4.31 (input) + ~$2.66 (output) + ~$3.75 (cache-read) ≈ $10.72
    expect(total).toBeCloseTo(10.72, 0)
  })

  it('calculateCost is non-zero for deepseek-v4-flash with observed token counts', () => {
    // Production sample: ~1.6M input, ~354K output, ~48M cache-read tokens.
    const inputTokens = 1_552_573
    const outputTokens = 353_914
    const cacheReadTokens = 48_388_608
    const total = calculateCost('deepseek-v4-flash', inputTokens, outputTokens, 0, cacheReadTokens, 0)
    // ~$0.22 (input) + ~$0.10 (output) + ~$0.14 (cache-read) ≈ $0.45
    expect(total).toBeCloseTo(0.45, 1)
  })
})

describe('DeepSeek v4 display names', () => {
  // Model id -> label expected from getShortModelName.
  const expectedLabels: ReadonlyArray<readonly [string, string]> = [
    ['deepseek-v4-pro', 'DeepSeek v4 Pro'],
    ['deepseek-v4-flash', 'DeepSeek v4 Flash'],
  ]

  // One test case per table row; titles keep the "<id> -> <label>" form.
  for (const [modelId, label] of expectedLabels) {
    it(`${modelId} -> ${label}`, () => {
      expect(getShortModelName(modelId)).toBe(label)
    })
  }
})
Loading