// `act apply-model <project>`: looks the project up in the parsed session
// history, asks for a model-default recommendation and applies it through
// runAction. The positional argument has to be declared in the command string,
// otherwise the action handler below is not handed a project name.
act
  .command('apply-model <project>')
  .description('Apply the model default recommendation for a project')
  .action(async (project: string) => {
    try {
      // Dependencies are imported lazily, inside the handler.
      const { parseAllSessions, filterProjectsByName } = await import('../parser.js')
      const { recommendModelDefault, buildApplyModelDefaultPlan } = await import('./model-defaults.js')
      const { runAction } = await import('./apply.js')
      const chalk = (await import('chalk')).default

      // Only the first project returned by the filter is used.
      // NOTE(review): if filterProjectsByName can return several matches for one
      // name, the plan is built for whichever comes first - confirm that is intended.
      const projects = filterProjectsByName(await parseAllSessions(), [project])
      const p = projects[0]
      if (!p) {
        console.error(`Project "${project}" not found in session history.`)
        process.exitCode = 1
        return
      }

      const recommendation = recommendModelDefault(p)
      if (!recommendation) {
        console.error(`No default model recommendation available for ${project} at this time.`)
        process.exitCode = 1
        return
      }

      const plan = await buildApplyModelDefaultPlan(recommendation)
      const record = await runAction(plan)

      console.log(`Applied default model ${chalk.green(recommendation.candidateModel)} for ${project}`)
      console.log(chalk.dim(` Evidence: ${recommendation.candidateEditTurns} turns, ${(recommendation.candidateOneShotRate * 100).toFixed(1)}% one-shot, $${recommendation.candidateCostPerEdit.toFixed(3)}/edit`))
      console.log(chalk.dim(` Undo anytime: codeburn act undo ${shortId(record.id)}`))
      // NOTE(review): this hint ends right after `--model `; presumably a
      // placeholder for the model name belongs here - confirm the intended text.
      console.log(chalk.dim(` Per-session override: --model `))
    } catch (err) {
      // Failures are reported as a message plus a non-zero exit code, not a stack trace.
      console.error(err instanceof Error ? err.message : String(err))
      process.exitCode = 1
    }
  })
/** A model needs at least this many edit turns in the project to be considered at all. */
const MIN_EDIT_TURNS = 30
/** A candidate's cost per edit may be at most this fraction of the current model's. */
const MAX_COST_RATIO = 0.6
/** How far below the current one-shot rate a candidate may fall (as a fraction, not percent). */
const ONE_SHOT_TOLERANCE = 0.03
/** Share of edit turns in the debugging category above which a project counts as debugging-heavy. */
const DEBUGGING_HEAVY_THRESHOLD = 0.4
/** A model whose last activity is older than this many days is ignored. */
const RECENCY_DAYS = 14
const MS_PER_DAY = 24 * 60 * 60 * 1000

/** Evidence behind a "switch the default model" suggestion for one project. */
export type ModelDefaultRecommendation = {
  project: string
  projectPath: string
  /** Qualifying model with the most edit turns in the project. */
  currentModel: string
  /** Cheaper model suggested as the new default. */
  candidateModel: string
  /** Provider shared by the current and the candidate model. */
  provider: string
  currentEditTurns: number
  candidateEditTurns: number
  /** One-shot turns divided by edit turns, in the range 0..1. */
  currentOneShotRate: number
  candidateOneShotRate: number
  /** Edit cost divided by edit turns. */
  currentCostPerEdit: number
  candidateCostPerEdit: number
  /** (1 - candidateCostPerEdit / currentCostPerEdit) * 100. */
  savingsPct: number
  /** True when the zero-tolerance quality rule was used for this project. */
  debuggingHeavy: boolean
}

/** Fraction of a model's edit turns that were one-shot; 0 when it has no edit turns. */
function oneShotRate(s: ModelStats): number {
  return s.editTurns > 0 ? s.oneShotTurns / s.editTurns : 0
}

/** Average edit cost per edit turn; +Infinity when the model has no edit turns. */
function costPerEdit(s: ModelStats): number {
  return s.editTurns > 0 ? s.editCost / s.editTurns : Number.POSITIVE_INFINITY
}

/**
 * Whether `lastSeen` lies no more than RECENCY_DAYS before `now`.
 * An empty or unparseable timestamp is never recent.
 */
function isRecent(lastSeen: string, now: Date): boolean {
  if (!lastSeen) return false
  const seenAt = new Date(lastSeen).getTime()
  if (Number.isNaN(seenAt)) return false
  return now.getTime() - seenAt <= RECENCY_DAYS * MS_PER_DAY
}

/**
 * Maps every model name seen in the project's assistant calls to the provider
 * of the first call that used it.
 *
 * All calls of a turn are visited in order, so the first call still wins within
 * a turn, and a call without a model name no longer hides the remaining calls
 * of the same turn.
 */
function providerByModel(project: ProjectSummary): Map<string, string> {
  const providers = new Map<string, string>()
  for (const session of project.sessions) {
    for (const turn of session.turns) {
      for (const call of turn.assistantCalls) {
        if (call.model === '') continue
        // First sighting wins; later calls never overwrite a provider.
        if (!providers.has(call.model)) providers.set(call.model, call.provider)
      }
    }
  }
  return providers
}

/**
 * True when strictly more than DEBUGGING_HEAVY_THRESHOLD of the project's edit
 * turns, summed over every session's category breakdown, are in `debugging`.
 * A project without edit turns is not debugging-heavy.
 */
function isDebuggingHeavy(project: ProjectSummary): boolean {
  let debuggingEditTurns = 0
  let totalEditTurns = 0
  for (const session of project.sessions) {
    for (const breakdown of Object.values(session.categoryBreakdown)) {
      totalEditTurns += breakdown.editTurns
    }
    debuggingEditTurns += session.categoryBreakdown.debugging?.editTurns ?? 0
  }
  if (totalEditTurns === 0) return false
  return debuggingEditTurns / totalEditTurns > DEBUGGING_HEAVY_THRESHOLD
}
/**
 * Suggests a cheaper default model for a project, or null when none qualifies.
 *
 * The "current" model is the one with the most edit turns (ties broken by higher
 * edit cost) among models with a name and at least MIN_EDIT_TURNS edit turns.
 * It must have a known provider, recent activity and a finite, positive cost
 * per edit. A candidate must share its provider, be recent, have a one-shot rate
 * no lower than the current rate minus the tolerance (zero for debugging-heavy
 * projects) and cost at most MAX_COST_RATIO of the current cost per edit.
 * The candidate with the largest savings wins; ties go to the higher one-shot rate.
 *
 * @param project Project whose sessions are analysed.
 * @param opts.now Reference time for the recency check; defaults to the current time.
 */
export function recommendModelDefault(project: ProjectSummary, opts: { now?: Date } = {}): ModelDefaultRecommendation | null {
  const now = opts.now ?? new Date()

  const ranked = aggregateModelStats([project])
    .filter(s => s.model !== '' && s.editTurns >= MIN_EDIT_TURNS)
    .sort((a, b) => b.editTurns - a.editTurns || b.editCost - a.editCost)
  const [current, ...others] = ranked
  if (!current) return null

  const providers = providerByModel(project)
  const provider = providers.get(current.model)
  if (!provider) return null
  if (!isRecent(current.lastSeen, now)) return null

  const currentRate = oneShotRate(current)
  const currentCost = costPerEdit(current)
  if (!Number.isFinite(currentCost) || currentCost <= 0) return null

  const debuggingHeavy = isDebuggingHeavy(project)
  const tolerance = debuggingHeavy ? 0 : ONE_SHOT_TOLERANCE

  // Collect every model that passes all four gates, in ranking order.
  const qualifying: Array<{ candidate: ModelStats; candidateRate: number; candidateCost: number }> = []
  for (const candidate of others) {
    if (providers.get(candidate.model) !== provider) continue
    if (!isRecent(candidate.lastSeen, now)) continue
    const candidateRate = oneShotRate(candidate)
    if (!(candidateRate >= currentRate - tolerance)) continue
    const candidateCost = costPerEdit(candidate)
    if (!(candidateCost <= currentCost * MAX_COST_RATIO)) continue
    qualifying.push({ candidate, candidateRate, candidateCost })
  }

  // Largest relative saving first, then the better one-shot rate.
  qualifying.sort((a, b) => {
    const savingsA = 1 - a.candidateCost / currentCost
    const savingsB = 1 - b.candidateCost / currentCost
    return savingsB - savingsA || b.candidateRate - a.candidateRate
  })

  const best = qualifying[0]
  if (!best) return null

  return {
    project: project.project,
    projectPath: project.projectPath,
    currentModel: current.model,
    candidateModel: best.candidate.model,
    provider,
    currentEditTurns: current.editTurns,
    candidateEditTurns: best.candidate.editTurns,
    currentOneShotRate: currentRate,
    candidateOneShotRate: best.candidateRate,
    currentCostPerEdit: currentCost,
    candidateCostPerEdit: best.candidateCost,
    savingsPct: (1 - best.candidateCost / currentCost) * 100,
    debuggingHeavy,
  }
}
/**
 * Builds the action plan that sets `model` in `<projectPath>/.claude/settings.json`
 * to the recommended candidate.
 *
 * Existing settings are kept when the file holds a JSON object; any other JSON
 * value is replaced by an empty object before `model` is set. A missing file
 * yields a plan with `expectedHash: null`. Invalid JSON and every read error
 * other than ENOENT are rethrown.
 *
 * @param recommendation Recommendation to turn into a plan.
 * @returns A `model-default` plan with a single `edit` change and a baseline
 *          holding the one-shot rates of both models.
 */
export async function buildApplyModelDefaultPlan(recommendation: ModelDefaultRecommendation): Promise<ActionPlan> {
  const { project, projectPath, candidateModel, currentModel } = recommendation
  const settingsPath = join(projectPath, '.claude', 'settings.json')

  let settings: Record<string, unknown> = {}
  let expectedHash: string | null = null
  try {
    const raw = await readFile(settingsPath, 'utf-8')
    // NOTE(review): the hash comes from a second read of the file, so it can
    // describe different bytes than `raw` if the file changes in between.
    expectedHash = await sha256File(settingsPath)
    const parsed: unknown = JSON.parse(raw)
    const isJsonObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
    if (isJsonObject) settings = parsed as Record<string, unknown>
  } catch (err) {
    // Only "file does not exist" is tolerated; the plan then starts from {}.
    if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err
  }

  settings.model = candidateModel

  return {
    kind: 'model-default',
    findingId: `model-default:${project}`,
    description: `Set Claude Code default model to ${candidateModel} for ${project}`,
    changes: [{
      op: 'edit',
      path: settingsPath,
      content: JSON.stringify(settings, null, 2) + '\n',
      expectedHash,
    }],
    baseline: {
      windowDays: 30,
      capturedAt: new Date().toISOString(),
      estimatedTokens: 0,
      sessions: recommendation.currentEditTurns + recommendation.candidateEditTurns,
      // The candidate's entry is written first.
      metrics: {
        [candidateModel]: recommendation.candidateOneShotRate,
        [currentModel]: recommendation.currentOneShotRate,
      },
    },
  }
}
/**
 * Report row for an applied `model-default` action: compares the candidate
 * model's one-shot rate stored in the baseline with the rate computed from
 * `sessions`.
 *
 * Returns a "not measurable" row when the baseline has fewer than two metrics,
 * when the stored rate is not a finite number, or when the candidate has fewer
 * than 20 edit turns in `sessions`. Otherwise the row is `measured`, with `low`
 * confidence and an undo hint if the rate dropped by more than 5 percentage points.
 *
 * NOTE(review): `rec`, `afterStart` and `now` are not read here, so `sessions`
 * is presumably already limited to the post-apply window (and to the relevant
 * project) by the caller - confirm.
 */
async function modelDefaultRow(
  base: ActReportRow, rec: ActionRecord, sessions: SessionSummary[],
  baseline: ActionBaseline, afterStart: Date, now: Date,
): Promise<ActReportRow> {
  const MIN_POST_APPLY_EDIT_TURNS = 20
  const REGRESSION_MARGIN = 0.05
  const invalidBaseline: ActReportRow = { ...base, note: 'not measurable: invalid baseline' }

  // The first metrics key is treated as the candidate model, which relies on
  // the baseline having been written with the candidate's entry first.
  const models = Object.keys(baseline.metrics)
  const candidateModel = models[0]
  if (models.length < 2 || candidateModel === undefined) return invalidBaseline

  // A journal entry whose stored rate is missing or not a number would
  // otherwise surface as "NaN%" in a row marked as measured.
  const preApplyRate: unknown = baseline.metrics[candidateModel]
  if (typeof preApplyRate !== 'number' || !Number.isFinite(preApplyRate)) return invalidBaseline

  // aggregateModelStats works on projects, so the sessions are wrapped in a
  // placeholder project with zeroed totals.
  const mockProject: ProjectSummary = {
    project: 'mock',
    projectPath: 'mock',
    totalCostUSD: 0,
    totalSavingsUSD: 0,
    totalApiCalls: 0,
    totalProxiedCostUSD: 0,
    sessions,
  }

  const { aggregateModelStats } = await import('../compare-stats.js')
  const stats = aggregateModelStats([mockProject]).find(s => s.model === candidateModel)

  if (!stats || stats.editTurns < MIN_POST_APPLY_EDIT_TURNS) {
    return { ...base, note: `not measurable: < ${MIN_POST_APPLY_EDIT_TURNS} edit turns for ${candidateModel} since apply` }
  }

  const postApplyRate = stats.oneShotTurns / stats.editTurns
  const rateChange = `one-shot rate ${(preApplyRate * 100).toFixed(1)}% -> ${(postApplyRate * 100).toFixed(1)}%`

  if (postApplyRate < preApplyRate - REGRESSION_MARGIN) {
    return {
      ...base,
      status: 'measured',
      realizedTokens: 0,
      confidence: 'low',
      note: `quality regression, consider undo: ${rateChange}`,
    }
  }

  return {
    ...base,
    status: 'measured',
    realizedTokens: 0,
    confidence: 'normal',
    note: `correlation, not attribution: ${rateChange}`,
  }
}
0 const base: ActReportRow = { @@ -307,6 +354,7 @@ async function computeRow(rec: ActionRecord, sessions: SessionSummary[], afterSt if (rec.kind === 'claude-md-rule') return readEditRow(base, sessions, baseline, afterStart, now) if (rec.kind === 'shell-config') return { ...base, note: 'not measurable: bash result token sizes are not retained in the summary' } if (rec.kind === 'guard-install') return guardRow(base, afterStart, now, baseline, opts) + if (rec.kind === 'model-default') return modelDefaultRow(base, rec, sessions, baseline, afterStart, now) return { ...base, note: 'not measurable: kind is not tracked by act report' } } diff --git a/src/compare.tsx b/src/compare.tsx index 2fd71d49..3b05f15d 100644 --- a/src/compare.tsx +++ b/src/compare.tsx @@ -8,6 +8,7 @@ import { parseAllSessions } from './parser.js' import { getAllProviders } from './providers/index.js' import type { ProjectSummary, DateRange } from './types.js' import { patchStdoutForWindows } from './ink-win.js' +import { recommendModelDefault, type ModelDefaultRecommendation } from './act/model-defaults.js' const ORANGE = '#FF8C42' const GREEN = '#5BF5A0' @@ -51,11 +52,12 @@ function barWidth(rate: number): number { type ModelSelectorProps = { models: ModelStats[] + recommendations: ModelDefaultRecommendation[] onSelect: (a: ModelStats, b: ModelStats) => void onBack: () => void } -function ModelSelector({ models, onSelect, onBack }: ModelSelectorProps) { +function ModelSelector({ models, recommendations, onSelect, onBack }: ModelSelectorProps) { const { exit } = useApp() const [cursor, setCursor] = useState(0) const [selected, setSelected] = useState>(new Set()) @@ -126,6 +128,26 @@ function ModelSelector({ models, onSelect, onBack }: ModelSelectorProps) { [esc] back [q] quit + + {recommendations.length > 0 && ( + + Model defaults recommendation + + {recommendations.map(rec => ( + + + {rec.project}: + {rec.currentModel} + {' -> '} + {rec.candidateModel} + + Current: 
{(rec.currentOneShotRate*100).toFixed(1)}% one-shot over {rec.currentEditTurns} edits, {formatCost(rec.currentCostPerEdit)}/edit + Candidate: {(rec.candidateOneShotRate*100).toFixed(1)}% one-shot over {rec.candidateEditTurns} edits, {formatCost(rec.candidateCostPerEdit)}/edit + To apply: codeburn act apply-model {rec.project} + + ))} + + )} ) } @@ -317,6 +339,14 @@ export function CompareView({ projects, onBack }: CompareViewProps) { const { exit } = useApp() const [phase, setPhase] = useState<'select' | 'loading' | 'results'>('select') const [models, setModels] = useState(() => aggregateModelStats(projects)) + const [recommendations, setRecommendations] = useState(() => { + const recs: ModelDefaultRecommendation[] = [] + for (const p of projects) { + const rec = recommendModelDefault(p) + if (rec) recs.push(rec) + } + return recs + }) const [pickedNames, setPickedNames] = useState<[string, string] | null>(null) const [selectedA, setSelectedA] = useState(null) const [selectedB, setSelectedB] = useState(null) @@ -331,6 +361,13 @@ export function CompareView({ projects, onBack }: CompareViewProps) { const newModels = aggregateModelStats(projects) setModels(newModels) + const recs: ModelDefaultRecommendation[] = [] + for (const p of projects) { + const rec = recommendModelDefault(p) + if (rec) recs.push(rec) + } + setRecommendations(recs) + if (!pickedNames) return const hasA = newModels.some(m => m.model === pickedNames[0]) const hasB = newModels.some(m => m.model === pickedNames[1]) @@ -460,6 +497,7 @@ export function CompareView({ projects, onBack }: CompareViewProps) { return ( diff --git a/src/optimize.ts b/src/optimize.ts index 8c59e0ec..998bc7b1 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -10,6 +10,7 @@ import { parseJsonlLine, shouldSkipLine } from './parser.js' import type { DateRange, ProjectSummary } from './types.js' import { formatCost } from './currency.js' import { formatTokens } from './format.js' +import { recommendModelDefault, type 
ModelDefaultRecommendation } from './act/model-defaults.js' // ============================================================================ // Display constants @@ -233,6 +234,7 @@ export type OptimizeResult = { costRate: number healthScore: number healthGrade: HealthGrade + modelRecommendations?: ModelDefaultRecommendation[] } export type OptimizeJsonReport = { @@ -263,6 +265,7 @@ export type OptimizeJsonReport = { estimatedSavingsUSD: number fix: WasteAction }> + modelRecommendations?: Array } export type ToolCall = { @@ -2370,7 +2373,7 @@ export async function scanAndDetect( dateRange?: DateRange, ): Promise { if (projects.length === 0) { - return { findings: [], costRate: 0, healthScore: 100, healthGrade: 'A' } + return { findings: [], costRate: 0, healthScore: 100, healthGrade: 'A', modelRecommendations: [] } } const key = cacheKey(projects, dateRange) @@ -2421,7 +2424,14 @@ export async function scanAndDetect( findings.sort((a, b) => urgencyScore(b) - urgencyScore(a)) const { score, grade } = computeHealth(findings) - const result: OptimizeResult = { findings, costRate, healthScore: score, healthGrade: grade } + + const modelRecommendations: ModelDefaultRecommendation[] = [] + for (const project of projects) { + const rec = recommendModelDefault(project, { now: dateRange?.end }) + if (rec) modelRecommendations.push(rec) + } + + const result: OptimizeResult = { findings, costRate, healthScore: score, healthGrade: grade, modelRecommendations } resultCache.set(key, { data: result, ts: Date.now() }) return result } @@ -2528,6 +2538,7 @@ function renderOptimize( healthGrade: HealthGrade, appliedHeader?: string, previouslyApplied?: Record, + modelRecommendations?: ModelDefaultRecommendation[], ): string { const lines: string[] = [] lines.push('') @@ -2573,6 +2584,19 @@ function renderOptimize( lines.push(chalk.hex(DIM)(' ' + SEP.repeat(PANEL_WIDTH))) lines.push(chalk.dim(' Estimates only.')) lines.push('') + + if (modelRecommendations && modelRecommendations.length 
> 0) { + lines.push(chalk.bold.hex(ORANGE)(' Model defaults recommendation')) + lines.push(chalk.hex(DIM)(' ' + SEP.repeat(PANEL_WIDTH))) + for (const rec of modelRecommendations) { + lines.push(` ${rec.project}: ${chalk.bold(rec.currentModel)} -> ${chalk.bold.hex(GREEN)(rec.candidateModel)}`) + lines.push(chalk.dim(` Current: ${(rec.currentOneShotRate*100).toFixed(1)}% one-shot over ${rec.currentEditTurns} edits, ${formatCost(rec.currentCostPerEdit)}/edit`)) + lines.push(chalk.dim(` Candidate: ${(rec.candidateOneShotRate*100).toFixed(1)}% one-shot over ${rec.candidateEditTurns} edits, ${formatCost(rec.candidateCostPerEdit)}/edit`)) + lines.push(` To apply: ${chalk.hex(CYAN)(`codeburn act apply-model ${rec.project}`)}`) + lines.push('') + } + } + return lines.join('\n') } @@ -2603,7 +2627,7 @@ export async function runOptimize( return } - const output = renderOptimize(findings, costRate, periodLabel, periodCost, sessions.length, callCount, healthScore, healthGrade, opts.appliedHeader, opts.previouslyApplied) + const output = renderOptimize(findings, costRate, periodLabel, periodCost, sessions.length, callCount, healthScore, healthGrade, opts.appliedHeader, opts.previouslyApplied, result.modelRecommendations) console.log(output) } @@ -2650,5 +2674,6 @@ export function buildOptimizeJsonReport( estimatedSavingsUSD: f.tokensSaved * result.costRate, fix: f.fix, })), + modelRecommendations: result.modelRecommendations, } } diff --git a/tests/act-model-defaults.test.ts b/tests/act-model-defaults.test.ts new file mode 100644 index 00000000..6de3651b --- /dev/null +++ b/tests/act-model-defaults.test.ts @@ -0,0 +1,243 @@ +import { mkdtemp, mkdir, readFile, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { dirname, join } from 'node:path' +import { describe, expect, it } from 'vitest' + +import { runAction } from '../src/act/apply.js' +import { undoAction } from '../src/act/undo.js' +import { buildApplyModelDefaultPlan, recommendModelDefault 
// Fixed clock for the suite: RECENT is one day before NOW, OLD is sixteen days before.
const NOW = new Date('2026-07-04T12:00:00.000Z')
const RECENT = '2026-07-03T12:00:00.000Z'
const OLD = '2026-06-18T12:00:00.000Z'

// Source of unique, reproducible deduplication keys for fixture calls.
let nextDedupId = 0

/** Token usage shared by every fixture call. */
function usage() {
  return {
    inputTokens: 100,
    outputTokens: 50,
    cacheCreationInputTokens: 0,
    cacheReadInputTokens: 0,
    cachedInputTokens: 0,
    reasoningTokens: 0,
    webSearchRequests: 0,
  }
}

/**
 * Builds one classified turn with a single assistant call.
 * Defaults: provider `claude`, timestamp RECENT, cost 1, category `feature`,
 * no retries, has edits.
 */
function turn(opts: {
  model: string
  provider?: string
  timestamp?: string
  costUSD?: number
  category?: TaskCategory
  retries?: number
  hasEdits?: boolean
}): ClassifiedTurn {
  const timestamp = opts.timestamp ?? RECENT
  const retries = opts.retries ?? 0
  return {
    userMessage: 'edit the code',
    timestamp,
    sessionId: `session-${opts.model}-${timestamp}-${retries}`,
    category: opts.category ?? 'feature',
    retries,
    hasEdits: opts.hasEdits ?? true,
    assistantCalls: [{
      provider: opts.provider ?? 'claude',
      model: opts.model,
      usage: usage(),
      costUSD: opts.costUSD ?? 1,
      tools: [],
      mcpTools: [],
      skills: [],
      subagentTypes: [],
      hasAgentSpawn: false,
      hasPlanMode: false,
      speed: 'standard',
      timestamp,
      bashCommands: [],
      // A counter keeps keys unique without making the fixtures random.
      deduplicationKey: `key-${opts.model}-${nextDedupId++}`,
    }],
  }
}

/**
 * Builds `count` turns from the same options. Without an explicit timestamp the
 * turns are spaced one minute apart starting at RECENT; the offset is added as
 * real time, so the timestamps stay valid when `count` exceeds 60.
 */
function repeatTurns(count: number, opts: Parameters<typeof turn>[0]): ClassifiedTurn[] {
  const startMs = Date.parse(RECENT)
  return Array.from({ length: count }, (_, i) =>
    turn({ ...opts, timestamp: opts.timestamp ?? new Date(startMs + i * 60000).toISOString() }))
}

/**
 * Builds the turns of one model: `oneShotTurns` turns without retries followed
 * by the remaining `editTurns - oneShotTurns` turns with one retry each.
 * `editCost` is spread evenly over all edit turns.
 */
function modelTurns(opts: {
  model: string
  provider?: string
  editTurns: number
  oneShotTurns: number
  editCost: number
  timestamp?: string
  category?: TaskCategory
}): ClassifiedTurn[] {
  const shared = {
    model: opts.model,
    provider: opts.provider,
    timestamp: opts.timestamp,
    costUSD: opts.editCost / opts.editTurns,
    category: opts.category,
  }
  return [
    ...repeatTurns(opts.oneShotTurns, { ...shared, retries: 0 }),
    ...repeatTurns(opts.editTurns - opts.oneShotTurns, { ...shared, retries: 1 }),
  ]
}

/** A category breakdown with every counter at zero; each category gets its own object. */
function emptyCategoryBreakdown(): SessionSummary['categoryBreakdown'] {
  const zero = () => ({ turns: 0, costUSD: 0, savingsUSD: 0, retries: 0, editTurns: 0, oneShotTurns: 0 })
  return {
    coding: zero(),
    debugging: zero(),
    feature: zero(),
    refactoring: zero(),
    testing: zero(),
    exploration: zero(),
    planning: zero(),
    delegation: zero(),
    git: zero(),
    'build/deploy': zero(),
    conversation: zero(),
    brainstorming: zero(),
    general: zero(),
  }
}
/**
 * Wraps the given turns in a single-session project.
 *
 * Category counts are synthetic: `debuggingTurns` (default: the number of turns
 * whose category is `debugging`) is booked as debugging turns and edit turns,
 * and the remainder, floored at zero, is booked under `feature`.
 */
function projectWithTurns(turns: ClassifiedTurn[], opts: { project?: string; projectPath?: string; debuggingTurns?: number } = {}): ProjectSummary {
  const projectName = opts.project ?? 'demo-project'
  const debuggingCount = opts.debuggingTurns ?? turns.filter(t => t.category === 'debugging').length
  const featureCount = Math.max(0, turns.length - debuggingCount)

  const categoryBreakdown = emptyCategoryBreakdown()
  categoryBreakdown.debugging.turns = debuggingCount
  categoryBreakdown.debugging.editTurns = debuggingCount
  categoryBreakdown.feature.turns = featureCount
  categoryBreakdown.feature.editTurns = featureCount

  // Project and session totals are the same figures, so compute them once.
  const costOfTurn = (t: ClassifiedTurn) => t.assistantCalls.reduce((s, c) => s + c.costUSD, 0)
  const totalCostUSD = turns.reduce((sum, t) => sum + costOfTurn(t), 0)
  const apiCalls = turns.reduce((sum, t) => sum + t.assistantCalls.length, 0)

  return {
    project: projectName,
    projectPath: opts.projectPath ?? '/tmp/demo-project',
    totalCostUSD,
    totalSavingsUSD: 0,
    totalApiCalls: apiCalls,
    totalProxiedCostUSD: 0,
    sessions: [{
      sessionId: 'session-1',
      project: projectName,
      firstTimestamp: turns[0]?.timestamp ?? RECENT,
      lastTimestamp: turns.at(-1)?.timestamp ?? RECENT,
      totalCostUSD,
      totalSavingsUSD: 0,
      totalInputTokens: 0,
      totalOutputTokens: 0,
      totalReasoningTokens: 0,
      totalCacheReadTokens: 0,
      totalCacheWriteTokens: 0,
      apiCalls,
      turns,
      modelBreakdown: {},
      toolBreakdown: {},
      mcpBreakdown: {},
      bashBreakdown: {},
      categoryBreakdown,
      skillBreakdown: {},
      subagentBreakdown: {},
    }],
  }
}

/**
 * The standard two-model project used by the recommendation tests.
 *
 * Current model: claude-sonnet-4-20250514, 35 edit turns, 32 one-shot, total edit cost 70.
 * Candidate: claude-haiku-3-5-20241022, by default same provider, 32 edit turns,
 * 29 one-shot, total edit cost 30. Every candidate figure can be overridden.
 */
function recommendationProject(overrides: {
  candidateEditTurns?: number
  candidateOneShotTurns?: number
  candidateEditCost?: number
  candidateProvider?: string
  candidateTimestamp?: string
  debuggingTurns?: number
} = {}): ProjectSummary {
  const currentTurns = modelTurns({
    model: 'claude-sonnet-4-20250514',
    provider: 'claude',
    editTurns: 35,
    oneShotTurns: 32,
    editCost: 70,
  })
  const candidateTurns = modelTurns({
    model: 'claude-haiku-3-5-20241022',
    provider: overrides.candidateProvider ?? 'claude',
    editTurns: overrides.candidateEditTurns ?? 32,
    oneShotTurns: overrides.candidateOneShotTurns ?? 29,
    editCost: overrides.candidateEditCost ?? 30,
    timestamp: overrides.candidateTimestamp,
  })
  return projectWithTurns([...currentTurns, ...candidateTurns], { debuggingTurns: overrides.debuggingTurns })
}
// Recommendation rules, exercised against the default fixture project:
// current model 35 edits / 32 one-shot / cost 70 (2.00 per edit),
// candidate 32 edits / 29 one-shot / cost 30 (about 0.94 per edit).
describe('model default recommendations', () => {
  it('recommends a same-provider candidate with enough volume, recent data, similar quality, and <=60% cost per edit', () => {
    const recommendation = recommendModelDefault(recommendationProject(), { now: NOW })

    expect(recommendation).toMatchObject({
      project: 'demo-project',
      currentModel: 'claude-sonnet-4-20250514',
      candidateModel: 'claude-haiku-3-5-20241022',
      provider: 'claude',
    })
    expect(recommendation?.currentOneShotRate).toBeCloseTo(32 / 35, 5)
    expect(recommendation?.candidateOneShotRate).toBeCloseTo(29 / 32, 5)
    // 1 - 0.9375 / 2 is roughly 53% savings.
    expect(recommendation?.savingsPct).toBeGreaterThan(50)
  })

  it('rejects candidates below the 30 edit-turn minimum', () => {
    expect(recommendModelDefault(recommendationProject({ candidateEditTurns: 29, candidateOneShotTurns: 27 }), { now: NOW })).toBeNull()
  })

  it('rejects candidates more than 3pp below the current model one-shot rate', () => {
    // 28 / 32 = 87.5%, below 32 / 35 (about 91.4%) minus 3 points.
    expect(recommendModelDefault(recommendationProject({ candidateOneShotTurns: 28 }), { now: NOW })).toBeNull()
  })

  it('rejects candidates that cost more than 60% of the current model per edit', () => {
    // 43 / 32 is about 1.34 per edit, above 60% of 2.00.
    expect(recommendModelDefault(recommendationProject({ candidateEditCost: 43 }), { now: NOW })).toBeNull()
  })

  it('rejects candidates last seen more than 14 days ago', () => {
    // OLD is sixteen days before NOW.
    expect(recommendModelDefault(recommendationProject({ candidateTimestamp: OLD }), { now: NOW })).toBeNull()
  })

  it('rejects cross-provider candidates in v1', () => {
    expect(recommendModelDefault(recommendationProject({ candidateProvider: 'openai' }), { now: NOW })).toBeNull()
  })

  it('uses zero tolerance for debugging-heavy projects', () => {
    // 40 of the 67 fixture turns booked as debugging is above the 40% threshold,
    // so the candidate's slightly lower one-shot rate is no longer accepted.
    const project = recommendationProject({ debuggingTurns: 40 })

    expect(recommendModelDefault(project, { now: NOW })).toBeNull()
  })
})

// End-to-end check of the plan: build it, apply it, inspect the file, undo it.
describe('model default apply plan', () => {
  it('writes only the model key while preserving existing Claude settings and journals model-default', async () => {
    // Everything lives in a throwaway directory that is removed in `finally`.
    const dir = await mkdtemp(join(tmpdir(), 'codeburn-model-default-'))
    const actionsDir = join(dir, '.codeburn-actions')
    const projectPath = join(dir, 'project')
    const settingsPath = join(projectPath, '.claude', 'settings.json')
    const original = '{\n "enabledTools": ["Bash"],\n "model": "claude-sonnet-4-20250514"\n}\n'

    try {
      await mkdir(dirname(settingsPath), { recursive: true })
      await writeFile(settingsPath, original, { encoding: 'utf-8' })
      const recommendation = recommendModelDefault(recommendationProject(), { now: NOW })!
      // Point the recommendation at the temporary project instead of the fixture path.
      const plan = await buildApplyModelDefaultPlan({ ...recommendation, projectPath })
      const record = await runAction(plan, actionsDir)

      const updated = JSON.parse(await readFile(settingsPath, 'utf-8'))
      expect(updated).toEqual({ enabledTools: ['Bash'], model: 'claude-haiku-3-5-20241022' })
      expect(record.kind).toBe('model-default')
      expect(record.findingId).toBe('model-default:demo-project')

      // Undo must restore the file byte for byte.
      await undoAction({ id: record.id }, { actionsDir })
      expect(await readFile(settingsPath, 'utf-8')).toBe(original)
    } finally {
      await rm(dir, { recursive: true, force: true })
    }
  })
})