From a7236a8a3caa6f0755319d92b1df12672a167e4b Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Tue, 26 May 2026 06:15:32 -0600 Subject: [PATCH] feat(1.5.0)!: shed legacy KB-optimization orchestrator; campaign-native release report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit agent-eval 0.42.0 deleted the multi-shot orchestration. agent-knowledge's optimization.ts was a thin wrapper over the deleted runMultiShotOptimization; release.ts consumed its MultiShotOptimizationResult shape + the deleted releaseTraceEvidenceFromMultiShotTrials. BREAKING: remove optimization.ts (runKnowledgeBaseOptimization, knowledgeVariantFromCandidate, and the KnowledgeBaseVariant / KnowledgeOptimizationConfig types) — consumers call the agent-eval campaign substrate (runImprovementLoop / gepaDriver) directly now. BREAKING: rewrite release.ts — knowledgeReleaseReportFromOptimization is removed and replaced by knowledgeReleaseReport, which takes campaign-native inputs (candidate/baseline RunRecord[] + ReleaseTraceEvidence + gate decision) and folds them into a ReleaseConfidenceScorecard + KnowledgeRelease — decoupled from any optimizer result shape. Only agent-builder used the removed KB-opt API; knowledge features (the other 7 agent-eval importers) use kept symbols and are unchanged. Bump agent-eval to ^0.42.0 and agent-runtime to ^0.25.0. This is the permanent fix for the scoped-pin trap consumers needed (agent-knowledge no longer imports any deleted symbol). typecheck + build + 99 tests green. 
--- package.json | 6 +-- pnpm-lock.yaml | 72 ++++++++++++++++++-------------- src/index.ts | 1 - src/optimization.ts | 44 -------------------- src/release.ts | 85 +++++++++++++++++++------------------- tests/optimization.test.ts | 65 ----------------------------- 6 files changed, 86 insertions(+), 187 deletions(-) delete mode 100644 src/optimization.ts delete mode 100644 tests/optimization.test.ts diff --git a/package.json b/package.json index cc3667f..6695285 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/agent-knowledge", - "version": "1.4.0", + "version": "1.5.0", "description": "Source-grounded, eval-gated knowledge growth primitives for agents.", "homepage": "https://github.com/tangle-network/agent-knowledge#readme", "repository": { @@ -63,8 +63,8 @@ "format": "biome format --write src tests" }, "dependencies": { - "@tangle-network/agent-eval": "^0.29.1", - "@tangle-network/agent-runtime": "^0.19.0", + "@tangle-network/agent-eval": "^0.42.0", + "@tangle-network/agent-runtime": "^0.25.0", "@tangle-network/sandbox": "^0.2.1", "zod": "^4.3.6" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 68ea7a1..92ba7ab 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,11 +9,11 @@ importers: .: dependencies: '@tangle-network/agent-eval': - specifier: ^0.29.1 - version: 0.29.1(typescript@5.9.3) + specifier: ^0.42.0 + version: 0.42.0(@tangle-network/agent-runtime@0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3))(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3) '@tangle-network/agent-runtime': - specifier: ^0.19.0 - version: 0.19.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3) + specifier: ^0.25.0 + version: 0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3) '@tangle-network/sandbox': specifier: ^0.2.1 version: 
0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)) @@ -78,28 +78,24 @@ packages: engines: {node: '>=14.21.3'} cpu: [arm64] os: [linux] - libc: [musl] '@biomejs/cli-linux-arm64@2.4.15': resolution: {integrity: sha512-owaAMZD/T4LrD0ELNCk0Km3qrRHuM0X6EAyVE1FSqGY0rbLoiDLrO4Us2tllm6cAeB2Ioa9C2C08NZPdr8+0Ug==} engines: {node: '>=14.21.3'} cpu: [arm64] os: [linux] - libc: [glibc] '@biomejs/cli-linux-x64-musl@2.4.15': resolution: {integrity: sha512-CNq/9W38SYSH023lfcQ4KKU8K0YX8T//FZUhcgtMMRABDojx5XsMV7jlweAvGSl389wJQB29Qo6Zb/a+jdvt+w==} engines: {node: '>=14.21.3'} cpu: [x64] os: [linux] - libc: [musl] '@biomejs/cli-linux-x64@2.4.15': resolution: {integrity: sha512-0jj7THz12GbUOLmMibktK6DZjqz2zV64KFxyBtcFTKPiiOIY0a7vns1elpO1dERvxpsZ5ik0oFfz0oGwFde1+g==} engines: {node: '>=14.21.3'} cpu: [x64] os: [linux] - libc: [glibc] '@biomejs/cli-win32-arm64@2.4.15': resolution: {integrity: sha512-ouhkYdlhp/1GghEJPdWwD/Vi3gQ1nFxuSpMolWsbq3Lsq3QUR4jl6UdhhscdCugKU5vOEuMiJhvKj66O0OCq+w==} @@ -346,79 +342,66 @@ packages: resolution: {integrity: sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==} cpu: [arm] os: [linux] - libc: [glibc] '@rollup/rollup-linux-arm-musleabihf@4.60.2': resolution: {integrity: sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==} cpu: [arm] os: [linux] - libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.60.2': resolution: {integrity: sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==} cpu: [arm64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.60.2': resolution: {integrity: sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==} cpu: [arm64] os: [linux] - libc: [musl] '@rollup/rollup-linux-loong64-gnu@4.60.2': resolution: {integrity: sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==} cpu: [loong64] os: [linux] - libc: [glibc] 
'@rollup/rollup-linux-loong64-musl@4.60.2': resolution: {integrity: sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==} cpu: [loong64] os: [linux] - libc: [musl] '@rollup/rollup-linux-ppc64-gnu@4.60.2': resolution: {integrity: sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==} cpu: [ppc64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-ppc64-musl@4.60.2': resolution: {integrity: sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==} cpu: [ppc64] os: [linux] - libc: [musl] '@rollup/rollup-linux-riscv64-gnu@4.60.2': resolution: {integrity: sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==} cpu: [riscv64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.60.2': resolution: {integrity: sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==} cpu: [riscv64] os: [linux] - libc: [musl] '@rollup/rollup-linux-s390x-gnu@4.60.2': resolution: {integrity: sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==} cpu: [s390x] os: [linux] - libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.60.2': resolution: {integrity: sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==} cpu: [x64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-x64-musl@4.60.2': resolution: {integrity: sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==} cpu: [x64] os: [linux] - libc: [musl] '@rollup/rollup-openbsd-x64@4.60.2': resolution: {integrity: sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==} @@ -468,26 +451,47 @@ packages: '@scure/bip39@2.2.0': resolution: {integrity: sha512-T/Bj/YvYMNkIPq6EENO6/rcs2e7qTNuyoUXf0KBFDmp0ZDu0H2X4Lq6yC3i0c8PcWkov5EbW+yQZZbdMmk154A==} - '@tangle-network/agent-eval@0.29.1': 
- resolution: {integrity: sha512-mzxCZqgFOlW7F4Ozv/tBfF87FSPm2GI71myZu/EuboT1gHtIdviEI+3dD/AHV8RO/CPOzYbrDWWt8HY7pduI/A==} + '@tangle-network/agent-eval@0.40.5': + resolution: {integrity: sha512-ew27fDkzvYcM/3/u6Jx1HGS3/bPoIWAXKGa/2XlOro2hBwMA/h37SAHg4ytUDMd2M0mAKQAAanUxnHfkt/aklw==} engines: {node: '>=20'} hasBin: true + peerDependencies: + '@tangle-network/agent-runtime': ^0.21.0 + '@tangle-network/sandbox': ^0.2.1 + peerDependenciesMeta: + '@tangle-network/agent-runtime': + optional: true + '@tangle-network/sandbox': + optional: true - '@tangle-network/agent-eval@0.33.1': - resolution: {integrity: sha512-VAbg1UkC480Xzfi2jqiFMQLYykWvDMO47UHx4bb2rOeiogN1zzM10kPst3OotM+k1B2lbu51uoVnKDBnqK8zcw==} + '@tangle-network/agent-eval@0.42.0': + resolution: {integrity: sha512-gJFT1Vm5LYDHtIF0BUqGq6i3Qa9IvFr3EvTfAE1CYjErFNl3TohL1sduJqj1GXIhDbswVVuWp5qaahHZHaIsbA==} engines: {node: '>=20'} hasBin: true + peerDependencies: + '@tangle-network/agent-runtime': ^0.21.0 + '@tangle-network/sandbox': ^0.2.1 + peerDependenciesMeta: + '@tangle-network/agent-runtime': + optional: true + '@tangle-network/sandbox': + optional: true '@tangle-network/agent-integrations@0.25.7': resolution: {integrity: sha512-5Iuymcoq6d1oZlyORfmVXiP2G/tJQe0ADYBUNwDlbk9uulSa3c6rztlr6sKm100NqDavVlJ0Jo75j9CsaemhIA==} engines: {node: '>=20'} hasBin: true - '@tangle-network/agent-runtime@0.19.0': - resolution: {integrity: sha512-WbXEnPRPqeg27b+FWxIkoBCAgyPUWyJo7dgPIUcGWYX6O5FR6gcSBKDxvLorpAC5fKSh1mn3INcpXpuflPZKrA==} + '@tangle-network/agent-runtime@0.25.0': + resolution: {integrity: sha512-8snUNiDIb/9aeLDZPyf1O1gdOTQ9CV4nXDoULwE0xoibG8c0Ob6eRJw6wmDcMlDYVwTQr2gkq/mwWuuJ+GfaNQ==} engines: {node: '>=20'} + hasBin: true peerDependencies: + '@tangle-network/agent-knowledge': '>=1.3.0 <2.0.0' '@tangle-network/sandbox': '>=0.1.2 <0.3.0' + peerDependenciesMeta: + '@tangle-network/agent-knowledge': + optional: true '@tangle-network/sandbox@0.1.2': resolution: {integrity: 
sha512-6TPH9QgCgou9Bhc1kzLNL4/PRiT1mjId6NONY5Le/KT2kh77cXH8KN3TTY/cU+/eW+WM5FYJOy32FWl2HShXbw==} @@ -1248,7 +1252,7 @@ snapshots: '@noble/hashes': 2.2.0 '@scure/base': 2.2.0 - '@tangle-network/agent-eval@0.29.1(typescript@5.9.3)': + '@tangle-network/agent-eval@0.40.5(@tangle-network/agent-runtime@0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3))(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)': dependencies: '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.2) '@ax-llm/ax': 19.0.45(zod@4.4.2) @@ -1256,12 +1260,15 @@ snapshots: '@tangle-network/tcloud': 0.4.6(typescript@5.9.3)(zod@4.4.2) hono: 4.12.16 zod: 4.4.2 + optionalDependencies: + '@tangle-network/agent-runtime': 0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3) + '@tangle-network/sandbox': 0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)) transitivePeerDependencies: - bufferutil - typescript - utf-8-validate - '@tangle-network/agent-eval@0.33.1(typescript@5.9.3)': + '@tangle-network/agent-eval@0.42.0(@tangle-network/agent-runtime@0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3))(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)': dependencies: '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.2) '@ax-llm/ax': 19.0.45(zod@4.4.2) @@ -1269,6 +1276,9 @@ snapshots: '@tangle-network/tcloud': 0.4.6(typescript@5.9.3)(zod@4.4.2) hono: 4.12.16 zod: 4.4.2 + optionalDependencies: + '@tangle-network/agent-runtime': 0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3) + '@tangle-network/sandbox': 0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)) transitivePeerDependencies: - bufferutil - typescript @@ -1276,9 +1286,9 @@ snapshots: '@tangle-network/agent-integrations@0.25.7': {} - 
'@tangle-network/agent-runtime@0.19.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)': + '@tangle-network/agent-runtime@0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)': dependencies: - '@tangle-network/agent-eval': 0.33.1(typescript@5.9.3) + '@tangle-network/agent-eval': 0.40.5(@tangle-network/agent-runtime@0.25.0(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3))(@tangle-network/sandbox@0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3) '@tangle-network/sandbox': 0.2.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)) transitivePeerDependencies: - bufferutil diff --git a/src/index.ts b/src/index.ts index 7d94aee..f7f2b97 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,7 +12,6 @@ export * from './indexer' export * from './inspect' export * from './kb-store' export * from './lint' -export * from './optimization' export * from './proposals' export * from './propose-from-finding' export * from './release' diff --git a/src/optimization.ts b/src/optimization.ts deleted file mode 100644 index 06ea4aa..0000000 --- a/src/optimization.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { - type MultiShotMutateAdapter, - type MultiShotOptimizationConfig, - type MultiShotOptimizationResult, - type MultiShotRunner, - type MultiShotScorer, - type MultiShotVariant, - runMultiShotOptimization, -} from '@tangle-network/agent-eval' -import type { KnowledgeBaseCandidate } from './types' - -export type KnowledgeBaseVariant = MultiShotVariant - -export interface KnowledgeOptimizationConfig - extends Omit< - MultiShotOptimizationConfig, - 'runner' | 'scorer' | 'mutateAdapter' | 'target' - > { - target?: string - runner: MultiShotRunner - scorer: MultiShotScorer - mutateAdapter: MultiShotMutateAdapter -} - -export async function runKnowledgeBaseOptimization( - config: KnowledgeOptimizationConfig, -): Promise> { - return runMultiShotOptimization({ - 
...config, - target: config.target ?? 'agent-knowledge-base', - }) -} - -export function knowledgeVariantFromCandidate( - candidate: KnowledgeBaseCandidate, - options: { id?: string; label?: string; generation?: number } = {}, -): KnowledgeBaseVariant { - return { - id: options.id ?? candidate.id, - label: options.label ?? candidate.id, - generation: options.generation ?? 0, - payload: candidate, - } -} diff --git a/src/release.ts b/src/release.ts index f09103b..14013f4 100644 --- a/src/release.ts +++ b/src/release.ts @@ -1,14 +1,13 @@ import { evaluateReleaseConfidence, - type MultiShotOptimizationResult, - type MultiShotTrialResult, + type GateDecision, type ReleaseConfidenceScorecard, + type ReleaseTraceEvidence, type RunRecord, - releaseTraceEvidenceFromMultiShotTrials, validateRunRecord, } from '@tangle-network/agent-eval' import { stableId } from './ids' -import type { KnowledgeBaseCandidate, KnowledgeRelease } from './types' +import type { KnowledgeRelease } from './types' export interface KnowledgeReleaseReport { release: KnowledgeRelease @@ -17,55 +16,55 @@ export interface KnowledgeReleaseReport { baselineRuns: RunRecord[] } -export function knowledgeReleaseReportFromOptimization( - result: MultiShotOptimizationResult, - options: { - runRecords?: RunRecord[] - createdAt?: string - minScore?: number - } = {}, -): KnowledgeReleaseReport { - const trials = result.evolution.generations.flatMap( - (generation) => generation.trials, - ) as MultiShotTrialResult[] - const traceEvidence = releaseTraceEvidenceFromMultiShotTrials(trials) - const runRecords = ( - options.runRecords ?? [ - ...(result.gate?.candidateRuns ?? []), - ...(result.gate?.baselineRuns ?? []), - ] - ).map(validateRunRecord) +/** + * Campaign-native release report. The caller (a consumer's KB self-improvement + * loop) supplies the candidate/baseline `RunRecord[]` (e.g. 
via + * `campaignToRunRecords`) + optional per-instance `ReleaseTraceEvidence` + the + * gate decision; this folds them into a `ReleaseConfidenceScorecard` + a + * `KnowledgeRelease`. Decoupled from any optimizer result shape — agent-eval's + * legacy multi-shot orchestration (and its `MultiShotOptimizationResult`) was + * removed in 0.42; release confidence is computed from records + traces. + */ +export interface KnowledgeReleaseInput { + candidateId: string + baselineId?: string + candidateRuns: RunRecord[] + baselineRuns?: RunRecord[] + traces?: ReleaseTraceEvidence[] + gateDecision?: GateDecision | null + /** True when a held-out split was evaluated (drives the holdout threshold). */ + hasHoldout?: boolean + /** Candidate is the search-best variant — a promotion precondition. Default true. */ + promotedIsBest?: boolean + createdAt?: string + minScore?: number +} + +export function knowledgeReleaseReport(input: KnowledgeReleaseInput): KnowledgeReleaseReport { + const baselineRuns = input.baselineRuns ?? [] + const runRecords = [...input.candidateRuns, ...baselineRuns].map(validateRunRecord) const scorecard = evaluateReleaseConfidence({ target: 'agent-knowledge-base', - candidateId: result.promotedVariant.id, - baselineId: 'baseline', - traces: traceEvidence, + candidateId: input.candidateId, + baselineId: input.baselineId ?? 'baseline', + traces: input.traces ?? [], runs: runRecords, - gateDecision: result.gate?.decision ?? null, + gateDecision: input.gateDecision ?? null, thresholds: { requireCorpus: false, - requireHoldout: Boolean(result.gate), - minHoldoutRuns: result.gate ? 1 : 0, + requireHoldout: input.hasHoldout ?? false, + minHoldoutRuns: input.hasHoldout ? 1 : 0, minSearchRuns: 1, - minMeanScore: options.minScore ?? 0.7, + minMeanScore: input.minScore ?? 0.7, }, }) const release: KnowledgeRelease = { - id: stableId( - 'krel', - `${result.promotedVariant.id}:${options.createdAt ?? 
new Date().toISOString()}`, - ), - candidateId: result.promotedVariant.id, - createdAt: options.createdAt ?? new Date().toISOString(), - promoted: - scorecard.status !== 'fail' && result.promotedVariant.id === result.searchBestVariant.id, + id: stableId('krel', `${input.candidateId}:${input.createdAt ?? new Date().toISOString()}`), + candidateId: input.candidateId, + createdAt: input.createdAt ?? new Date().toISOString(), + promoted: scorecard.status !== 'fail' && (input.promotedIsBest ?? true), scorecard, runRecordIds: runRecords.map((record) => record.runId), } - return { - release, - scorecard, - candidateRuns: result.gate?.candidateRuns ?? [], - baselineRuns: result.gate?.baselineRuns ?? [], - } + return { release, scorecard, candidateRuns: input.candidateRuns, baselineRuns } } diff --git a/tests/optimization.test.ts b/tests/optimization.test.ts deleted file mode 100644 index 321767a..0000000 --- a/tests/optimization.test.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { describe, expect, it } from 'vitest' -import { - type KnowledgeBaseCandidate, - knowledgeReleaseReportFromOptimization, - knowledgeVariantFromCandidate, - runKnowledgeBaseOptimization, -} from '../src/index' - -function candidate(id: string, quality: number): KnowledgeBaseCandidate { - return { - id, - units: [{ id: `${id}-unit`, title: id, text: `quality ${quality}` }], - metadata: { quality }, - } -} - -describe('runKnowledgeBaseOptimization', () => { - it('uses agent-eval multi-shot optimization for KB candidates', async () => { - const baseline = knowledgeVariantFromCandidate(candidate('baseline', 0.2)) - const result = await runKnowledgeBaseOptimization({ - runId: 'knowledge-opt', - seedVariants: [baseline], - searchScenarioIds: ['single-shot', 'multi-shot'], - reps: 1, - generations: 2, - populationSize: 2, - runner: { - run: ({ variant, scenarioId }) => ({ - trace: { - scenarioId, - transcript: `${scenarioId}:${variant.payload.id}`, - }, - }), - }, - scorer: { - score: ({ variant }) => ({ - 
score: Number(variant.payload.metadata?.quality ?? 0), - asi: - Number(variant.payload.metadata?.quality ?? 0) > 0.8 - ? [] - : [{ message: 'knowledge was incomplete', responsibleSurface: 'knowledge-base' }], - }), - }, - mutateAdapter: { - mutate: async ({ childCount, generation }) => - Array.from({ length: childCount }, (_, i) => - knowledgeVariantFromCandidate(candidate(`candidate-${generation}-${i}`, 0.9), { - generation, - }), - ), - }, - scalarWeights: { score: 1, cost: 0 }, - earlyStopOnNoImprovement: false, - }) - - expect(result.promotedVariant.payload.id).toContain('candidate') - expect(result.searchBestAggregate.meanScore).toBe(0.9) - const report = knowledgeReleaseReportFromOptimization(result, { - minScore: 0.1, - createdAt: '2026-01-01T00:00:00.000Z', - }) - expect(report.release.candidateId).toBe(result.promotedVariant.id) - expect(report.scorecard.target).toBe('agent-knowledge-base') - }) -})