diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 100644 index 0000000..9201ea6 --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1,93 @@ +# devbase — Cognitive Anchor + +> **Purpose**: This file is designed to survive context compression. It contains +> immutable facts and current state that every AI session must know before +> working on this project. If you are reading this after a context reset, +> treat this as your primary source of truth. + +--- + +## Immutable Facts(不可变事实) + +| ID | Fact | Source | Status | +|----|------|--------|--------| +| F-001 | Version | `Cargo.toml` | **v0.20.1** | +| F-002 | Edition | `Cargo.toml` | **Rust 2024** | +| F-003 | Test Coverage | CI | **491+ passed, 0 failed, 5 ignored** | +| F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) | +| F-005 | MCP Tools | `src/mcp/mod.rs` | **69** (5 Stable / 60 Beta / 4 Experimental) | +| F-006 | Schema Version | `registry/migrate.rs` | **v34** | +| F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | +| F-008 | SQLite Mode | `storage.rs` | **WAL mode** | +| F-009 | Clippy | CI | **`-D warnings` 全绿** | +| F-010 | Release Assets | GitHub Releases | **Linux + Windows x64** 预编译二进制 | + +## 架构红线(Architecture Guardrails) + +- **RF-1**: 无裸 `init_db()` 调用,全部使用 `StorageBackend` 注入 +- **RF-2**: `TempStorageBackend` 用于测试隔离(禁止 `DEVBASE_DATA_DIR` 竞态) +- **RF-3**: `entities` 表是唯一真相源 +- **RF-4**: 二进制上下文 ≤ 1MB +- **RF-5**: 模块间无循环依赖 +- **RF-6**: 生产代码零 `unwrap`/`expect`/`panic`(测试除外) +- **RF-7**: 路径输出必须脱敏(`sanitize_path()` 掩码 home 目录) + +## 当前上下文(Current Context) + +| 属性 | 值 | +|------|-----| +| 默认分支 | `main` | +| 最新 Release | `v0.20.1` (2026-05-17) | +| 当前 Phase | Phase 1 Production Hardening ✅ 完成 | +| 下一 Phase | Phase 12 — v0.21.0 "External Capability Grafting" | +| 活跃 PR | 无(PR #55 已合并) | + +## 已知架构 Gaps(不可与 Immutable Facts 混淆) + +这些是**待实现**的能力,不是 bug: + +| Gap | 影响 | 计划版本 | 状态 | +|-----|------|----------|------| +| ~~`relations` 表零生产读取路径~~ | 
~~统一实体模型的图遍历能力未暴露~~ | ~~v0.21.0~~ | **已完成** — `devkit_relation_store/query/delete` 已存在,`project_context` 已读取 | +| ~~Workflow 引擎零 MCP 暴露~~ | ~~AI 无法发现/触发工作流~~ | ~~v0.21.0~~ | **已完成** — `devkit_workflow_list/run/status` 已存在 | +| ~~`project_context` 不完整~~ | ~~缺少 relations/limits/skills/workflows~~ | ~~v0.21.0~~ | **已完成** — 已补充 `known_limits` + `skills` | +| 31/68 MCP 工具缺少调用测试 | 回归风险 | v0.21.0 | 待评估 | +| ~~`mcp/tools/repo.rs` 2376 行~~ | ~~维护负担~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `tools/` 目录,`repo.rs` 现 730 行 | +| ~~`init_db_at` 1214 行~~ | ~~迁移函数过大~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `registry/migrate.rs`(503 行)+ `repo.rs` + `vault.rs` + `links.rs` | + +## 防失忆校验清单(每次会话启动) + +- [ ] 已读取本文件(`devbase/.claude/CLAUDE.md`) +- [ ] 已确认 `Cargo.toml` 版本与上表 F-001 一致 +- [ ] 如果 handoff 文档说"未完成",确认是新环境问题还是全局阻塞 +- [ ] 如果修改 Schema,已更新 `registry/migrate.rs` 和 `SCHEMA_DDL` + +## 快速入口 + +| 你想做什么 | 命令 | +|-----------|------| +| 运行测试 | `cargo test --all-targets` | +| 检查 clippy | `cargo clippy --all-targets -- -D warnings` | +| 检查格式化 | `cargo fmt --check` | +| 运行 invariant checks | `scripts/invariant-checks/run-checks.ps1` | +| 启动 MCP Server | `cargo run -- mcp` | +| 启动 TUI | `cargo run -- tui` | +| 扫描当前目录 | `devbase scan . 
--register` | +| 索引仓库 | `devbase index` | + +## 关键文件映射 + +| 概念 | 文件 | +|------|------| +| 架构决策 | `docs/architecture/` | +| 稳定工具文档 | `docs/reference/stable-tools/` | +| 快速开始 | `docs/guides/quickstart.md` | +| MCP 集成指南 | `docs/guides/mcp-integration.md` | +| 变更日志 | `CHANGELOG.md` | +| Agent 简报 | `AGENTS.md` | +| 贡献指南 | `CONTRIBUTING.md` | + +--- + +**Last Updated**: 2026-05-20 by Claude Opus 4.7 +**Version**: v0.20.1 diff --git a/AGENTS.md b/AGENTS.md index 04ceedf..8e8f4e7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,7 +5,7 @@ > 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 - **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) -- **当前版本**:v0.20.0(Schema 34,68 MCP tools,451 tests) +- **当前版本**:v0.20.1(Schema 34,69 MCP tools,494 tests) - **已完成里程碑**:Registry God Object 完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ `devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + 
GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) - **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" - **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` @@ -23,7 +23,7 @@ Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema - **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**68 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具);配置见 `mcp.json` +- **MCP Server**:stdio only,**69 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具 + 1 个 DocumentConvert 工具);配置见 `mcp.json` - **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` - **统一节点模型**:`core::node::{Node, NodeType, 
Edge}` —— GitRepo / VaultNote / Asset / ExternalLink - **当前测试**:451+ lib passed / 0 failed / 5 ignored + 11/11 integration passed(`tests/cli.rs`) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70d6761..435b7d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 +- Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` +- `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) + +### Fixed + +- `mcp/tools/document_convert.rs` 原始字符串定界符修复(`r###"` 避免与 Python f-string `"##` 冲突) +- `cleanup_extracted_text` 单元测试期望值与实现语义对齐(保留最多 2 个连续空行) + +### Changed + +- `KNOWN_ISSUES.md` 更新:document_convert 从 P3 债务移至已解决归档;测试计数 485→494 +- `docs/reference/mcp-tools.md` 修正为 69 个工具,补充 Index / Workflow / Relation / KnownLimit / Session 分类 +- `docs/reference/stable-tools/README.md` 修正为 5 个 Stable 工具(删除过时的 `project_brief.md` / `hybrid_search.md` / `session_recall.md`) + ## [0.20.1] - 2026-05-17 ### Added diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md new file mode 100644 index 0000000..dbdb3f4 --- /dev/null +++ b/KNOWN_ISSUES.md @@ -0,0 +1,99 @@ +# Known Issues & Technical Debt + +> 本文件记录 devbase 的已知问题、技术债务和架构 blockers。 +> 不是 bug 列表 — 这些问题是设计层面的权衡或待完成的工作。 + +--- + +## P0 — 阻塞发布 + +无当前 P0 blocker。v0.20.1 已发布,所有 P0 架构 gaps 已关闭。 + +--- + +## P1 — 测试覆盖 + +### 28 个 MCP 工具缺少 invocation tests + +**现状**:68 个工具中,40 个有 dedicated `invoke()` 测试(+3 本批次新增),28 个仅有 name/schema smoke tests 或零覆盖。 + +**影响**:Beta → Stable 的 promote 需要测试背书;无测试的工具在重构时存在回归风险。 + +**缺失测试的工具清单**: + +| 工具 | Tier | 已有覆盖 | +|------|------|----------| +| 
`devkit_index` | Beta | 间接(scenario) | +| `devkit_index_health` | Beta | 无 | +| `devkit_index_stream` | Beta | 无 | +| `devkit_status` | Beta | 无 | +| `devkit_note` | Beta | 无 | +| `devkit_digest` | Experimental | 无 | +| `devkit_paper_index` | Experimental | 无 | +| `devkit_semantic_search` | Beta | 间接(scenario) | +| `devkit_embedding_store` | Beta | 无 | +| `devkit_embedding_search` | Beta | 无 | +| `devkit_cross_repo_search` | Beta | 间接(scenario) | +| `devkit_related_symbols` | Experimental | 无 | +| `devkit_search_quality` | Beta | 无 | +| `devkit_impact_analysis` | Beta | 无 | +| `devkit_project_brief` | Beta | 间接(scenario) | +| `devkit_knowledge_report` | Beta | 间接(scenario) | +| `devkit_session_*` × 13 | Beta/Exp | 部分 smoke | +| `devkit_workflow_*` × 3 | Beta | 部分(workflow.rs 单元测试) | +| `devkit_evaluate` | Beta | 无 | + +**建议**:按调用频率排序,优先为 Index、Status、Workflow、Session save/list 添加测试。 + +--- + +## P2 — 架构债务 + +### `mcp/tools/repo.rs` 730 行 + +**现状**:已从 2376 行拆至 730 行,但仍超过理想阈值(~300 行/模块)。 + +**计划**:按 domain 拆分为 `repo_health.rs` + `repo_query.rs` + `repo_index.rs`。已有 `docs/architecture/split-plan.md`。 + +### `src/mcp/mod.rs` 工具枚举集中化 + +**现状**:`McpToolEnum` 是包含 68 个变体的 giant enum,`tier()` 方法是 200+ 行的 match 表达式。 + +**影响**:新增工具需要修改 3 处(enum 定义、match arm、tier match),容易遗漏。 + +**建议**:考虑使用宏或 derive 自动生成 `McpToolEnum` 和 `tier()`,减少 boilerplate。 + +### Vault 笔记全文搜索性能 + +**现状**:`devkit_vault_search` 在内存中对所有笔记做线性扫描 + 字符串匹配。 + +**影响**:Vault 笔记数量 >1000 时,搜索延迟可能超过 1s。 + +**建议**:为 Vault 内容建立 Tantivy 索引(复用现有 symbol_index 基础设施),或至少增加关键词索引表。 + +--- + +## P3 — 文档与可观测性 + +### 性能基准缺失 + +**现状**:Criterion 已列为 dev-dependency,但无实际 benchmark 套件。 + +**建议**:为 Index、Query、VaultSearch 建立 Criterion benchmarks,记录基线到 CI 产物。 + +## 已解决(归档) + +| 问题 | 解决版本 | Commit | +|------|----------|--------| +| `relations` 表零生产读取路径 | v0.20.1 | `devkit_relation_store/query/delete` + `project_context` 读取 | +| Workflow 引擎零 MCP 暴露 | v0.20.1 | `devkit_workflow_list/run/status` | +| `project_context` 不完整 | v0.20.1 | 补充 
`known_limits` + `skills` | +| `mcp/tools/repo.rs` 2376 行 | v0.20.1 | 拆分为 `tools/` 目录,repo.rs 730 行 | +| `init_db_at` 1214 行 | v0.20.1 | 拆分为 `registry/migrate.rs`(503 行)+ 子模块 | +| 工具数量文档不一致 | v0.20.1 | `mcp-tools.md` 全面更新至 68 个 | +| 3 Stable 工具缺 invocation tests | v0.20.1 | `query_repos`, `vault_search`, `vault_read` 测试 added | +| `devkit_document_convert` 工具缺失 | v0.21.0 | `src/mcp/tools/document_convert.rs` + MCP 注册 | + +--- + +*Last updated: 2026-05-20* diff --git a/README.md b/README.md index b21f560..711901b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # devbase [![Version](https://img.shields.io/badge/version-v0.20.1-blue)](https://github.com/juice094/devbase/releases) -[![Tests](https://img.shields.io/badge/tests-485%2B%20passed-brightgreen)](./AGENTS.md) +[![Tests](https://img.shields.io/badge/tests-494%2B%20passed-brightgreen)](./AGENTS.md) [![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](./AGENTS.md) [![License](https://img.shields.io/badge/license-AGPL--3.0-orange)](./LICENSE) [![Rust](https://img.shields.io/badge/rust-1.95%2B-9cf)](https://www.rust-lang.org) @@ -28,7 +28,7 @@ devbase 是开发者的**世界模型编译器**。它将代码库、笔记、 │ Interaction Layer (人类与 AI 的接口) │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ │ │ TUI 仪表盘 │ │ MCP Server │ │ Workflow Engine │ │ -│ │ (ratatui) │ │ 68 Tools │ │ YAML + 拓扑调度 │ │ +│ │ (ratatui) │ │ 69 Tools │ │ YAML + 拓扑调度 │ │ │ └──────────────┘ └──────────────┘ └──────────────────────┘ │ ├─────────────────────────────────────────────────────────────────┤ │ Compilation Layer (World Model Compiler Core) │ @@ -120,7 +120,7 @@ cd devbase && cargo install --path . 
- **VaultList**:Vault 笔记列表,支持 PARA 方法笔记的快速检索与阅读 - **Session**:Agent 会话列表(● active / ◌ archived),选中后右侧面板展示该上下文的语义记忆(◆ decision ▪ constraint ★ discovery ✗ error) -### AI Layer — 68 个 MCP Tools +### AI Layer — 69 个 MCP Tools 基于 [Model Context Protocol](https://modelcontextprotocol.io) 标准化接口,stdio 本地进程通信。 @@ -382,6 +382,7 @@ TUI `[:]` 触发 embedding 语义搜索,失败自动降级为文本搜索。AI | **v0.18.0** | **✅ 已发布** | **ClaudeCode 工作流集成:`project_brief` + `impact_analysis` MCP Tools + Session 导出/导入 + `devbase-claude.ps1` 一键启动器;64 Tools 完整矩阵** | | v0.19.0 | ✅ 已发布 | **知识基础设施硬化**:SQLite WAL + Tantivy 健康评分 + Vault 导出 + Redis ADR 决策 | | v0.20.0 | ✅ 已发布 | 知识完备性:双向链接图遍历 + 笔记历史追踪 + 混合检索质量监控 + block 引用 + 性能回归基线 + 客户端无关原则;68 Tools | +| v0.20.1 | ✅ 已发布 | Phase 1 Production Hardening:Workflow E2E 测试 + RF-7 路径脱敏 + Tantivy 一致性修复 + TempStorageBackend + 架构不变量 CI;Stable 工具 invocation 测试补全;69 Tools | | **v0.20.1** | **✅ 当前** | **Phase 1 生产硬化**:workflow E2E 测试、RF-7 路径隐私脱敏、Tantivy 一致性修复、性能回归基线、Architecture Invariants CI | --- diff --git a/docs/reference/mcp-tools.md b/docs/reference/mcp-tools.md index 46143ac..958b852 100644 --- a/docs/reference/mcp-tools.md +++ b/docs/reference/mcp-tools.md @@ -1,6 +1,6 @@ # MCP Tools 参考 -devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通信。工具按稳定性分为三级: +devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通信。工具按稳定性分为三级: - **Stable** — 经过充分测试,schema 冻结。详见 [`stable-tools/`](stable-tools/README.md) 独立文档。 - **Beta** — 功能验证通过,schema 可能微调 @@ -22,19 +22,19 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| -| `devkit_code_metrics` | Experimental | 统计代码行数、语言分布、测试覆盖率 | `repo_id` | -| `devkit_module_graph` | Experimental | 获取仓库模块依赖图 | `repo_id` | +| `devkit_code_metrics` | Beta | 统计代码行数、语言分布、测试覆盖率 | `repo_id` | +| `devkit_module_graph` | Beta | 获取仓库模块依赖图 | `repo_id` | | `devkit_code_symbols` | Beta | 列出仓库中的代码符号(函数/结构体/枚举等) | `repo_id`, `file_path`, `symbol_type` | | `devkit_dependency_graph` 
| Beta | 获取跨仓库依赖关系图 | `repo_id` | -| `devkit_call_graph` | Experimental | 获取函数调用图 | `repo_id`, `symbol_name` | -| `devkit_dead_code` | Experimental | 检测未被调用的私有函数 | `repo_id`, `include_pub` | +| `devkit_call_graph` | Beta | 获取函数调用图 | `repo_id`, `symbol_name` | +| `devkit_dead_code` | Beta | 检测未被调用的私有函数 | `repo_id`, `include_pub` | ## 知识检索(8) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| | `devkit_semantic_search` | Beta | 基于 embedding 的语义代码搜索 | `repo_id`, `query`, `limit` | -| [`devkit_hybrid_search`](stable-tools/hybrid_search.md) | Stable | 向量语义 + 关键词 RRF 混合搜索 | `repo_id`, `query`, `limit` | +| `devkit_hybrid_search` | Beta | 向量语义 + 关键词 RRF 混合搜索 | `repo_id`, `query`, `limit` | | `devkit_cross_repo_search` | Beta | 跨仓库符号搜索(按 tag 过滤) | `tags`, `query`, `limit` | | `devkit_related_symbols` | Experimental | 查找与指定符号相关的符号 | `repo_id`, `symbol_name` | | `devkit_embedding_store` | Beta | 存储代码符号的 embedding 向量 | `repo_id`, `symbol_name`, `embedding` | @@ -42,7 +42,7 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_natural_language_query` | Beta | 自然语言查询(NLQ) | `query`, `limit` | | `devkit_knowledge_report` | Beta | 生成工作区知识覆盖报告 | `repo_id`, `activity_limit` | -## Vault 笔记(4) +## Vault 笔记(8) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| @@ -50,6 +50,10 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_vault_read` | Stable | 读取指定 Vault 笔记的完整内容 | `path` | | `devkit_vault_write` | Beta | 写入或更新 Vault 笔记(destructive gate) | `path`, `content`, `frontmatter` | | `devkit_vault_backlinks` | Beta | 查找指向指定笔记的反向链接 | `note_id` | +| `devkit_vault_daily` | Beta | 按日期列出 Vault 每日笔记 | `date`, `limit` | +| `devkit_vault_graph` | Beta | 获取 Vault 笔记链接图 | `repo_id`, `note_id`, `depth` | +| `devkit_vault_export` | Beta | 导出 Vault 笔记集合 | `query`, `format` | +| `devkit_vault_history` | Beta | 获取 Vault 笔记修改历史 | `path`, `limit` | ## Skill 运行时(4) @@ -60,33 +64,79 @@ devbase MCP Server 提供 **40 个 tools**,通过 
stdio 传输与 AI Agent 通 | `devkit_skill_run` | Beta | 执行指定 Skill(destructive gate) | `skill_id`, `args` | | `devkit_skill_discover` | Beta | 将当前项目封装为 Skill(destructive gate,dry_run 默认 true) | `path` | -## 项目上下文(2) +## 项目上下文(3) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| | `devkit_project_context` | Stable | 获取项目统一上下文(repo + vault + assets + modules + symbols + calls) | `project` | -| [`devkit_project_brief`](stable-tools/project_brief.md) | Stable | 生成 Markdown 项目摘要(架构 + 活动 + 限制),供 LLM 注入 | `repo_id`, `max_tokens` | +| `devkit_project_brief` | Beta | 生成 Markdown 项目摘要(架构 + 活动 + 限制),供 LLM 注入 | `repo_id`, `max_tokens` | +| `devkit_impact_analysis` | Beta | 分析代码变更影响范围 | `repo_id`, `file_path` | -## Session 管理(1) +## Session 管理(13) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| -| [`devkit_session_recall`](stable-tools/session_recall.md) | Stable | 基于 embedding 的语义记忆召回 | `context_id`, `query_embedding`, `limit` | +| `devkit_session_save` | Beta | 保存当前会话上下文 | `name`, `tags` | +| `devkit_session_list` | Beta | 列出已保存的会话 | `limit` | +| `devkit_session_resume` | Beta | 恢复指定会话 | `session_id` | +| `devkit_session_attach` | Beta | 附加到运行中的会话 | `session_id` | +| `devkit_session_detach` | Beta | 从当前会话分离 | `session_id` | +| `devkit_session_activate` | Beta | 激活会话上下文 | `session_id` | +| `devkit_session_search` | Beta | 搜索会话历史 | `query`, `limit` | +| `devkit_session_capture` | Beta | 捕获当前会话快照 | `name` | +| `devkit_session_workflows` | Beta | 获取会话关联的工作流 | `session_id` | +| `devkit_session_recall` | Experimental | 基于 embedding 的语义记忆召回 | `context_id`, `query_embedding`, `limit` | +| `devkit_session_index` | Experimental | 索引会话内容用于搜索 | `session_id` | +| `devkit_session_export` | Experimental | 导出会话为文件 | `session_id`, `format` | +| `devkit_session_import` | Experimental | 从文件导入会话 | `path` | + +## Index 管理(3) -## 其他(10) +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_index` | Beta | 索引仓库摘要、模块结构、代码符号 | 
`path` | +| `devkit_index_health` | Beta | 检查索引健康状态 | `repo_id` | +| `devkit_index_stream` | Beta | 流式索引进度 | `path` | + +## Workflow(3) + +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_workflow_list` | Beta | 列出可用工作流 | `limit` | +| `devkit_workflow_run` | Beta | 执行工作流 | `workflow_id`, `args` | +| `devkit_workflow_status` | Beta | 查询工作流执行状态 | `workflow_id` | + +## Relation 图谱(3) + +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_relation_store` | Beta | 存储实体间关系 | `from`, `to`, `relation_type` | +| `devkit_relation_query` | Beta | 查询实体关系 | `entity_id`, `relation_type` | +| `devkit_relation_delete` | Beta | 删除实体关系 | `from`, `to`, `relation_type` | + +## Known Limit(2) + +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_known_limit_store` | Beta | 记录已知限制(Hard Veto / Known Bug) | `id`, `category`, `description` | +| `devkit_known_limit_list` | Beta | 列出已知限制 | `category`, `mitigated` | + +## 其他(12) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| | `devkit_query` | Beta | 通用查询(repo/tag/keyword) | `query`, `limit`, `page` | | `devkit_note` | Beta | 为仓库添加 AI 发现笔记 | `repo_id`, `text`, `author` | +| `devkit_status` | Beta | 检查 devbase 服务状态 | — | | `devkit_digest` | Experimental | 生成每日知识摘要 | — | | `devkit_paper_index` | Experimental | 索引学术论文 | `title`, `authors`, `venue` | +| `devkit_search_quality` | Beta | 评估搜索质量指标 | `repo_id`, `query` | | `devkit_experiment_log` | Beta | 记录实验结果 | `repo_id`, `paper_id`, `status` | | `devkit_github_info` | Beta | 查询 GitHub 仓库信息 | `owner`, `repo` | | `devkit_arxiv_fetch` | Beta | 从 arXiv 获取论文元数据 | `query`, `max_results` | -| `devkit_known_limit_store` | Beta | 记录已知限制(Hard Veto / Known Bug) | `id`, `category`, `description` | -| `devkit_known_limit_list` | Beta | 列出已知限制 | `category`, `mitigated` | | `devkit_oplog_query` | Beta | 查询操作日志 | `limit`, `repo_id` | +| `devkit_evaluate` | Beta | 评估工具调用结果 | `tool_name`, 
`result` | +| `devkit_document_convert` | Experimental | PDF/PPTX → Markdown 转换 | `source_path`, `output_path` | --- @@ -98,6 +148,10 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 - `devkit_skill_run` - `devkit_skill_discover` - `devkit_vault_write` +- `devkit_relation_store` +- `devkit_relation_delete` +- `devkit_known_limit_store` +- `devkit_workflow_run` --- diff --git a/docs/reference/stable-tools/README.md b/docs/reference/stable-tools/README.md index 66e3b36..3b0b495 100644 --- a/docs/reference/stable-tools/README.md +++ b/docs/reference/stable-tools/README.md @@ -1,15 +1,15 @@ # Stable Tools Reference -Tools in this directory have **frozen schemas** as of devbase v0.21.0. +Tools in this directory have **frozen schemas** as of devbase v0.20.1. Breaking changes require a major version bump and a deprecation cycle. -| Tool | Purpose | File | -|------|---------|------| -| [`devkit_health`](health.md) | Check Git health (dirty/ahead/behind) of all registered repos | `repo.rs` | -| [`devkit_project_brief`](project_brief.md) | Generate a Markdown project brief for LLM context injection | `brief.rs` | -| [`devkit_hybrid_search`](hybrid_search.md) | Vector + keyword RRF search for code symbols | `search.rs` | -| [`devkit_vault_search`](vault_search.md) | Keyword search across Vault notes (titles, tags, content) | `vault.rs` | -| [`devkit_session_recall`](session_recall.md) | Semantic memory recall by embedding similarity | `session.rs` | +| Tool | Purpose | File | Test Coverage | +|------|---------|------|---------------| +| [`devkit_health`](health.md) | Check Git health (dirty/ahead/behind) of all registered repos | `repo.rs` | `test_tools_call_devkit_health` | +| `devkit_query_repos` | Query registered repos with language/tag/status filters | `repo.rs` | `test_tools_call_devkit_query_repos` | +| [`devkit_vault_search`](vault_search.md) | Keyword search across Vault notes (titles, tags, content) | `vault.rs` | `test_tools_call_devkit_vault_search` 
| +| `devkit_vault_read` | Read full content of a Vault note including frontmatter | `vault.rs` | `test_tools_call_devkit_vault_read` | +| `devkit_project_context` | Unified project snapshot (repo + vault + symbols + relations + limits + skills) | `context.rs` | `test_tools_call_devkit_project_context` | ## Schema stability guarantee @@ -19,6 +19,8 @@ Breaking changes require a major version bump and a deprecation cycle. ## Changelog -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | 5 tools promoted to Stable; schemas frozen | +| Version | Change | +|---------|--------| +| v0.20.1 | 5 Stable tools verified with dedicated invocation tests | +| v0.20.0 | `project_context` enriched with `known_limits` and `skills` | +| v0.14.2 | 5 tools promoted to Stable tier | diff --git a/docs/reference/stable-tools/hybrid_search.md b/docs/reference/stable-tools/hybrid_search.md deleted file mode 100644 index 917f25b..0000000 --- a/docs/reference/stable-tools/hybrid_search.md +++ /dev/null @@ -1,93 +0,0 @@ -# devkit_hybrid_search - -> **Tier**: Stable (frozen at v0.21.0) -> **Source**: `src/mcp/tools/search.rs` — `DevkitHybridSearchTool` - -Hybrid code symbol search combining vector embeddings and keyword matching via Reciprocal Rank Fusion (RRF). 
- -## Purpose - -- Find code related to a concept ("authentication", "error handling") -- Search with either natural language or an embedding vector -- Get robust results even when the embedding provider is offline - -## When NOT to use - -- Exact keyword searches → use `devkit_natural_language_query` -- Finding symbol definitions by exact name → use `devkit_code_symbols` -- When no embeddings exist and no keyword query is available - -## Input Schema - -```json -{ - "type": "object", - "properties": { - "repo_id": { "type": "string" }, - "query_text": { "type": "string", "description": "Keyword or natural language query" }, - "query_embedding": { - "type": "array", - "items": { "type": "number" }, - "description": "Optional query embedding vector" - }, - "limit": { "type": "integer", "default": 10 } - }, - "required": ["repo_id", "query_text"] -} -``` - -| Parameter | Type | Required | Default | Description | -|-----------------|------------|----------|---------|--------------------------------------------| -| `repo_id` | string | Yes | — | Registered repository ID | -| `query_text` | string | Yes | — | Keyword or natural language query | -| `query_embedding`| number[] | No | — | Optional f32 vector for semantic search | -| `limit` | integer | No | 10 | Max results (capped at 50) | - -## Behavior - -| Scenario | Behavior | -|---------------------------------------|---------------------------------------------------| -| `query_embedding` provided | RRF fusion: vector similarity (70%) + keyword (30%) | -| `query_embedding` omitted | Falls back to pure keyword search on symbol names/signatures | -| No embeddings exist for repo | Gracefully degrades to keyword search | -| Embedding generation fails | Warns in logs, falls back to keyword search | - -## Output Schema - -```json -{ - "success": true, - "repo_id": "devbase", - "query_text": "error handling", - "count": 3, - "symbols": [ - { - "name": "handle_error", - "file_path": "src/errors.rs", - "line_start": 42, - 
"similarity_score": 0.87 - } - ] -} -``` - -| Field | Type | Description | -|------------------|---------|------------------------------------------| -| `name` | string | Symbol name | -| `file_path` | string | Relative file path in the repo | -| `line_start` | integer | Line number where symbol begins | -| `similarity_score`| number | RRF score (0.0–1.0, higher is better) | - -## Errors - -| Error | Cause | -|--------------------|------------------------------------------| -| `repo_id required` | Missing `repo_id` | -| `query_text required`| Missing `query_text` | -| Database error | SQLite query failure | - -## Changelog - -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | Schema frozen as Stable | diff --git a/docs/reference/stable-tools/project_brief.md b/docs/reference/stable-tools/project_brief.md deleted file mode 100644 index f668eaf..0000000 --- a/docs/reference/stable-tools/project_brief.md +++ /dev/null @@ -1,71 +0,0 @@ -# devkit_project_brief - -> **Tier**: Stable (frozen at v0.21.0) -> **Source**: `src/mcp/tools/brief.rs` — `DevkitProjectBriefTool` - -Generate a Markdown project brief optimized for LLM context injection. 
- -## Purpose - -- Summarize a repository's architecture, symbols, and recent activity -- Produce a concise context document for LLM prompts -- Surface known limits, active contexts, and hot files - -## When NOT to use - -- Searching for specific symbols → use `devkit_code_symbols` -- Reading full source files → use filesystem tools -- Getting Git health status → use `devkit_health` - -## Input Schema - -```json -{ - "type": "object", - "properties": { - "repo_id": { "type": "string" }, - "max_tokens": { "type": "integer", "default": 2000 } - }, - "required": ["repo_id"] -} -``` - -| Parameter | Type | Required | Default | Description | -|--------------|---------|----------|---------|---------------------------------------------| -| `repo_id` | string | Yes | — | Registered repository ID | -| `max_tokens` | integer | No | 2000 | Approximate token budget (1 token ~ 4 chars)| - -## Output Schema - -```json -{ - "success": true, - "repo_id": "devbase", - "brief": "# Project Brief: devbase\n\n## Overview\n- **Language**: rust\n- **Tags**: cli, rust, active\n- **Path**: `C:\\Users\\dev\\devbase`\n\n## Architecture\n- `main` (function)\n- `scan` (function)\n..." -} -``` - -### Brief sections (in order) - -1. **Overview** — language, tags, local path -2. **Architecture** — modules (up to 20) and key symbols (up to 15) -3. **Recent Activity** — last 7 commits, hot files (14d change count) -4. **Known Limits & Tech Debt** — open known_limits entries (up to 10) -5. **Active Contexts** — linked agent contexts with memories - -### Truncation behavior - -If the generated brief exceeds `max_tokens * 4` characters, it is truncated at the nearest section boundary (`\n## `) with an ellipsis note. 
- -## Errors - -| Error | Cause | -|--------------------|-------------------------------------------------| -| `repo_id required` | Missing or empty `repo_id` argument | -| `repo not found` | `repo_id` does not exist in the registry | - -## Changelog - -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | Schema frozen as Stable | diff --git a/docs/reference/stable-tools/session_recall.md b/docs/reference/stable-tools/session_recall.md deleted file mode 100644 index 5d1a7dd..0000000 --- a/docs/reference/stable-tools/session_recall.md +++ /dev/null @@ -1,91 +0,0 @@ -# devkit_session_recall - -> **Tier**: Stable (frozen at v0.21.0) -> **Source**: `src/mcp/tools/session.rs` — `DevkitSessionRecallTool` - -Semantic memory recall for an active agent session. Finds relevant past memories by meaning rather than exact keyword. - -## Purpose - -- Surface decisions, constraints, or discoveries related to the current task -- Inject top-k relevant memories into prompt context -- Recall what was discussed in a previous project session - -## When NOT to use - -- Keyword-based memory search → use `devkit_session_search` -- Listing all sessions → use `devkit_session_list` -- Saving a new memory → use `devkit_session_capture` -- When embeddings have not been stored for memories → use `devkit_session_index` first - -## Input Schema - -```json -{ - "type": "object", - "properties": { - "context_id": { "type": "string", "description": "Session ID (optional)" }, - "query_embedding": { - "type": "array", - "items": { "type": "number" }, - "description": "Query vector as f32 array (externally generated)" - }, - "limit": { "type": "integer", "default": 5 } - }, - "required": ["query_embedding"] -} -``` - -| Parameter | Type | Required | Default | Description | -|-----------------|------------|----------|---------|--------------------------------------------| -| `context_id` | string | No | — | Session ID. 
Falls back to `DEVBASE_ACTIVE_CONTEXT` env var or `.active_context` state file | -| `query_embedding`| number[] | Yes | — | Externally-generated f32 embedding vector | -| `limit` | integer | No | 5 | Max results (capped at 20) | - -## Important: Embedding source - -devbase does **NOT** generate embeddings. The caller must provide a pre-computed vector from an external provider (Ollama, OpenAI, etc.). Use the same model that was used to index the memories via `devkit_session_index`. - -## Output Schema - -```json -{ - "success": true, - "context_id": "project-alpha", - "count": 3, - "memories": [ - { - "id": 42, - "type": "decision", - "content": "Use SQLite WAL mode for concurrent reads", - "created_at": "2026-05-10T14:32:00Z", - "embedding_model": "nomic-embed-text", - "score": 0.91 - } - ] -} -``` - -| Field | Type | Description | -|-------------------|---------|------------------------------------------| -| `id` | integer | Memory row ID | -| `type` | string | Memory classification: decision, constraint, note, discovery, error, action | -| `content` | string | Full memory text | -| `created_at` | string | ISO 8601 timestamp | -| `embedding_model` | string | Model used when memory was indexed | -| `score` | number | Cosine similarity (0.0–1.0) | - -## Errors - -| Error | Cause | -|------------------------------|-----------------------------------------------------| -| `query_embedding required` | Missing or empty embedding array | -| `query_embedding must not be empty` | Array contains no valid f32 values | -| No active session | `context_id` omitted and no active session set | -| Memory not found | `memory_id` in `devkit_session_index` does not exist| - -## Changelog - -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | Schema frozen as Stable | diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 8f91f8b..5219696 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -124,6 +124,7 @@ pub enum McpToolEnum { 
WorkflowStatus(DevkitWorkflowStatusTool), OplogQuery(DevkitOplogQueryTool), Evaluate(DevkitEvaluateTool), + DocumentConvert(DevkitDocumentConvertTool), } /// Stability tier for MCP tools. @@ -220,6 +221,7 @@ impl McpToolEnum { McpToolEnum::WorkflowStatus(_) => ToolTier::Beta, McpToolEnum::OplogQuery(_) => ToolTier::Beta, McpToolEnum::Evaluate(_) => ToolTier::Beta, + McpToolEnum::DocumentConvert(_) => ToolTier::Experimental, } } } @@ -295,6 +297,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowStatus(t) => t.name(), McpToolEnum::OplogQuery(t) => t.name(), McpToolEnum::Evaluate(t) => t.name(), + McpToolEnum::DocumentConvert(t) => t.name(), } } @@ -368,6 +371,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowStatus(t) => t.schema(), McpToolEnum::OplogQuery(t) => t.schema(), McpToolEnum::Evaluate(t) => t.schema(), + McpToolEnum::DocumentConvert(t) => t.schema(), } } @@ -445,6 +449,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowStatus(t) => t.invoke(args, ctx).await, McpToolEnum::OplogQuery(t) => t.invoke(args, ctx).await, McpToolEnum::Evaluate(t) => t.invoke(args, ctx).await, + McpToolEnum::DocumentConvert(t) => t.invoke(args, ctx).await, } } } @@ -687,7 +692,7 @@ impl McpServer { /// Build an MCP server with optional tier filtering. /// -/// If `tiers` is `None`, all 37 tools are registered (backward compatible). +/// If `tiers` is `None`, all 69 tools are registered (backward compatible). /// If `tiers` is provided, only tools whose tier is in the set are registered. 
pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { let mut server = McpServer::new(); @@ -760,6 +765,7 @@ pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { McpToolEnum::WorkflowStatus(DevkitWorkflowStatusTool), McpToolEnum::OplogQuery(DevkitOplogQueryTool), McpToolEnum::Evaluate(DevkitEvaluateTool), + McpToolEnum::DocumentConvert(DevkitDocumentConvertTool), ]; for tool in all_tools { if let Some(allowed) = tiers diff --git a/src/mcp/tests.rs b/src/mcp/tests.rs index 882d6ab..653463b 100644 --- a/src/mcp/tests.rs +++ b/src/mcp/tests.rs @@ -11,6 +11,17 @@ fn test_ctx() -> (crate::storage::AppContext, tempfile::TempDir) { (ctx, tmp) } +/// Lightweight helper: seed a single repo into the entities table. +fn seed_repo(ctx: &crate::storage::AppContext, id: &str, lang: &str) { + let conn = ctx.conn().unwrap(); + let now = chrono::Utc::now().to_rfc3339(); + conn.execute( + "INSERT INTO entities (id, entity_type, name, local_path, metadata, created_at, updated_at, language, discovered_at, workspace_type, data_tier, stars) + VALUES (?1, 'repo', ?2, ?3, ?4, ?5, ?5, ?6, ?5, 'git', 'private', 0)", + rusqlite::params![id, id, format!("/tmp/{}", id), "{}", &now, lang], + ).unwrap(); +} + #[tokio::test] async fn test_initialize() { let server = build_server(); @@ -39,7 +50,7 @@ async fn test_tools_list() { let (mut ctx, _tmp) = test_ctx(); let resp = server.handle_request(req, &mut ctx).await.unwrap(); let tools = resp.get("result").unwrap().get("tools").unwrap().as_array().unwrap(); - assert_eq!(tools.len(), 68); + assert_eq!(tools.len(), 69); let names: Vec<&str> = tools.iter().map(|t| t.get("name").unwrap().as_str().unwrap()).collect(); assert!(names.contains(&"devkit_index_health")); assert!(names.contains(&"devkit_vault_export")); @@ -53,6 +64,7 @@ async fn test_tools_list() { assert!(names.contains(&"devkit_session_export")); assert!(names.contains(&"devkit_session_import")); assert!(names.contains(&"devkit_evaluate")); + 
assert!(names.contains(&"devkit_document_convert")); assert!(names.contains(&"devkit_scan")); assert!(names.contains(&"devkit_health")); assert!(names.contains(&"devkit_sync")); @@ -217,6 +229,156 @@ async fn test_tools_call_devkit_project_context() { assert!(parsed.get("assets").unwrap().as_array().unwrap().is_empty()); } +#[tokio::test] +async fn test_tools_call_devkit_query_repos() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // 1. Empty registry returns empty results + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 10, + "method": "tools/call", + "params": { + "name": "devkit_query_repos", + "arguments": { "language": "" } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); + + // 2. 
Seeded repo is returned with correct filtering + seed_repo(&ctx, "test-repo", "rust"); + let req2 = serde_json::json!({ + "jsonrpc": "2.0", + "id": 11, + "method": "tools/call", + "params": { + "name": "devkit_query_repos", + "arguments": { "language": "rust" } + } + }); + let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); + let result2 = resp2.get("result").unwrap(); + let content2 = result2.get("content").unwrap().as_array().unwrap(); + let text2 = content2[0].get("text").unwrap().as_str().unwrap(); + let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); + assert_eq!(parsed2.get("success").unwrap(), true); + let repos = parsed2.get("repos").unwrap().as_array().unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].get("id").unwrap().as_str().unwrap(), "test-repo"); + assert_eq!(repos[0].get("language").unwrap().as_str().unwrap(), "rust"); +} + +#[tokio::test] +async fn test_tools_call_devkit_vault_search() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // Setup: create vault note and scan + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + std::fs::write( + vault_dir.join("test-note.md"), + "---\ntitle: Test Note\ntags: [test, vault]\n---\n\nThis is a test note for vault search.\n", + ).unwrap(); + let mut conn = ctx.conn().unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); + + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 12, + "method": "tools/call", + "params": { + "name": "devkit_vault_search", + "arguments": { "query": "test note" } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + 
assert_eq!(parsed.get("success").unwrap(), true); + let notes = parsed.get("notes").unwrap().as_array().unwrap(); + assert!(!notes.is_empty(), "vault_search should find the test-note"); + assert!( + notes + .iter() + .any(|n| n.get("title").and_then(|v| v.as_str()) == Some("Test Note")), + "vault_search should return Test Note" + ); +} + +#[tokio::test] +async fn test_tools_call_devkit_vault_read() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // Setup: create vault note and scan + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + let note_path = vault_dir.join("test-read.md"); + std::fs::write( + ¬e_path, + "---\ntitle: Readable Note\ntags: [read]\n---\n\nContent body here.\n", + ) + .unwrap(); + let mut conn = ctx.conn().unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); + + // 1. Read existing note by absolute path + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 13, + "method": "tools/call", + "params": { + "name": "devkit_vault_read", + "arguments": { "path": note_path.to_str().unwrap() } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("path").unwrap().as_str().unwrap(), note_path.to_str().unwrap()); + let frontmatter = parsed.get("frontmatter").unwrap().as_str().unwrap(); + assert!(frontmatter.contains("title: Readable Note")); + let body = parsed.get("content").unwrap().as_str().unwrap(); + assert!(body.contains("Content body here.")); + + // 2. 
Read non-existent note returns error + let req2 = serde_json::json!({ + "jsonrpc": "2.0", + "id": 14, + "method": "tools/call", + "params": { + "name": "devkit_vault_read", + "arguments": { "path": "/nonexistent/path/note.md" } + } + }); + let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); + let result2 = resp2.get("result").unwrap(); + assert_eq!(result2.get("isError").unwrap(), true); + let content2 = result2.get("content").unwrap().as_array().unwrap(); + let text2 = content2[0].get("text").unwrap().as_str().unwrap(); + let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); + assert_eq!(parsed2.get("success").unwrap(), false); + assert!( + parsed2.get("error").unwrap().as_str().unwrap().contains("not found") + || parsed2.get("error").unwrap().as_str().unwrap().contains("unreadable") + ); +} + #[tokio::test] async fn test_tools_call_devkit_arxiv_fetch() { let server = build_server(); @@ -240,6 +402,78 @@ async fn test_tools_call_devkit_arxiv_fetch() { assert!(!parsed.get("error").unwrap().as_str().unwrap().is_empty()); } +#[tokio::test] +async fn test_tools_call_devkit_status() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 15, + "method": "tools/call", + "params": { + "name": "devkit_status", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + // Empty registry → overall "fresh" (vacuous truth: all 0 repos are fresh) + assert_eq!(parsed.get("overall").unwrap().as_str().unwrap(), "fresh"); + let repos = parsed.get("repos").unwrap().as_array().unwrap(); + assert!(repos.is_empty()); +} + +#[tokio::test] +async fn 
test_tools_call_devkit_workflow_list() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 16, + "method": "tools/call", + "params": { + "name": "devkit_workflow_list", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); + let workflows = parsed.get("workflows").unwrap().as_array().unwrap(); + assert!(workflows.is_empty()); +} + +#[tokio::test] +async fn test_tools_call_devkit_index() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 17, + "method": "tools/call", + "params": { + "name": "devkit_index", + "arguments": { "path": "" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + // Empty registry → indexed 0 repos + assert_eq!(parsed.get("indexed").unwrap().as_i64().unwrap(), 0); +} + #[tokio::test] async fn test_tools_call_devkit_skill_list() { let server = build_server(); @@ -663,6 +897,29 @@ async fn test_scenario_one_project_onboarding() { ); } +#[tokio::test] +async fn test_tools_call_devkit_document_convert_not_found() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 18, + "method": "tools/call", + "params": { + "name": 
"devkit_document_convert", + "arguments": { "source_path": "/nonexistent/file.pdf" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), false); + let err = parsed.get("error").unwrap().as_str().unwrap(); + assert!(err.contains("not found") || err.contains("Source file")); +} + #[tokio::test] async fn test_scenario_two_semantic_exploration() { let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); diff --git a/src/mcp/tools/context.rs b/src/mcp/tools/context.rs index 73ece4d..8e2370c 100644 --- a/src/mcp/tools/context.rs +++ b/src/mcp/tools/context.rs @@ -37,7 +37,9 @@ Returns: JSON object with: - vault_notes: array of linked and keyword-matched notes (id, title, source: "link" or "search") - assets: array of files/folders from the project's assets directory - relations: array of knowledge-graph relations (from relations table) linking this entity to others - - workflows: array of recent workflow executions for this repo"#, + - workflows: array of recent workflow executions for this repo + - known_limits: array of unmitigated known limits (L3 risk layer entries) + - skills: array of available devbase skills"#, "inputSchema": { "type": "object", "properties": { @@ -397,7 +399,47 @@ Returns: JSON object with: } } - anyhow::Ok((repo_json, linked_vaults, modules, symbols, calls, assets, activity, related_symbols, relations, workflows, recent_commits, hot_files)) + // 12. 
Known limits (unmitigated — highest relevance for active projects) + let mut known_limits = Vec::new(); + match crate::registry::known_limits::list_known_limits(&conn, None, Some(false)) { + Ok(limits) => { + for limit in limits.into_iter().take(20) { + known_limits.push(serde_json::json!({ + "id": limit.id, + "category": limit.category, + "description": limit.description, + "severity": limit.severity, + "source": limit.source, + "first_seen_at": limit.first_seen_at.to_rfc3339(), + })); + } + } + Err(e) => { + tracing::warn!("list_known_limits failed: {}", e); + } + } + + // 13. Available skills (top 20) + let mut skills = Vec::new(); + match crate::skill_runtime::registry::list_skills(&conn, None, None) { + Ok(skill_rows) => { + for s in skill_rows.into_iter().take(20) { + skills.push(serde_json::json!({ + "id": s.id, + "name": s.name, + "version": s.version, + "skill_type": s.skill_type.as_str(), + "description": s.description, + "tags": s.tags, + })); + } + } + Err(e) => { + tracing::warn!("list_skills failed: {}", e); + } + } + + anyhow::Ok((repo_json, linked_vaults, modules, symbols, calls, assets, activity, related_symbols, relations, workflows, recent_commits, hot_files, known_limits, skills)) } }) .await @@ -416,6 +458,8 @@ Returns: JSON object with: workflows, recent_commits, hot_files, + known_limits, + skills, ) = result; Ok(serde_json::json!({ @@ -433,6 +477,8 @@ Returns: JSON object with: "assets": assets, "recent_commits": recent_commits, "hot_files": hot_files, + "known_limits": known_limits, + "skills": skills, })) } } diff --git a/src/mcp/tools/document_convert.rs b/src/mcp/tools/document_convert.rs new file mode 100644 index 0000000..f2c2ef4 --- /dev/null +++ b/src/mcp/tools/document_convert.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use crate::mcp::McpTool; +use crate::storage::AppContext; +use anyhow::Context; +use std::path::Path; + +#[derive(Clone)] +pub struct DevkitDocumentConvertTool; + +impl 
McpTool for DevkitDocumentConvertTool { + fn name(&self) -> &'static str { + "devkit_document_convert" + } + + fn schema(&self) -> serde_json::Value { + serde_json::json!({ + "description": r#"Convert PDF/PPTX documents to Markdown text. + +Use this when the user wants to: +- Extract text content from course materials, papers, or slides +- Convert binary documents into editable Markdown for the Vault +- Bulk-process downloaded files before organizing them + +Supported formats: +- PDF (via pdftotext) +- PPTX (via python-pptx) + +Parameters: +- source_path: Absolute path to the source document +- output_path: Optional absolute path for the output Markdown file. Defaults to source_path with .md extension. + +Returns: JSON with output_path, extracted character count, and a quality hint (good / poor)."#, + "inputSchema": { + "type": "object", + "properties": { + "source_path": { "type": "string", "description": "Absolute path to the source document" }, + "output_path": { "type": "string", "description": "Optional absolute path for output Markdown" } + }, + "required": ["source_path"] + } + }) + } + + async fn invoke( + &self, + args: serde_json::Value, + _ctx: &mut AppContext, + ) -> anyhow::Result { + let source_path = args + .get("source_path") + .and_then(|v| v.as_str()) + .context("Missing required argument: source_path")?; + + let source = Path::new(source_path); + anyhow::ensure!(source.exists(), "Source file not found: {}", source_path); + + let output_path = args + .get("output_path") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| source.with_extension("md").to_string_lossy().to_string()); + + let ext = source.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase(); + + let (text, quality) = match ext.as_str() { + "pdf" => extract_pdf(source_path).await?, + "pptx" | "ppt" => extract_pptx(source_path).await?, + other => anyhow::bail!("Unsupported file format: '{}' (supported: pdf, pptx)", other), + }; + + let cleaned = 
cleanup_extracted_text(&text); + let frontmatter = + format!("---\nsource: \"{}\"\nextract_quality: \"{}\"\n---\n\n", source_path, quality); + let md_content = format!("{}{}", frontmatter, cleaned); + + std::fs::write(&output_path, md_content) + .with_context(|| format!("Failed to write output: {}", output_path))?; + + Ok(serde_json::json!({ + "success": true, + "output_path": output_path, + "extracted_chars": text.len(), + "quality": quality, + })) + } +} + +async fn extract_pdf(path: &str) -> anyhow::Result<(String, &'static str)> { + let output = tokio::process::Command::new("pdftotext") + .args(["-", path, "-"]) // read from file, write to stdout + .output() + .await + .context("Failed to spawn pdftotext — is poppler installed?")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!("pdftotext failed: {}", stderr); + } + + let text = String::from_utf8_lossy(&output.stdout).to_string(); + // Heuristic: if output is very short relative to file size, quality is poor + let quality = if text.len() < 200 { "poor" } else { "good" }; + Ok((text, quality)) +} + +async fn extract_pptx(path: &str) -> anyhow::Result<(String, &'static str)> { + let script = format!( + r###" +from pptx import Presentation +import sys +prs = Presentation(r'{}') +lines = [] +for i, slide in enumerate(prs.slides, 1): + lines.append(f"## Slide {{i}}") + for shape in slide.shapes: + if hasattr(shape, "text") and shape.text.strip(): + lines.append(shape.text.strip()) + lines.append("") +print("\n".join(lines)) +"###, + path.replace('\\', "/") + ); + + let output = tokio::process::Command::new("python") + .arg("-c") + .arg(&script) + .output() + .await + .context("Failed to spawn python — is python-pptx installed?")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!("python-pptx extraction failed: {}", stderr); + } + + let text = String::from_utf8_lossy(&output.stdout).to_string(); + let 
quality = if text.len() < 100 { "poor" } else { "good" }; + Ok((text, quality)) +} + +fn cleanup_extracted_text(text: &str) -> String { + // Collapse 3+ consecutive blank lines to 2 + let mut result = String::new(); + let mut blank_count = 0; + for line in text.lines() { + if line.trim().is_empty() { + blank_count += 1; + if blank_count <= 2 { + result.push('\n'); + } + } else { + blank_count = 0; + result.push_str(line); + result.push('\n'); + } + } + result.trim().to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cleanup_extracted_text() { + let input = "line1\n\n\n\n\nline2\n\nline3"; + let out = cleanup_extracted_text(input); + assert_eq!(out, "line1\n\n\nline2\n\nline3"); + } + + #[test] + fn test_name() { + let tool = DevkitDocumentConvertTool; + assert_eq!(tool.name(), "devkit_document_convert"); + } +} diff --git a/src/mcp/tools/mod.rs b/src/mcp/tools/mod.rs index 3bcf083..cea8769 100644 --- a/src/mcp/tools/mod.rs +++ b/src/mcp/tools/mod.rs @@ -2,6 +2,7 @@ // Copyright (c) 2026 juice094 pub mod brief; pub mod context; +pub mod document_convert; pub mod evaluate; pub mod impact; pub mod index_health; @@ -23,6 +24,7 @@ pub mod search; pub use brief::*; pub use context::*; +pub use document_convert::*; pub use impact::*; pub use index_health::*; pub use known_limit::*;