nablarch · kiyotis · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/.claude/skills/nabledge-6/scripts/bm25-search.sh b/.claude/skills/nabledge-6/scripts/bm25-search.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# BM25 pre-search over nabledge-6 knowledge files.
+#
+# - Builds a bm25s index from all knowledge/*.json section titles+content on first run
+# - Saves index to scripts/.bm25-index/; reloads on subsequent runs
+# - Detects staleness by comparing the knowledge-file state recorded in meta.json at build time (newest JSON mtime) with the current state; rebuilds if stale
+# - Returns top-20 sections by BM25 score
+#
+# Arguments: one or more search terms
+# Output: JSON array to stdout — [{file, section_id, section_title, score}, ...]
+#         Empty array [] if no hits or no arguments.
+# Exit code: non-zero on error (missing bm25s, index build failure, etc.)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SKILL_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+KNOWLEDGE_DIR="${KNOWLEDGE_DIR:-$SKILL_DIR/knowledge}"
+INDEX_DIR="$SCRIPT_DIR/.bm25-index"
+
+if [ $# -eq 0 ]; then
+  echo "[]"
+  exit 0
+fi
+
+python3 - "$KNOWLEDGE_DIR" "$INDEX_DIR" "$@" << 'PYEOF'
+import sys
+import json
+import os
+import time
+from pathlib import Path
+
+knowledge_dir = Path(sys.argv[1])
+index_dir = Path(sys.argv[2])
+terms = sys.argv[3:]
+
+if not terms:
+    print(json.dumps([], ensure_ascii=False))
+    sys.exit(0)
+
+try:
+    import bm25s
+    import numpy as np
+except ImportError as e:
+    print(f"Error: {e}. Run: pip install bm25s", file=sys.stderr)
+    sys.exit(1)
+
+INDEX_FILE = index_dir / "index.bm25s"
+META_FILE = index_dir / "meta.json"
+
+
+# Token pattern: match sequences of word characters including CJK characters and underscores.
+# \w+ covers ASCII identifiers; 　-鿿 covers CJK ideographs and common Japanese ranges.
+# This is used as the token_pattern for bm25s.tokenize.
+TOKEN_PATTERN = r'(?u)[\w　-鿿]+'
+
+
+def get_newest_json_mtime(knowledge_dir: Path) -> float:
+    """Return the mtime of the most recently modified JSON file."""
+    mtimes = [p.stat().st_mtime for p in knowledge_dir.rglob("*.json")]
+    return max(mtimes) if mtimes else 0.0
+
+
+def load_sections(knowledge_dir: Path) -> tuple[list[dict], list[str]]:
+    """Load all sections from knowledge JSON files. Returns (section_meta, corpus)."""
+    section_meta = []
+    corpus = []
+    for json_path in sorted(knowledge_dir.rglob("*.json")):
+        try:
+            data = json.loads(json_path.read_text(encoding="utf-8"))
+        except Exception:
+            continue
+        if data.get("no_knowledge_content"):
+            continue
+        rel_path = json_path.relative_to(knowledge_dir).as_posix()
+        for sec in data.get("sections", []):
+            sid = sec.get("id", "")
+            if not sid:
+                continue
+            title = sec.get("title", "")
+            content = sec.get("content", "")
+            text = f"{title} {content}"
+            section_meta.append({
+                "file": rel_path,
+                "section_id": sid,
+                "section_title": title,
+            })
+            corpus.append(text)
+    return section_meta, corpus
+
+
+def build_index(knowledge_dir: Path, index_dir: Path) -> tuple[object, list[dict]]:
+    """Build and save BM25 index. Returns (retriever, section_meta)."""
+    section_meta, corpus = load_sections(knowledge_dir)
+    if not corpus:
+        raise RuntimeError("No sections found in knowledge directory")
+
+    tokenized = bm25s.tokenize(corpus, token_pattern=TOKEN_PATTERN, stopwords=None, show_progress=False)
+    retriever = bm25s.BM25()
+    retriever.index(tokenized, show_progress=False)
+
+    index_dir.mkdir(parents=True, exist_ok=True)
+    retriever.save(str(INDEX_FILE))
+
+    newest_mtime = get_newest_json_mtime(knowledge_dir)
+    meta = {
+        "section_meta": section_meta,
+        "built_at": time.time(),
+        "newest_json_mtime": newest_mtime,
+        "count": len(section_meta),
+    }
+    META_FILE.write_text(json.dumps(meta, ensure_ascii=False), encoding="utf-8")
+
+    return retriever, section_meta
+
+
+def is_index_stale(knowledge_dir: Path) -> bool:
+    """Return True if the index needs to be rebuilt."""
+    if not INDEX_FILE.exists() or not META_FILE.exists():
+        return True
+    try:
+        meta = json.loads(META_FILE.read_text(encoding="utf-8"))
+        saved_mtime = meta.get("newest_json_mtime", 0)
+        current_mtime = get_newest_json_mtime(knowledge_dir)
+        return current_mtime > saved_mtime
+    except Exception:
+        return True
+
+
+def load_index(index_dir: Path) -> tuple[object, list[dict]]:
+    """Load saved BM25 index and metadata."""
+    retriever = bm25s.BM25.load(str(INDEX_FILE), load_corpus=False)
+    meta = json.loads(META_FILE.read_text(encoding="utf-8"))
+    return retriever, meta["section_meta"]
+
+
+# Build or reload index
+if is_index_stale(knowledge_dir):
+    retriever, section_meta = build_index(knowledge_dir, index_dir)
+else:
+    retriever, section_meta = load_index(index_dir)
+
+if not section_meta:
+    print(json.dumps([], ensure_ascii=False))
+    sys.exit(0)
+
+# Search
+query = " ".join(terms)
+tokenized_query = bm25s.tokenize([query], token_pattern=TOKEN_PATTERN, stopwords=None, show_progress=False)
+top_k = min(20, len(section_meta))
+results, scores = retriever.retrieve(tokenized_query, k=top_k, show_progress=False)
+
+# results shape: (n_queries, k), scores shape: (n_queries, k)
+output = []
+for idx, score in zip(results[0], scores[0]):
+    if score <= 0:
+        continue
+    meta = section_meta[int(idx)]
+    output.append({
+        "file": meta["file"],
+        "section_id": meta["section_id"],
+        "section_title": meta["section_title"],
+        "score": round(float(score), 4),
+    })
+
+print(json.dumps(output, ensure_ascii=False, indent=2))
+PYEOF
diff --git a/.claude/skills/nabledge-6/scripts/fts-hints.md b/.claude/skills/nabledge-6/scripts/fts-hints.md
@@ -0,0 +1,53 @@
+# FTS Hints
+
+## component/libraries
+
+### libraries-authorization-permission-check
+### libraries-bean-util
+### libraries-bean-validation
+### libraries-code
+### libraries-create-example
+### libraries-data-bind
+### libraries-data-converter
+### libraries-data-format
+### libraries-data-io-functional-comparison
+### libraries-database-functional-comparison
+### libraries-database-management
+### libraries-database
+### libraries-date
+### libraries-db-double-submit
+### libraries-exclusive-control
+### libraries-failure-log
+### libraries-file-path-management
+### libraries-format-definition
+### libraries-format
+### libraries-generator
+### libraries-http-access-log
+### libraries-http-system-messaging
+### libraries-jaxrs-access-log
+### libraries-libraries-permission-check
+### libraries-libraries
+### libraries-log
+### libraries-mail
+### libraries-message
+### libraries-messaging-log
+### libraries-mom-system-messaging
+### libraries-multi-format-example
+### libraries-nablarch-validation
+### libraries-performance-log
+### libraries-repository
+### libraries-role-check
+### libraries-service-availability
+### libraries-session-store
+### libraries-sql-log
+### libraries-stateless-web-app
+### libraries-static-data-cache
+### libraries-system-messaging
+### libraries-tag-reference
+### libraries-tag
+### libraries-transaction
+### libraries-universal-dao
+### libraries-update-example
+### libraries-utility
+### libraries-validation-functional-comparison
+### libraries-validation
diff --git a/.claude/skills/nabledge-6/workflows/check-answerable.md b/.claude/skills/nabledge-6/workflows/check-answerable.md
@@ -0,0 +1,59 @@
+# Check Answerable Workflow
+
+Judges whether the provided sections contain sufficient information to answer the question, without generating an answer.
+
+## Input
+
+- `{question}`: User's question (natural Japanese text).
+- `{sections}`: Array of section pointers in `{"file": "...", "section_id": "...", "relevance": "..."}` format.
+
+## Output
+
+```json
+{"result": "OK"}
+```
+
+or
+
+```json
+{"result": "NG"}
+```
+
+---
+
+## Step 1: Check for empty sections
+
+If `{sections}` is empty, return `{"result": "NG"}` immediately.
+
+---
+
+## Step 2: Read section content
+
+From `{sections}`, select up to 10 sections to read:
+1. All `high` sections first
+2. Then `partial` sections to fill remaining slots
+
+Build the argument list with one `"{file}:{section_id}"` entry per selected section.
+
+```bash
+bash scripts/read-sections.sh "file1.json:s1" "file2.json:s3" ...
+```
+
+Save the output as `sections_content`.
+
+---
+
+## Step 3: Judge answerability
+
+Read `sections_content` and `{question}`.
+
+Judge: Do the sections contain a concrete implementation method, required configuration, or explicit specification that directly answers the question?
+
+**Return `{"result": "OK"}`** if the sections contain sufficient information to write a complete, supported answer to the question.
+
+**Return `{"result": "NG"}`** if:
+- The sections are not relevant to the question
+- The sections mention the topic but lack the concrete detail needed for a complete answer
+- The sections contain only conceptual background without actionable specifics
+
+Do not generate an answer — only judge sufficiency.
diff --git a/.claude/skills/nabledge-6/workflows/full-text-search.md b/.claude/skills/nabledge-6/workflows/full-text-search.md
@@ -0,0 +1,108 @@
+# Full-Text Search Workflow
+
+BM25 keyword search. Returns matching sections.
+
+## Input
+
+- `{question}`: User's question text.
+
+## Output
+
+```json
+{"selected_sections": [...]}
+```
+
+or
+
+```json
+{"selected_sections": []}
+```
+
+The array is empty when no BM25 terms are found, the script returns no hits, or the script exits non-zero.
+
+---
+
+## Step 1: Extract BM25 search terms using page title lookup
+
+Read the FTS hints file to get the current page title list:
+
+```bash
+cat scripts/fts-hints.md
+```
+
+Use this list as a translation table to map the question's concepts to Nablarch-specific terms.
+
+**Process**:
+
+1. Read the question and identify its topic (e.g., "バリデーション", "セッション", "DB操作", "ファイル入出力").
+2. Scan the page title list and find titles whose terms relate to that topic.
+3. From the matching titles, extract BM25 search terms:
+   - Use hyphen-separated parts of the title (e.g., `libraries-bean-validation` → `bean-validation`)
+   - Or the full name after the category prefix (e.g., `handlers-SessionStoreHandler` → `SessionStoreHandler`)
+4. Also include any concrete identifiers that appear **verbatim** in the question (class name, annotation, method name, configuration file name).
+
+**Do NOT extract** broad words from the question:
+- Abstract concepts: `バリデーション`, `トランザクション`, `ハンドラ`, `データ変換`
+- General Java terms: `List`, `String`, `Exception`, `try-catch`
+- Natural language filler: `使い方`, `方法`, `について`, `実装`
+
+**Examples**:
+
+| Question | Matching titles | BM25 terms to use |
+|---|---|---|
+| RESTのバリデーション実装を教えて | `libraries-bean-validation`, `handlers-jaxrs-bean-validation-handler` | `bean-validation`, `jaxrs-bean-validation` |
+| セッションのストア選択基準を知りたい | `libraries-session-store`, `handlers-SessionStoreHandler` | `session-store`, `SessionStoreHandler` |
+| UniversalDaoで検索する方法 | `libraries-universal-dao` (and verbatim: `UniversalDao`) | `universal-dao`, `UniversalDao` |
+| ファイル入出力の実装方法 | `libraries-data-format`, `libraries-data-bind`, `libraries-data-io-functional-comparison` | `data-format`, `data-bind` |
+
+Save the extracted terms as `bm25_terms` (list of strings).
+
+**If `bm25_terms` is empty** (no relevant titles found and no verbatim identifiers in the question), return `{"selected_sections": []}` immediately.
+
+---
+
+## Step 2: BM25 search
+
+Execute the BM25 search script with the extracted terms:
+
+```bash
+bash scripts/bm25-search.sh <term1> [term2] ...
+```
+
+Replace `<term1>`, `[term2]` etc. with the terms from `bm25_terms`, passing each term as a separate, quoted argument.
+
+The script outputs a JSON array. Each element is a section hit with a BM25 score:
+
+```json
+[
+  {
+    "file": "component/libraries/universal-dao.json",
+    "section_id": "s3",
+    "section_title": "batchUpdateメソッドの使い方",
+    "score": 12.45
+  },
+  ...
+]
+```
+
+**If the output array is empty** (`[]`), return `{"selected_sections": []}`.
+
+**If the script exits non-zero** for any reason (index build failure, missing dependency, unexpected error), return `{"selected_sections": []}`.
+
+Otherwise, save the array as `bm25_raw`. Take the top 20 entries by score.
+
+Convert to the `selected_sections` format:
+
+```json
+[
+  {"file": "component/libraries/universal-dao.json", "section_id": "s3", "relevance": "high"}
+]
+```
+
+All BM25 hits use `"relevance": "high"`.
+
+Return:
+
+```json
+{"selected_sections": [...]}
+```