Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 160 additions & 80 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ members = [
"crates/graphify-manifest",
"crates/graphify-multigraph-compat",
"crates/graphify-prs",
"crates/graphify-reflect",
"crates/graphify-report",
"crates/graphify-scip",
"crates/graphify-security",
Expand All @@ -43,7 +44,7 @@ license = "Apache-2.0"
publish = false
repository = "https://github.com/bunkerlab-net/graphify"
rust-version = "1.95"
version = "0.8.44"
version = "0.8.49"

[workspace.dependencies]
anyhow = "1"
Expand All @@ -55,13 +56,10 @@ chrono = { version = "0.4", default-features = false, features = [
clap = { version = "4", features = ["derive", "env"] }
hex = "0.4"
htmlescape = "0.3"
idna = "1"
ignore = "0.4"
indexmap = { version = "2", features = ["serde"] }
mockito = "1"
once_cell = "1"
percent-encoding = "2"
petgraph = { version = "0.8", features = ["serde-1"] }
rayon = "1"
regex = "1"
serde = { version = "1", features = ["derive"] }
Expand Down Expand Up @@ -100,6 +98,7 @@ graphify-llm = { path = "crates/graphify-llm" }
graphify-manifest = { path = "crates/graphify-manifest" }
graphify-multigraph-compat = { path = "crates/graphify-multigraph-compat" }
graphify-prs = { path = "crates/graphify-prs" }
graphify-reflect = { path = "crates/graphify-reflect" }
graphify-report = { path = "crates/graphify-report" }
graphify-scip = { path = "crates/graphify-scip" }
graphify-security = { path = "crates/graphify-security" }
Expand Down Expand Up @@ -137,6 +136,7 @@ path = "src/main.rs"

[dependencies]
anyhow = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true }
graphify-affected = { workspace = true }
graphify-analyze = { workspace = true }
Expand All @@ -155,6 +155,7 @@ graphify-html = { workspace = true }
graphify-ingest = { workspace = true }
graphify-llm = { workspace = true }
graphify-prs = { workspace = true }
graphify-reflect = { workspace = true }
graphify-report = { workspace = true }
graphify-security = { workspace = true }
graphify-serve = { workspace = true }
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ a Rust equivalent, and outputs are byte-identical where the test suite asserts i
(`.razor`, `.cshtml`) for package, project-reference, target-framework, and `@code` extraction,
Verilog/SystemVerilog (`.v`, `.sv`, `.svh`), BYOND DreamMaker
(`.dm`, `.dme` source plus `.dmi` icon sheets, `.dmm` maps, and `.dmf` interface forms),
CUDA (`.cu`, `.cuh`) routed through the C++ extractor,
and MCP config files (`.mcp.json`,
`claude_desktop_config.json`, `mcp.json`, `mcp_servers.json`) — servers, commands, packages,
and env-var _names_ (values are never read).
Expand All @@ -75,14 +76,18 @@ a Rust equivalent, and outputs are byte-identical where the test suite asserts i
- **Structural introspection** — `graphify extract --cargo` adds `crate -> crate` dependency edges from `Cargo.toml`
manifests; `--postgres <DSN>` adds a live PostgreSQL schema (requires the `postgres` build feature).
- **LLM community naming** — `graphify label` (or `cluster-only`) auto-names graph communities with the configured
backend; degrades to `Community N` placeholders when no backend is available.
backend, fanning out batches in parallel (`--max-concurrency`, `--batch-size`); degrades to `Community N`
placeholders when no backend is available.
- **AI-assistant integration** — drop-in installers for Claude Code, CodeBuddy, Codex, Amp, Cursor, Gemini CLI,
GitHub Copilot, VS Code, OpenCode, Aider, Factory Droid, Trae, Hermes, Kiro, Kilo Code, Pi, Devin CLI,
Google Antigravity, and more.
- **MCP server** for any MCP-capable assistant (`graphify serve`) — stdio by default, or Streamable HTTP
(`--transport http`, requires the `http` build feature) so one shared process can host the graph for a team.
- **Git hooks + merge driver** so two branches editing the same `graph.json` produce a union-merged result.
- **Cross-repo global graph** — aggregate every project you care about into one `~/.graphify/global-graph.json`.
- **Work memory** — `graphify save-result` records Q&A outcomes under `graphify-out/memory/`, and `graphify reflect`
aggregates them into a deterministic `reflections/LESSONS.md` lessons doc (refreshed automatically by the
post-commit/post-checkout hooks).
- **Deterministic outputs** — same inputs on the same machine produce byte-identical JSON.

## Install
Expand Down Expand Up @@ -146,7 +151,7 @@ For development conventions (lint policy, porting rules, test layout, definition
```text
graphify/
├── src/ # graphify CLI binary
├── crates/ # 29 focused workspace crates
├── crates/ # 30 focused workspace crates
│ ├── graphify-detect/ # filesystem walking + file-type detection
│ ├── graphify-extract/ # tree-sitter / document / media extractors
│ ├── graphify-build/ # graph construction
Expand All @@ -167,6 +172,7 @@ graphify/
│ ├── graphify-multigraph-compat/ # runtime keyed-edge capability probe
│ ├── graphify-scip/ # SCIP-style JSON ingest
│ ├── graphify-semantic/ # LLM extraction fragment validator
│ ├── graphify-reflect/ # work-memory reflection (LESSONS.md aggregator)
│ └── ... # benchmark, cache, dedup, ingest, manifest, transcribe, validate, watch, google
└── graphify-py/ # read-only git submodule — Python reference
```
Expand Down
48 changes: 42 additions & 6 deletions USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ graphify-out/
Optional output lands under `graphify-out/` only when you opt in: `wiki/`
(`graphify export wiki`), `GRAPH_TREE.html` (`graphify tree`), `cypher.txt`
(`graphify export neo4j`), `<YYYY-MM-DD>/` backups (created automatically when
`graph.json` is overwritten), and `memory/` (Q&A saved by `graphify save-result`).
`graph.json` is overwritten), `memory/` (Q&A saved by `graphify save-result`),
and `reflections/LESSONS.md` (aggregated work-memory lessons from `graphify reflect`).

This is the Rust reimplementation of the Python `graphify` reference; the CLI surface is 1:1 with `python -m graphify`.

Expand Down Expand Up @@ -117,7 +118,9 @@ and **Markdown links** (inline, reference-style, and `[[wikilinks]]`) as
`references` edges, so a hub doc (`index.md`, a table of contents) connects to the
documents it links instead of being an orphan (#1376). Swift `import` targets
become shared `type=module` anchor nodes and cross-file member calls
(`recv.method()`) resolve through the file's local type table (#1327, #1356).
(`recv.method()`) resolve through the file's local type table (#1327, #1356);
Python `ClassName.method()` calls resolve to the class-qualified method node
across files (#1446). CUDA sources (`.cu`, `.cuh`) are extracted through the C++ pass.

Optional LLM-driven semantic extraction is wired through `--backend`/`--model`/`--mode`/`--token-budget`/
`--max-concurrency`/`--api-timeout`/`--max-workers` (see `graphify extract --help` and the
Expand Down Expand Up @@ -164,7 +167,8 @@ Rerun clustering on an existing `graph.json` and regenerate the report and HTML
parameters or when you only want to refresh `GRAPH_REPORT.md`.

When no `.graphify_labels.json` exists yet, `cluster-only` auto-names communities with the configured LLM backend
in a single batched call, falling back to `Community N` placeholders if no backend is configured or the call fails.
in batched calls (fanned out in parallel — tune with `--max-concurrency` / `--batch-size`), falling back to
`Community N` placeholders if no backend is configured or the call fails.
An existing labels file is preserved (re-run `graphify label` to force a refresh).

```bash
Expand All @@ -173,6 +177,8 @@ graphify cluster-only . --no-viz # skip graph.html (saves time
graphify cluster-only . --graph other/graph.json # use a non-default graph location
graphify cluster-only . --no-label # keep "Community N" placeholders (skip LLM naming)
graphify cluster-only . --backend openai # backend to use for naming (default: auto-detect)
graphify cluster-only . --max-concurrency 8 # parallel LLM naming batches (default 4)
graphify cluster-only . --batch-size 50 # communities per LLM call (default 100)
```

### `label <path>`
Expand All @@ -184,6 +190,8 @@ graphify cluster-only . --backend openai # backend to use for naming (
graphify label . # re-name with the auto-detected backend
graphify label . --backend gemini # force a specific backend
graphify label . --no-viz # skip graph.html regeneration
graphify label . --max-concurrency 8 # parallel LLM naming batches (default 4)
graphify label . --batch-size 50 # communities per LLM call (default 100)
```

If no backend is configured (no API key), `label` degrades to `Community N` placeholders and prints a hint.
Expand Down Expand Up @@ -304,15 +312,38 @@ graphify explain "AuthMiddleware"
Save a Q&A result back into `graphify-out/memory/` so it gets re-extracted into the graph on the next `update`
(the feedback loop). Files under `graphify-out/memory/` are always detected: they bypass `.gitignore` /
`.graphifyignore` filtering, so a broad ignore pattern (e.g. `*.md`) can't silently erase generated memory notes.
Pass `--outcome useful|dead_end|corrected` (and `--correction "<what worked>"` for the `corrected` case) to record
a work-memory signal that `graphify reflect` later aggregates into `LESSONS.md`. Any other `--outcome` value is
rejected.

```bash
graphify save-result \
--question "how is auth scoped" \
--answer "AuthMiddleware checks tenant_id from JWT and binds it to the request context" \
--type query \
--nodes AuthMiddleware request_context
--nodes AuthMiddleware request_context \
--outcome useful
```

### `reflect`

Aggregate the work-memory outcomes saved under `graphify-out/memory/` into a single deterministic
`graphify-out/reflections/LESSONS.md`. Each `save-result --outcome` signal is time-decayed (a signal's weight
halves every 30 days by default; see `--half-life-days`), and nodes are bucketed into **preferred** (corroborated
by ≥2 useful sessions by default; see `--min-corroboration`), **tentative** (seen once), and **contested** (mixed
signals — the most recent verdict wins). Dead ends and corrections are listed so the next session avoids
re-deriving them. When a `graph.json` is present, lessons are grouped by community.

```bash
graphify reflect # writes graphify-out/reflections/LESSONS.md
graphify reflect --if-stale # skip when LESSONS.md is already newer than every input
graphify reflect --half-life-days 14 # signals decay twice as fast
graphify reflect --min-corroboration 3 # require 3 useful sessions to promote a node to "preferred"
graphify reflect --out custom/LESSONS.md # write the lessons doc elsewhere
```

The post-commit / post-checkout hooks refresh `LESSONS.md` automatically after each rebuild when saved outcomes
exist, so the lessons doc stays current without a manual run.

### `affected "<query>"`

Reverse-traversal impact analysis: given a node label / ID / source-file substring, enumerate every node that
Expand Down Expand Up @@ -564,12 +595,17 @@ available and is untouched by the project flag.
graphify install --platform claude # same as `graphify claude install`
graphify install claude # positional shorthand also accepted
graphify install --platform kimi # Kimi CLI → ~/.kimi/skills/graphify/SKILL.md (no dedicated subcommand)
graphify install --platform agents # cross-framework Agent-Skills → ~/.agents/skills/graphify/SKILL.md
graphify agents install # same target; `graphify skills install` is an accepted alias
graphify agents install --project # ./.agents/skills/graphify/SKILL.md + AGENTS.md section
graphify uninstall # removes graphify from every detected platform
graphify uninstall --purge # also deletes graphify-out/
```

The aggregate `install` is a convenience dispatcher to the per-platform installer; the aggregate `uninstall` scans
every supported platform and removes the integration wherever it finds one.
every supported platform and removes the integration wherever it finds one. The `agents` platform (aliased as
`skills`) targets the cross-framework Agent-Skills location — `~/.agents/skills/graphify/SKILL.md` globally, or
`./.agents/skills/graphify/SKILL.md` plus an `AGENTS.md` section under `--project`.

### `hook-check`

Expand Down Expand Up @@ -629,7 +665,7 @@ completes the feature.)

| Variable | Effect |
| -------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- |
| `GRAPHIFY_OUT` | Override the output directory name (default `graphify-out`). |
| `GRAPHIFY_OUT` | Override the output directory (default `graphify-out`); a relative name or an absolute path, honoured everywhere. |
| `GRAPHIFY_FORCE` | Same effect as `--force` on `update`. |
| `GRAPHIFY_VIZ_NODE_LIMIT` | Cap nodes before HTML export is skipped (default 5000). |
| `GRAPHIFY_GOOGLE_WORKSPACE` | Truthy value enables `.gdoc/.gsheet/.gslides` export by default. |
Expand Down
27 changes: 21 additions & 6 deletions crates/graphify-build/src/build_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,31 @@ pub fn build_from_json(
);
}

if let Some(hyperedges) = extraction
.as_object()
.and_then(|o| o.get("hyperedges"))
.cloned()
&& let Some(arr) = hyperedges.as_array()
if let Some(arr) = extraction
.as_object_mut()
.and_then(|o| o.get_mut("hyperedges"))
.and_then(Value::as_array_mut)
&& !arr.is_empty()
{
// Relativize hyperedge source_file the same way nodes and edges are, so
// `to_json` — which writes `graph.hyperedges` verbatim and has no root —
// never leaks an absolute path from a semantic subagent (#1418).
for he in arr.iter_mut() {
let Some(map) = he.as_object_mut() else {
continue;
};
let Some(sf) = map.get("source_file").and_then(Value::as_str) else {
continue;
};
if sf.is_empty() {
continue;
}
let normalized = norm_source_file(sf, root_str.as_deref());
map.insert("source_file".to_string(), Value::String(normalized));
}
graph
.graph_attrs
.insert("hyperedges".to_string(), hyperedges);
.insert("hyperedges".to_string(), Value::Array(arr.clone()));
}

Ok(graph)
Expand Down
78 changes: 78 additions & 0 deletions crates/graphify-build/tests/parity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,84 @@ fn build_from_json_relative_source_file_unchanged() {
);
}

#[test]
fn build_from_json_relativizes_hyperedge_source_file() {
    // #1418: a hyperedge's `source_file` must receive the same root-relative
    // treatment as node and edge paths. `to_json` writes `graph.hyperedges`
    // verbatim and has no root, so an absolute path coming from a semantic
    // subagent would otherwise leak through.
    let workdir = tempfile::tempdir().expect("tempdir");
    let root = workdir.path().canonicalize().expect("canonicalize");
    let doc_path = root
        .join("docs")
        .join("CLAUDE.md")
        .to_string_lossy()
        .into_owned();
    let extraction = json!({
        "nodes": [
            {"id": "a", "label": "A", "file_type": "document", "source_file": doc_path.clone()},
        ],
        "edges": [],
        "hyperedges": [
            {"id": "arch", "label": "Architecture", "nodes": ["a"],
             "relation": "participate_in", "confidence": "INFERRED",
             "confidence_score": 0.75, "source_file": doc_path},
        ],
    });
    let graph = build_from_json(extraction, false, Some(&root)).expect("build");

    // The stored hyperedge carries the path relative to `root`.
    let first_hyperedge = graph
        .graph_attrs
        .get("hyperedges")
        .and_then(Value::as_array)
        .and_then(|list| list.first())
        .expect("hyperedge present");
    let hyperedge_source = first_hyperedge.get("source_file").and_then(Value::as_str);
    assert_eq!(hyperedge_source, Some("docs/CLAUDE.md"));

    // Anchor: the node path is relativized identically (the contract this mirrors).
    let node_source = graph
        .node_data("a")
        .and_then(|attrs| attrs.get("source_file"))
        .and_then(Value::as_str);
    assert_eq!(node_source, Some("docs/CLAUDE.md"));
}

#[test]
fn build_from_json_skips_non_hashable_node_id() {
    // A malformed LLM extraction can emit a node whose id is a list, or a node
    // with no id at all. Both must be skipped while the graph is still built
    // from the well-formed node.
    let extraction = json!({
        "nodes": [
            {"id": "a", "label": "A", "file_type": "code", "source_file": "a.py"},
            {"id": ["x", "y"], "label": "B", "file_type": "code", "source_file": "b.py"},
            {"label": "C", "file_type": "code", "source_file": "c.py"},
        ],
        "edges": [],
    });
    let graph = build_from_json(extraction, false, None).expect("build");

    let surviving: std::collections::BTreeSet<String> =
        graph.nodes().map(|(node_id, _)| node_id.clone()).collect();
    let expected: std::collections::BTreeSet<String> =
        std::iter::once("a".to_string()).collect();
    assert_eq!(surviving, expected);
}

#[test]
fn build_from_json_skips_edge_with_non_hashable_endpoint() {
    // An edge whose target is a list rather than a single id is skipped; the
    // well-formed a -> b edge and both nodes are kept.
    let extraction = json!({
        "nodes": [
            {"id": "a", "label": "A", "file_type": "code", "source_file": "a.py"},
            {"id": "b", "label": "B", "file_type": "code", "source_file": "b.py"},
        ],
        "edges": [
            {"source": "a", "target": ["b", "c"], "relation": "calls",
             "confidence": "INFERRED", "source_file": "a.py"},
            {"source": "a", "target": "b", "relation": "imports",
             "confidence": "EXTRACTED", "source_file": "a.py"},
        ],
    });
    let graph = build_from_json(extraction, false, None).expect("build");

    let node_total = graph.node_count();
    assert_eq!(node_total, 2);
    let edge_total = graph.edge_count();
    assert_eq!(edge_total, 1);
    let surviving_edge = graph.edge_data("a", "b");
    assert!(surviving_edge.is_some());
}

#[test]
fn build_merge_preserves_call_edge_direction() {
// #760: build_merge must read source/target verbatim, not re-derive edge
Expand Down
1 change: 1 addition & 0 deletions crates/graphify-cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ rust-version.workspace = true
version.workspace = true

[dependencies]
graphify-security = { workspace = true }
hex = { workspace = true }
indexmap = { workspace = true }
serde = { workspace = true }
Expand Down
8 changes: 1 addition & 7 deletions crates/graphify-cache/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@ pub const EXTRACTOR_VERSION: &str = env!("CARGO_PKG_VERSION");
static CLEANED_AST_DIRS: LazyLock<Mutex<HashSet<String>>> =
LazyLock::new(|| Mutex::new(HashSet::new()));

/// Output directory name; defaults to `"graphify-out"` and respects the
/// `GRAPHIFY_OUT` environment variable override.
pub(crate) fn graphify_out() -> String {
std::env::var("GRAPHIFY_OUT").unwrap_or_else(|_| "graphify-out".to_string())
}

/// Resolve the absolute path to the graphify output directory relative to
/// `root`.
///
Expand All @@ -34,7 +28,7 @@ pub(crate) fn graphify_out() -> String {
/// downstream `fs::create_dir_all` call in [`cache_dir`] will surface
/// the underlying I/O error if the path is unusable.
pub(crate) fn out_base(root: &Path) -> PathBuf {
let out = PathBuf::from(graphify_out());
let out = graphify_security::graphify_out();
if out.is_absolute() {
out
} else {
Expand Down
6 changes: 4 additions & 2 deletions crates/graphify-detect/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ version.workspace = true
[dependencies]
calamine = "0.35"
graphify-google = { workspace = true }
graphify-security = { workspace = true }
hex = { workspace = true }
ignore_walk = { version = "0.4", package = "ignore" }
indexmap = { workspace = true }
lopdf = "0.41"
lopdf = "0.42"
md5 = "0.8"
quick-xml = "0.40"
rayon = { workspace = true }
Expand All @@ -29,7 +30,8 @@ unicode-normalization = { workspace = true }
zip = { version = "8", default-features = false, features = ["deflate"] }

[dev-dependencies]
lopdf = "0.41"
lopdf = "0.42"
serial_test = { workspace = true }
tempfile = { workspace = true }

[lints]
Expand Down
Loading
Loading