Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
*.iml

# User IDE Files
.idea
.idea

# Local integration test datasets
integration/testdata/local/
80 changes: 80 additions & 0 deletions cmd/benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Benchmark

Runs query scenarios against a real database and outputs a markdown timing table.

## Usage

```bash
# All small datasets
go run ./cmd/benchmark -connection "postgresql://dawgs:dawgs@localhost:5432/dawgs"

# Single dataset
go run ./cmd/benchmark -connection "..." -dataset diamond

# Local dataset (not committed to repo)
go run ./cmd/benchmark -connection "..." -dataset local/phantom

# All small datasets + local dataset
go run ./cmd/benchmark -connection "..." -local-dataset local/phantom

# Neo4j
go run ./cmd/benchmark -driver neo4j -connection "neo4j://neo4j:password@localhost:7687"

# Save to file
go run ./cmd/benchmark -connection "..." -output report.md
```

## Flags

| Flag | Default | Description |
|------|---------|-------------|
| `-driver` | `pg` | Database driver (`pg`, `neo4j`) |
| `-connection` | | Connection string (or `PG_CONNECTION_STRING` env) |
| `-iterations` | `10` | Timed iterations per scenario |
| `-dataset` | | Run only this dataset |
| `-local-dataset` | | Add a local dataset to the default set |
| `-dataset-dir` | `integration/testdata` | Path to testdata directory |
| `-output` | stdout | Markdown output file |

## Example: Neo4j on local/phantom

```
$ go run ./cmd/benchmark -driver neo4j -connection "neo4j://neo4j:testpassword@localhost:7687" -dataset local/phantom
```

| Query | Dataset | Median | P95 | Max |
|-------|---------|-------:|----:|----:|
| Match Nodes | local/phantom | 1.2ms | 1.3ms | 1.3ms |
| Match Edges | local/phantom | 1.3ms | 2.2ms | 2.2ms |
| Filter By Kind / User | local/phantom | 2.7ms | 4.5ms | 4.5ms |
| Filter By Kind / Group | local/phantom | 2.7ms | 3.1ms | 3.1ms |
| Filter By Kind / Computer | local/phantom | 1.6ms | 1.8ms | 1.8ms |
| Traversal Depth / depth 1 | local/phantom | 1.3ms | 2.0ms | 2.0ms |
| Traversal Depth / depth 2 | local/phantom | 1.4ms | 2.0ms | 2.0ms |
| Traversal Depth / depth 3 | local/phantom | 2.5ms | 4.0ms | 4.0ms |
| Edge Kind Traversal / MemberOf | local/phantom | 1.3ms | 1.3ms | 1.3ms |
| Edge Kind Traversal / GenericAll | local/phantom | 1.2ms | 1.4ms | 1.4ms |
| Edge Kind Traversal / HasSession | local/phantom | 1.1ms | 1.2ms | 1.2ms |
| Shortest Paths / 41 -> 587 | local/phantom | 1.6ms | 1.9ms | 1.9ms |

## Example: PG on local/phantom

```
$ export PG_CONNECTION_STRING="postgresql://dawgs:dawgs@localhost:5432/dawgs"
$ go run ./cmd/benchmark -dataset local/phantom
```

| Query | Dataset | Median | P95 | Max |
|-------|---------|-------:|----:|----:|
| Match Nodes | local/phantom | 2.0ms | 4.4ms | 4.4ms |
| Match Edges | local/phantom | 411ms | 457ms | 457ms |
| Filter By Kind / User | local/phantom | 2.2ms | 3.3ms | 3.3ms |
| Filter By Kind / Group | local/phantom | 2.9ms | 3.3ms | 3.3ms |
| Filter By Kind / Computer | local/phantom | 1.4ms | 2.0ms | 2.0ms |
| Traversal Depth / depth 1 | local/phantom | 585ms | 631ms | 631ms |
| Traversal Depth / depth 2 | local/phantom | 661ms | 696ms | 696ms |
| Traversal Depth / depth 3 | local/phantom | 743ms | 779ms | 779ms |
| Edge Kind Traversal / MemberOf | local/phantom | 617ms | 670ms | 670ms |
| Edge Kind Traversal / GenericAll | local/phantom | 702ms | 755ms | 755ms |
| Edge Kind Traversal / HasSession | local/phantom | 680ms | 729ms | 729ms |
| Shortest Paths / 41 -> 587 | local/phantom | 703ms | 765ms | 765ms |
219 changes: 219 additions & 0 deletions cmd/benchmark/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// Copyright 2026 Specter Ops, Inc.
//
// Licensed under the Apache License, Version 2.0
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0

package main

import (
	"context"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"github.com/specterops/dawgs"
	"github.com/specterops/dawgs/drivers/pg"
	"github.com/specterops/dawgs/graph"
	"github.com/specterops/dawgs/opengraph"
	"github.com/specterops/dawgs/util/size"

	_ "github.com/specterops/dawgs/drivers/neo4j"
)

// main wires together the benchmark harness: parse flags, open the target
// database, load each dataset, run the query scenarios, and render a
// markdown timing report.
//
// NOTE(review): fatal() exits via os.Exit, which skips the deferred Close
// calls below on failure; process exit releases those resources, so this is
// acceptable for a one-shot CLI.
func main() {
	var (
		driver       = flag.String("driver", "pg", "database driver (pg, neo4j)")
		connStr      = flag.String("connection", "", "database connection string (or PG_CONNECTION_STRING)")
		iterations   = flag.Int("iterations", 10, "timed iterations per scenario")
		output       = flag.String("output", "", "markdown output file (default: stdout)")
		datasetDir   = flag.String("dataset-dir", "integration/testdata", "path to testdata directory")
		localDataset = flag.String("local-dataset", "", "additional local dataset (e.g. local/phantom)")
		onlyDataset  = flag.String("dataset", "", "run only this dataset (e.g. diamond, local/phantom)")
	)

	flag.Parse()

	// The -connection flag takes precedence; fall back to the env var.
	conn := *connStr
	if conn == "" {
		conn = os.Getenv("PG_CONNECTION_STRING")
	}
	if conn == "" {
		fatal("no connection string: set -connection flag or PG_CONNECTION_STRING env var")
	}

	ctx := context.Background()

	cfg := dawgs.Config{
		GraphQueryMemoryLimit: size.Gibibyte,
		ConnectionString:      conn,
	}

	// The pg driver takes an explicit connection pool; other drivers build
	// their own from the connection string.
	if *driver == pg.DriverName {
		pool, err := pg.NewPool(conn)
		if err != nil {
			fatal("failed to create pool: %v", err)
		}
		cfg.Pool = pool
	}

	db, err := dawgs.Open(ctx, *driver, cfg)
	if err != nil {
		fatal("failed to open database: %v", err)
	}
	defer db.Close(ctx)

	// Build dataset list: -dataset runs exactly one; otherwise run the
	// default small set, optionally extended by -local-dataset.
	var datasets []string
	if *onlyDataset != "" {
		datasets = []string{*onlyDataset}
	} else {
		datasets = smallDatasets
		if *localDataset != "" {
			datasets = append(datasets, *localDataset)
		}
	}

	// Scan all datasets up front so the asserted schema covers every node
	// and edge kind any of them uses.
	nodeKinds, edgeKinds := scanKinds(*datasetDir, datasets)

	schema := graph.Schema{
		Graphs: []graph.Graph{{
			Name:  "integration_test",
			Nodes: nodeKinds,
			Edges: edgeKinds,
		}},
		DefaultGraph: graph.Graph{Name: "integration_test"},
	}

	if err := db.AssertSchema(ctx, schema); err != nil {
		fatal("failed to assert schema: %v", err)
	}

	report := Report{
		Driver:     *driver,
		GitRef:     gitRef(),
		Date:       time.Now().Format("2006-01-02"),
		Iterations: *iterations,
	}

	for _, ds := range datasets {
		fmt.Fprintf(os.Stderr, "benchmarking %s...\n", ds)

		// Clear the graph so datasets don't contaminate each other's timings.
		// A failure skips this dataset but keeps the run going.
		if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error {
			return tx.Nodes().Delete()
		}); err != nil {
			fmt.Fprintf(os.Stderr, " clear failed: %v\n", err)
			continue
		}

		// Load the dataset; use filepath.Join for an OS-correct path.
		path := filepath.Join(*datasetDir, ds+".json")
		idMap, err := loadDataset(ctx, db, path)
		if err != nil {
			fmt.Fprintf(os.Stderr, " load failed: %v\n", err)
			continue
		}

		fmt.Fprintf(os.Stderr, " loaded %d nodes\n", len(idMap))

		// Run every scenario for this dataset, collecting timing stats.
		for _, s := range scenariosForDataset(ds, idMap) {
			result, err := runScenario(ctx, db, s, *iterations)
			if err != nil {
				fmt.Fprintf(os.Stderr, " %s/%s failed: %v\n", s.Section, s.Label, err)
				continue
			}

			report.Results = append(report.Results, result)
			fmt.Fprintf(os.Stderr, " %s/%s: median=%s p95=%s max=%s\n",
				s.Section, s.Label,
				fmtDuration(result.Stats.Median),
				fmtDuration(result.Stats.P95),
				fmtDuration(result.Stats.Max),
			)
		}
	}

	// Write the markdown report to the requested destination.
	var mdOut *os.File
	if *output != "" {
		var err error
		mdOut, err = os.Create(*output)
		if err != nil {
			fatal("failed to create output: %v", err)
		}
		defer mdOut.Close()
	} else {
		mdOut = os.Stdout
	}

	if err := writeMarkdown(mdOut, report); err != nil {
		fatal("failed to write markdown: %v", err)
	}

	if *output != "" {
		fmt.Fprintf(os.Stderr, "wrote %s\n", *output)
	}
}

// scanKinds parses every dataset document under datasetDir and returns the
// union of node kinds and edge kinds they declare. Datasets that cannot be
// opened or parsed are skipped on a best-effort basis here; loadDataset will
// surface the error for that dataset later in the run.
func scanKinds(datasetDir string, datasets []string) (graph.Kinds, graph.Kinds) {
	var nodeKinds, edgeKinds graph.Kinds

	for _, ds := range datasets {
		// filepath.Join builds an OS-correct path (ds may itself contain a
		// separator, e.g. "local/phantom").
		path := filepath.Join(datasetDir, ds+".json")
		f, err := os.Open(path)
		if err != nil {
			continue
		}

		doc, err := opengraph.ParseDocument(f)
		f.Close()
		if err != nil {
			continue
		}

		nk, ek := doc.Graph.Kinds()
		nodeKinds = nodeKinds.Add(nk...)
		edgeKinds = edgeKinds.Add(ek...)
	}

	return nodeKinds, edgeKinds
}

// loadDataset ingests the opengraph JSON document at path into db and
// returns the resulting ID map (document node IDs to database node IDs).
func loadDataset(ctx context.Context, db graph.Database, path string) (opengraph.IDMap, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	return opengraph.Load(ctx, db, file)
}

// gitRef returns the short hash of the current HEAD commit, or "unknown"
// when git is unavailable or the working directory is not a repository.
func gitRef() string {
	if out, err := exec.Command("git", "rev-parse", "--short", "HEAD").Output(); err == nil {
		return strings.TrimSpace(string(out))
	}
	return "unknown"
}

// fatal writes the formatted message to stderr, followed by a newline, and
// terminates the process with exit status 1.
func fatal(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(1)
}
67 changes: 67 additions & 0 deletions cmd/benchmark/report.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2026 Specter Ops, Inc.
//
// Licensed under the Apache License, Version 2.0
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0

package main

import (
"fmt"
"io"
"time"
)

// Report holds all benchmark results and metadata.
type Report struct {
	Driver     string   // database driver name from the -driver flag ("pg" or "neo4j")
	GitRef     string   // short git commit hash of HEAD, or "unknown"
	Date       string   // run date formatted as YYYY-MM-DD
	Iterations int      // timed iterations per scenario (from -iterations)
	Results    []Result // one entry per successfully completed scenario
}

// writeMarkdown renders the report as a markdown table to w: a header line
// with driver/ref/date/iteration metadata, then one row per result. It
// returns the first write error encountered (the original implementation
// discarded Fprintf errors and always returned nil, silently dropping
// failures such as a full disk on the -output file).
func writeMarkdown(w io.Writer, r Report) error {
	if _, err := fmt.Fprintf(w, "# Benchmarks — %s @ %s (%s, %d iterations)\n\n", r.Driver, r.GitRef, r.Date, r.Iterations); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(w, "| Query | Dataset | Median | P95 | Max |\n"); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(w, "|-------|---------|-------:|----:|----:|\n"); err != nil {
		return err
	}

	for _, res := range r.Results {
		// When the label is just the dataset name, show the bare section;
		// otherwise render "Section / Label".
		label := res.Section
		if res.Label != res.Dataset {
			label = res.Section + " / " + res.Label
		}

		if _, err := fmt.Fprintf(w, "| %s | %s | %s | %s | %s |\n",
			label,
			res.Dataset,
			fmtDuration(res.Stats.Median),
			fmtDuration(res.Stats.P95),
			fmtDuration(res.Stats.Max),
		); err != nil {
			return err
		}
	}

	_, err := fmt.Fprintln(w)
	return err
}

func fmtDuration(d time.Duration) string {
ms := float64(d.Microseconds()) / 1000.0
if ms < 1 {
return fmt.Sprintf("%.2fms", ms)
}
if ms < 100 {
return fmt.Sprintf("%.1fms", ms)
}
return fmt.Sprintf("%.0fms", ms)
}
Loading
Loading