From 605667649d45fe0d06577124fdedd69a880e66bc Mon Sep 17 00:00:00 2001
From: Jason Hall
Date: Mon, 2 Mar 2026 15:00:32 -0500
Subject: [PATCH] Add packfile explorer to git.dag.dev
Fetches full-clone packfiles, caches them in GCS or a local directory,
and presents an interactive parsed view matching git verify-pack -v
output. Object detail pages show decompressed content with a
visualization of the delta instructions.
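
Example routes (hashes elided):

  /?pack=github.com/wolfi-dev/os        pack overview, verify-pack style
  /pack/github.com/wolfi-dev/os/<hash>  object detail with delta ops
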
Co-Authored-By: Claude Opus 4.6
---
internal/forks/rsc.io/gitfs/fs.go | 66 +--
internal/forks/rsc.io/gitfs/git.go | 99 ++++-
internal/forks/rsc.io/gitfs/git_test.go | 4 +-
internal/forks/rsc.io/gitfs/pack.go | 24 +-
internal/git/cache.go | 315 ++++++++++++++
internal/git/git.go | 531 +++++++++++++++++++++++-
internal/git/packfile.go | 442 ++++++++++++++++++++
internal/git/templates.go | 13 +
8 files changed, 1441 insertions(+), 53 deletions(-)
create mode 100644 internal/git/cache.go
create mode 100644 internal/git/packfile.go
diff --git a/internal/forks/rsc.io/gitfs/fs.go b/internal/forks/rsc.io/gitfs/fs.go
index 46eabd31..0a5ea398 100644
--- a/internal/forks/rsc.io/gitfs/fs.go
+++ b/internal/forks/rsc.io/gitfs/fs.go
@@ -39,21 +39,21 @@ func ParseHash(text string) (Hash, error) {
type ObjType int
const (
- objNone ObjType = 0
- objCommit ObjType = 1
- objTree ObjType = 2
- objBlob ObjType = 3
- objTag ObjType = 4
+ ObjNone ObjType = 0
+ ObjCommit ObjType = 1
+ ObjTree ObjType = 2
+ ObjBlob ObjType = 3
+ ObjTag ObjType = 4
// 5 undefined
- objOfsDelta ObjType = 6
- objRefDelta ObjType = 7
+ ObjOfsDelta ObjType = 6
+ ObjRefDelta ObjType = 7
)
var objTypes = [...]string{
- objCommit: "commit",
- objTree: "tree",
- objBlob: "blob",
- objTag: "tag",
+ ObjCommit: "commit",
+ ObjTree: "tree",
+ ObjBlob: "blob",
+ ObjTag: "tag",
}
func (t ObjType) String() string {
@@ -70,10 +70,10 @@ type DirEntry struct {
Hash Hash
}
-// parseDirEntry parses the next directory entry from data,
+// ParseDirEntry parses the next directory entry from data,
// returning the entry and the number of bytes it occupied.
-// If data is malformed, parseDirEntry returns dirEntry{}, 0.
-func parseDirEntry(data []byte) (DirEntry, int) {
+// If data is malformed, ParseDirEntry returns DirEntry{}, 0.
+func ParseDirEntry(data []byte) (DirEntry, int) {
// Unclear where or if this format is documented by Git.
// Each directory entry is an octal mode, then a space,
// then a file name, then a NUL byte, then a 20-byte binary hash.
@@ -111,7 +111,7 @@ func treeLookup(data []byte, name string) (mode int, h Hash, ok bool) {
// but the directory entry data is not self-synchronizing,
// so it's not possible to be clever and use a binary search here.
for len(data) > 0 {
- e, size := parseDirEntry(data)
+ e, size := ParseDirEntry(data)
if size == 0 {
break
}
@@ -153,8 +153,8 @@ func commitKeyValue(data []byte, key string) ([]byte, bool) {
return nil, false
}
-// A store is a collection of Git objects, indexed for lookup by hash.
-type store struct {
+// A Store is a collection of Git objects, indexed for lookup by hash.
+type Store struct {
repo *Repo
sha1 hashpkg.Hash // reused hash state
index map[Hash]stored // lookup index
@@ -164,13 +164,13 @@ type store struct {
// A stored describes a single stored object.
type stored struct {
typ ObjType // object type
- off int // object data is store.data[off:off+len]
+ off int // object data is Store.data[off:off+len]
len int
}
-// add adds an object with the given type and content to s, returning its Hash.
-// If the object is already stored in s, add succeeds but doesn't store a second copy.
-func (s *store) add(typ ObjType, data []byte) (Hash, []byte) {
+// Add adds an object with the given type and content to s, returning its Hash.
+// If the object is already stored in s, Add succeeds but doesn't store a second copy.
+func (s *Store) Add(typ ObjType, data []byte) (Hash, []byte) {
if s.sha1 == nil {
s.sha1 = sha1.New()
}
@@ -196,7 +196,7 @@ func (s *store) add(typ ObjType, data []byte) (Hash, []byte) {
// Object returns the type and data for the Object with hash h.
// If there is no Object with hash h, Object returns 0, nil.
-func (s *store) Object(h Hash) (typ ObjType, data []byte) {
+func (s *Store) Object(h Hash) (typ ObjType, data []byte) {
d, ok := s.index[h]
if !ok {
return 0, nil
@@ -205,16 +205,16 @@ func (s *store) Object(h Hash) (typ ObjType, data []byte) {
}
// Commit returns a treeFS for the file system tree associated with the given Commit hash.
-func (s *store) Commit(c Hash) (*treeFS, []byte, error) {
+func (s *Store) Commit(c Hash) (*treeFS, []byte, error) {
// The commit object data starts with key-value pairs
typ, data := s.Object(c)
- if typ == objNone {
+ if typ == ObjNone {
return nil, nil, fmt.Errorf("commit %s: no such hash", c)
}
// fmt.Fprintf(os.Stderr, "typ=%d\n", typ)
// fmt.Fprintf(os.Stderr, "%s", data)
// os.Stderr.Write([]byte("\n"))
- if typ != objCommit {
+ if typ != ObjCommit {
return nil, nil, fmt.Errorf("commit %s: unexpected type %s", c, typ)
}
treeHash, ok := commitKeyValue(data, "tree")
@@ -230,7 +230,7 @@ func (s *store) Commit(c Hash) (*treeFS, []byte, error) {
// A treeFS is an fs.FS serving a Git file system tree rooted at a given tree object hash.
type treeFS struct {
- s *store
+ s *Store
tree Hash // root tree
commit Hash
}
@@ -265,7 +265,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) {
if i == len(name) || name[i] == '/' {
// Look up name in current tree object h.
typ, data := t.s.Object(h)
- if typ != objTree {
+ if typ != ObjTree {
return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist}
}
_, th, ok := treeLookup(data, name[start:i])
@@ -283,7 +283,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) {
// The hash h is the hash for name. Load its object.
typ, data := t.s.Object(h)
info := fileInfo{name, name[start:], 0, 0, nil}
- if typ == objBlob {
+ if typ == ObjBlob {
// Regular file.
info.mode = 0444
info.size = int64(len(data))
@@ -294,7 +294,7 @@ func (t *treeFS) Open(name string) (f fs.File, err error) {
}
return &blobFile{info, bytes.NewReader(data)}, nil
}
- if typ == objTree {
+ if typ == ObjTree {
// Directory.
info.mode = fs.ModeDir | 0555
info.sys = &DirEntry{
@@ -341,7 +341,7 @@ func (f *blobFile) Stat() (fs.FileInfo, error) { return &f.info, nil }
// A dirFile implements fs.File for a directory.
type dirFile struct {
- s *store
+ s *Store
info fileInfo
data []byte
off int
@@ -369,18 +369,18 @@ func (f *dirFile) ReadDir(n int) (list []fs.DirEntry, err error) {
}()
for (n <= 0 || len(list) < n) && f.off < len(f.data) {
- e, size := parseDirEntry(f.data[f.off:])
+ e, size := ParseDirEntry(f.data[f.off:])
if size == 0 {
break
}
f.off += size
typ, data := f.s.Object(e.Hash)
mode := fs.FileMode(0444)
- if typ == objTree {
+ if typ == ObjTree {
mode = fs.ModeDir | 0555
}
infoSize := int64(0)
- if typ == objBlob {
+ if typ == ObjBlob {
infoSize = int64(len(data))
}
name := string(e.Name)
diff --git a/internal/forks/rsc.io/gitfs/git.go b/internal/forks/rsc.io/gitfs/git.go
index 268948da..b380d7a1 100644
--- a/internal/forks/rsc.io/gitfs/git.go
+++ b/internal/forks/rsc.io/gitfs/git.go
@@ -196,6 +196,101 @@ func (r *Repo) CloneHash(ctx context.Context, h Hash) (fs.FS, []byte, error) {
return tfs, data, nil
}
+// FetchPack fetches a full (non-shallow) packfile from the remote server,
+// requesting all refs. It returns the raw packfile bytes.
+func (r *Repo) FetchPack(ctx context.Context) ([]byte, error) {
+ if _, ok := r.caps["fetch"]; !ok {
+ return nil, fmt.Errorf("fetchpack: server does not support fetch")
+ }
+
+ refs, err := r.Refs(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("fetchpack: refs: %v", err)
+ }
+
+ // Deduplicate hashes.
+ seen := map[Hash]bool{}
+ var wants []Hash
+ for _, ref := range refs {
+ if !seen[ref.Hash] {
+ seen[ref.Hash] = true
+ wants = append(wants, ref.Hash)
+ }
+ }
+ if len(wants) == 0 {
+ return nil, fmt.Errorf("fetchpack: no refs found")
+ }
+
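+ // Build a protocol v2 fetch request: one "want" per advertised ref
+ // tip, no "have" lines (we want the full clone), ended by "done".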
+ var buf bytes.Buffer
+ pw := newPktLineWriter(&buf)
+ pw.WriteString("command=fetch")
+ pw.Delim()
+ for _, h := range wants {
+ pw.WriteString("want " + h.String())
+ }
+ pw.WriteString("done")
+ pw.Close()
+
+ req, err := http.NewRequestWithContext(ctx, "POST", r.url+"/git-upload-pack", &buf)
+ if err != nil {
+ return nil, fmt.Errorf("fetchpack: %v", err)
+ }
+ req.Header.Set("Content-Type", "application/x-git-upload-pack-request")
+ req.Header.Set("Accept", "application/x-git-upload-pack-result")
+ req.Header.Set("Git-Protocol", "version=2")
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("fetchpack: %v", err)
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != 200 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("fetchpack: %v\n%s", resp.Status, body)
+ }
+ if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" {
+ return nil, fmt.Errorf("fetchpack: invalid response Content-Type: %v", ct)
+ }
+
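+ // The response is a pkt-line stream: section headers such as
+ // "packfile", then sideband-framed lines whose first byte selects a
+ // channel: 1 = pack data, 2 = progress, 3 = fatal error.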
+ var data []byte
+ pr := newPktLineReader(resp.Body)
+ sawPackfile := false
+ for {
+ line, err := pr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ return nil, fmt.Errorf("fetchpack: parsing response: %v", err)
+ }
+ if line == nil {
+ continue
+ }
+ if !sawPackfile {
+ if strings.TrimSuffix(string(line), "\n") == "packfile" {
+ sawPackfile = true
+ }
+ continue
+ }
+ if len(line) == 0 || line[0] == 0 || line[0] > 3 {
+ continue
+ }
+ switch line[0] {
+ case 1:
+ data = append(data, line[1:]...)
+ case 2:
+ // progress
+ case 3:
+ return nil, fmt.Errorf("fetchpack: server error: %s", line[1:])
+ }
+ }
+
+ if !bytes.HasPrefix(data, []byte("PACK")) {
+ return nil, fmt.Errorf("fetchpack: malformed response: not packfile")
+ }
+
+ return data, nil
+}
+
// fetch returns the fs.FS for a given hash.
func (r *Repo) fetch(ctx context.Context, h Hash) (fs.FS, []byte, error) {
// Fetch a shallow packfile from the remote server.
@@ -285,8 +380,8 @@ func (r *Repo) fetch(ctx context.Context, h Hash) (fs.FS, []byte, error) {
}
// Unpack pack file and return fs.FS for the commit we downloaded.
- var s store
- if err := unpack(&s, data); err != nil {
+ var s Store
+ if err := Unpack(&s, data); err != nil {
return nil, nil, fmt.Errorf("fetch: %v", err)
}
s.repo = r
diff --git a/internal/forks/rsc.io/gitfs/git_test.go b/internal/forks/rsc.io/gitfs/git_test.go
index bfabcf6c..e2a66a33 100644
--- a/internal/forks/rsc.io/gitfs/git_test.go
+++ b/internal/forks/rsc.io/gitfs/git_test.go
@@ -54,8 +54,8 @@ func TestPack(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- var s store
- err = unpack(&s, data)
+ var s Store
+ err = Unpack(&s, data)
if err != nil {
t.Fatal(err)
}
diff --git a/internal/forks/rsc.io/gitfs/pack.go b/internal/forks/rsc.io/gitfs/pack.go
index f7e79c5e..e4b5d000 100644
--- a/internal/forks/rsc.io/gitfs/pack.go
+++ b/internal/forks/rsc.io/gitfs/pack.go
@@ -13,11 +13,11 @@ import (
"io"
)
-// unpack parses data, which is a Git pack-formatted archive,
-// writing every object it contains to the store s.
+// Unpack parses data, which is a Git pack-formatted archive,
+// writing every object it contains to the Store s.
//
// See https://git-scm.com/docs/pack-format for format documentation.
-func unpack(s *store, data []byte) error {
+func Unpack(s *Store, data []byte) error {
// If the store is empty, pre-allocate the length of data.
// This should be about the right order of magnitude for the eventual data,
// avoiding many growing steps during append.
@@ -50,7 +50,7 @@ func unpack(s *store, data []byte) error {
objs := data[12 : len(data)-20]
off := 0
for i := 0; i < int(nobj); i++ {
- _, _, _, encSize, err := unpackObject(s, objs, off)
+ _, _, _, encSize, err := UnpackObject(s, objs, off)
if err != nil {
return fmt.Errorf("unpack: malformed git pack: %v", err)
}
@@ -62,10 +62,10 @@ func unpack(s *store, data []byte) error {
return nil
}
-// unpackObject unpacks the object at objs[off:] and writes it to the store s.
+// UnpackObject unpacks the object at objs[off:] and writes it to the Store s.
// It returns the type, hash, and content of the object, as well as the encoded size,
// meaning the number of bytes at the start of objs[off:] that this record occupies.
-func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content []byte, encSize int, err error) {
+func UnpackObject(s *Store, objs []byte, off int) (typ ObjType, h Hash, content []byte, encSize int, err error) {
fail := func(err error) (ObjType, Hash, []byte, int, error) {
return 0, Hash{}, nil, 0, err
}
@@ -92,7 +92,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content
var deltaTyp ObjType
var deltaBase []byte
switch typ {
- case objRefDelta:
+ case ObjRefDelta:
if len(objs)-(off+size) < 20 {
return fail(fmt.Errorf("invalid object: bad delta ref"))
}
@@ -105,7 +105,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content
return fail(fmt.Errorf("invalid object: unknown delta ref %v", h))
}
- case objOfsDelta:
+ case ObjOfsDelta:
i := off + size
if len(objs)-i < 20 {
return fail(fmt.Errorf("invalid object: too short"))
@@ -130,7 +130,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content
return fail(fmt.Errorf("invalid object: bad delta offset"))
}
var err error
- deltaTyp, _, deltaBase, _, err = unpackObject(s, objs, off-int(d))
+ deltaTyp, _, deltaBase, _, err = UnpackObject(s, objs, off-int(d))
if err != nil {
return fail(fmt.Errorf("invalid object: bad delta offset"))
}
@@ -156,9 +156,9 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content
switch typ {
default:
return fail(fmt.Errorf("invalid object: unknown object type"))
- case objCommit, objTree, objBlob, objTag:
+ case ObjCommit, ObjTree, ObjBlob, ObjTag:
// ok
- case objRefDelta, objOfsDelta:
+ case ObjRefDelta, ObjOfsDelta:
// Actual object type is the type of the base object.
typ = deltaTyp
@@ -179,7 +179,7 @@ func unpackObject(s *store, objs []byte, off int) (typ ObjType, h Hash, content
data = targ
}
- h, data = s.add(typ, data)
+ h, data = s.Add(typ, data)
return typ, h, data, encSize, nil
}
diff --git a/internal/git/cache.go b/internal/git/cache.go
new file mode 100644
index 00000000..19fc008c
--- /dev/null
+++ b/internal/git/cache.go
@@ -0,0 +1,315 @@
+package git
+
+import (
+ "compress/gzip"
+ "context"
+ "crypto/sha256"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+
+ "cloud.google.com/go/storage"
+)
+
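+// A packCache persists raw packfiles and their parsed indexes, keyed
+// by cacheKey(repoURL). Backends: GCS, a local directory, or a no-op
+// fallback when neither is configured.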
+type packCache interface {
+ GetIndex(ctx context.Context, key string) (*PackIndex, error)
+ PutIndex(ctx context.Context, key string, idx *PackIndex) error
+ GetPack(ctx context.Context, key string) ([]byte, error)
+ PutPack(ctx context.Context, key string, data []byte) error
+ RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error)
+}
+
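+// cacheKey derives a stable cache key for a repo URL: the first 128
+// bits of its SHA-256, hex-encoded.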
+func cacheKey(repoURL string) string {
+ h := sha256.Sum256([]byte(repoURL))
+ return fmt.Sprintf("%x", h[:16])
+}
+
+// gcsPackCache stores packs and indexes in GCS.
+type gcsPackCache struct {
+ bucket *storage.BucketHandle
+}
+
+func (g *gcsPackCache) packPath(key string) string {
+ return fmt.Sprintf("pack/%s/pack.bin", key)
+}
+
+func (g *gcsPackCache) indexPath(key string) string {
+ return fmt.Sprintf("pack/%s/index.json.gz", key)
+}
+
+func (g *gcsPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) {
+ rc, err := g.bucket.Object(g.indexPath(key)).NewReader(ctx)
+ if err != nil {
+ return nil, err
+ }
+ defer rc.Close()
+ zr, err := gzip.NewReader(rc)
+ if err != nil {
+ return nil, err
+ }
+ defer zr.Close()
+ idx := &PackIndex{}
+ if err := json.NewDecoder(zr).Decode(idx); err != nil {
+ return nil, err
+ }
+ return idx, nil
+}
+
+func (g *gcsPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error {
+ w := g.bucket.Object(g.indexPath(key)).NewWriter(ctx)
+ zw, err := gzip.NewWriterLevel(w, gzip.BestSpeed)
+ if err != nil {
+ return err
+ }
+ if err := json.NewEncoder(zw).Encode(idx); err != nil {
+ zw.Close()
+ w.Close()
+ return err
+ }
+ if err := zw.Close(); err != nil {
+ w.Close()
+ return err
+ }
+ return w.Close()
+}
+
+func (g *gcsPackCache) GetPack(ctx context.Context, key string) ([]byte, error) {
+ rc, err := g.bucket.Object(g.packPath(key)).NewReader(ctx)
+ if err != nil {
+ return nil, err
+ }
+ defer rc.Close()
+ return io.ReadAll(rc)
+}
+
+func (g *gcsPackCache) PutPack(ctx context.Context, key string, data []byte) error {
+ w := g.bucket.Object(g.packPath(key)).NewWriter(ctx)
+ if _, err := w.Write(data); err != nil {
+ w.Close()
+ return err
+ }
+ return w.Close()
+}
+
+func (g *gcsPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) {
+ return g.bucket.Object(g.packPath(key)).NewRangeReader(ctx, offset, length)
+}
+
+// dirPackCache stores packs and indexes in a local directory.
+type dirPackCache struct {
+ dir string
+}
+
+func (d *dirPackCache) path(key, name string) string {
+ return filepath.Join(d.dir, "pack", key, name)
+}
+
+func (d *dirPackCache) ensureDir(key string) error {
+ return os.MkdirAll(filepath.Join(d.dir, "pack", key), 0755)
+}
+
+func (d *dirPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) {
+ f, err := os.Open(d.path(key, "index.json.gz"))
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+ zr, err := gzip.NewReader(f)
+ if err != nil {
+ return nil, err
+ }
+ defer zr.Close()
+ idx := &PackIndex{}
+ if err := json.NewDecoder(zr).Decode(idx); err != nil {
+ return nil, err
+ }
+ return idx, nil
+}
+
+func (d *dirPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error {
+ if err := d.ensureDir(key); err != nil {
+ return err
+ }
+ f, err := os.Create(d.path(key, "index.json.gz"))
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ zw, err := gzip.NewWriterLevel(f, gzip.BestSpeed)
+ if err != nil {
+ return err
+ }
+ if err := json.NewEncoder(zw).Encode(idx); err != nil {
+ zw.Close()
+ return err
+ }
+ return zw.Close()
+}
+
+func (d *dirPackCache) GetPack(ctx context.Context, key string) ([]byte, error) {
+ return os.ReadFile(d.path(key, "pack.bin"))
+}
+
+func (d *dirPackCache) PutPack(ctx context.Context, key string, data []byte) error {
+ if err := d.ensureDir(key); err != nil {
+ return err
+ }
+ return os.WriteFile(d.path(key, "pack.bin"), data, 0644)
+}
+
+func (d *dirPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) {
+ f, err := os.Open(d.path(key, "pack.bin"))
+ if err != nil {
+ return nil, err
+ }
+ // Close the underlying file when the caller closes the reader;
+ // io.NopCloser would leak the file handle.
+ return struct {
+ io.Reader
+ io.Closer
+ }{io.NewSectionReader(f, offset, length), f}, nil
+}
+
+// memPackIndex is an in-memory LRU of parsed PackIndex structs.
+type memPackIndex struct {
+ mu sync.Mutex
+ cap int
+ entries map[string]*memIndexEntry
+}
+
+type memIndexEntry struct {
+ idx *PackIndex
+ access time.Time
+}
+
+func (m *memPackIndex) Get(key string) *PackIndex {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ if e, ok := m.entries[key]; ok {
+ e.access = time.Now()
+ return e.idx
+ }
+ return nil
+}
+
+func (m *memPackIndex) Put(key string, idx *PackIndex) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ if m.entries == nil {
+ m.entries = make(map[string]*memIndexEntry)
+ }
+ if len(m.entries) >= m.cap {
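+ // Evict the least-recently-used entry. A linear scan is fine at
+ // this capacity (50 entries).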
+ var oldest string
+ var oldestTime time.Time
+ for k, e := range m.entries {
+ if oldest == "" || e.access.Before(oldestTime) {
+ oldest = k
+ oldestTime = e.access
+ }
+ }
+ delete(m.entries, oldest)
+ }
+ m.entries[key] = &memIndexEntry{idx: idx, access: time.Now()}
+}
+
+func buildPackCache() packCache {
+ if cd := os.Getenv("CACHE_DIR"); cd != "" {
+ log.Printf("pack cache: dir=%s", cd)
+ return &dirPackCache{dir: cd}
+ }
+ if cb := os.Getenv("CACHE_BUCKET"); cb != "" {
+ log.Printf("pack cache: bucket=%s", cb)
+ client, err := storage.NewClient(context.Background())
+ if err != nil {
+ log.Printf("pack cache: gcs error: %v", err)
+ return &noopPackCache{}
+ }
+ bkt := client.Bucket(strings.TrimPrefix(cb, "gs://"))
+ return &gcsPackCache{bucket: bkt}
+ }
+ return &noopPackCache{}
+}
+
+// noopPackCache is used when no cache backend is configured.
+type noopPackCache struct{}
+
+func (n *noopPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) {
+ return nil, fmt.Errorf("no cache")
+}
+func (n *noopPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error {
+ return nil
+}
+func (n *noopPackCache) GetPack(ctx context.Context, key string) ([]byte, error) {
+ return nil, fmt.Errorf("no cache")
+}
+func (n *noopPackCache) PutPack(ctx context.Context, key string, data []byte) error {
+ return nil
+}
+func (n *noopPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) {
+ return nil, fmt.Errorf("no cache")
+}
+
+// tieredPackCache wraps a persistent cache with an in-memory LRU for indexes.
+type tieredPackCache struct {
+ mem *memPackIndex
+ back packCache
+}
+
+func (t *tieredPackCache) GetIndex(ctx context.Context, key string) (*PackIndex, error) {
+ if idx := t.mem.Get(key); idx != nil {
+ return idx, nil
+ }
+ idx, err := t.back.GetIndex(ctx, key)
+ if err != nil {
+ return nil, err
+ }
+ t.mem.Put(key, idx)
+ return idx, nil
+}
+
+func (t *tieredPackCache) PutIndex(ctx context.Context, key string, idx *PackIndex) error {
+ t.mem.Put(key, idx)
+ return t.back.PutIndex(ctx, key, idx)
+}
+
+func (t *tieredPackCache) GetPack(ctx context.Context, key string) ([]byte, error) {
+ return t.back.GetPack(ctx, key)
+}
+
+func (t *tieredPackCache) PutPack(ctx context.Context, key string, data []byte) error {
+ return t.back.PutPack(ctx, key, data)
+}
+
+func (t *tieredPackCache) RangeReader(ctx context.Context, key string, offset, length int64) (io.ReadCloser, error) {
+ return t.back.RangeReader(ctx, key, offset, length)
+}
+
+func newPackCache() packCache {
+ return &tieredPackCache{
+ mem: &memPackIndex{cap: 50},
+ back: buildPackCache(),
+ }
+}
+
+// memPackData holds raw packfile bytes in memory for object detail views.
+type memPackData struct {
+ mu sync.Mutex
+ data map[string][]byte
+}
+
+func (m *memPackData) Get(key string) []byte {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ return m.data[key]
+}
+
+func (m *memPackData) Put(key string, data []byte) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ if m.data == nil {
+ m.data = make(map[string][]byte)
+ }
+ m.data[key] = data
+}
+
diff --git a/internal/git/git.go b/internal/git/git.go
index 4fde8b83..ddca43ed 100644
--- a/internal/git/git.go
+++ b/internal/git/git.go
@@ -3,6 +3,7 @@ package git
import (
"bufio"
"bytes"
+ "context"
"fmt"
"html"
"io"
@@ -36,6 +37,9 @@ type handler struct {
repos map[string]*gitfs.Repo
commits map[string][]byte
fsyss map[string]fs.FS
+
+ packCache packCache
+ packData memPackData
}
type Option func(h *handler)
@@ -48,10 +52,11 @@ func WithUserAgent(ua string) Option {
func New(args []string, opts ...Option) http.Handler {
h := handler{
- args: args,
- repos: map[string]*gitfs.Repo{},
- fsyss: map[string]fs.FS{},
- commits: map[string][]byte{},
+ args: args,
+ repos: map[string]*gitfs.Repo{},
+ fsyss: map[string]fs.FS{},
+ commits: map[string][]byte{},
+ packCache: newPackCache(),
}
for _, opt := range opts {
@@ -63,6 +68,7 @@ func New(args []string, opts ...Option) http.Handler {
mux.HandleFunc("/", h.errHandler(h.renderResponse))
mux.HandleFunc("/http/", h.errHandler(h.renderFS))
mux.HandleFunc("/https/", h.errHandler(h.renderFS))
+ mux.HandleFunc("/pack/", h.errHandler(h.renderPackObject))
h.mux = gzhttp.GzipHandler(mux)
@@ -108,6 +114,14 @@ func (h *handler) errHandler(hfe HandleFuncE) http.HandlerFunc {
func (h *handler) renderResponse(w http.ResponseWriter, r *http.Request) error {
qs := r.URL.Query()
+ if q := qs.Get("pack"); q != "" {
+ u, err := url.PathUnescape(q)
+ if err != nil {
+ return err
+ }
+ return h.renderPackOverview(w, r, u)
+ }
+
if q := qs.Get("url"); q != "" {
u, err := url.PathUnescape(q)
if err != nil {
@@ -606,3 +620,512 @@ func (d *dumbEscaper) Write(p []byte) (n int, err error) {
}
return len(p), d.buf.Flush()
}
+
+func (h *handler) getOrFetchPack(ctx context.Context, repoURL string) (*PackIndex, string, error) {
+ key := cacheKey(repoURL)
+
+ // Try cache first.
+ idx, err := h.packCache.GetIndex(ctx, key)
+ if err == nil {
+ return idx, key, nil
+ }
+
+ // Cache miss: fetch the packfile.
+ if !strings.Contains(repoURL, "://") {
+ repoURL = "https://" + repoURL
+ }
+
+ repo, err := gitfs.NewRepo(ctx, repoURL)
+ if err != nil {
+ return nil, "", fmt.Errorf("NewRepo: %w", err)
+ }
+
+ data, err := repo.FetchPack(ctx)
+ if err != nil {
+ return nil, "", fmt.Errorf("FetchPack: %w", err)
+ }
+
+ idx, err = BuildPackIndex(data)
+ if err != nil {
+ return nil, "", fmt.Errorf("BuildPackIndex: %w", err)
+ }
+
+ // Store in cache (best effort).
+ if putErr := h.packCache.PutPack(ctx, key, data); putErr != nil {
+ log.Printf("pack cache put pack: %v", putErr)
+ }
+ if putErr := h.packCache.PutIndex(ctx, key, idx); putErr != nil {
+ log.Printf("pack cache put index: %v", putErr)
+ }
+
+ // Keep packfile data in memory for object detail views.
+ h.packData.Put(key, data)
+
+ return idx, key, nil
+}
+
+func (h *handler) renderPackOverview(w http.ResponseWriter, r *http.Request, repoURL string) error {
+ ctx := r.Context()
+
+ idx, key, err := h.getOrFetchPack(ctx, repoURL)
+ if err != nil {
+ return err
+ }
+
+ repo := strings.TrimPrefix(repoURL, "https://")
+ repo = strings.TrimPrefix(repo, "http://")
+
+ if err := headerTmpl.Execute(w, TitleData{"Pack: " + repo}); err != nil {
+ return err
+ }
+ hd := HeaderData{
+ Repo: repo,
+ RepoLink: repoURL,
+ JQ: fmt.Sprintf("git verify-pack -v .git/objects/pack/pack-%s.idx", idx.Checksum),
+ }
+ if err := bodyTmpl.Execute(w, hd); err != nil {
+ return err
+ }
+
+ // Compute stats.
+ nonDelta := 0
+ chainLengths := map[int]int{}
+ for _, obj := range idx.Objects {
+ if obj.Depth == 0 {
+ nonDelta++
+ } else {
+ chainLengths[obj.Depth]++
+ }
+ }
+
+ // Filter by type if requested.
+ filterType := r.URL.Query().Get("type")
+
+ fmt.Fprintf(w, "<pre>\n")
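+ // One line per object, mirroring git verify-pack -v:
+ //   SHA-1 type size size-in-packfile offset-in-packfile [depth base-SHA-1]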
+ for _, obj := range idx.Objects {
+ if filterType != "" {
+ if strings.HasPrefix(filterType, "depth-") {
+ var d int
+ fmt.Sscanf(filterType, "depth-%d", &d)
+ if obj.Depth != d {
+ continue
+ }
+ } else if obj.ResolvedType != filterType && obj.Type != filterType {
+ continue
+ }
+ }
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.Hash, key)
+ hashLink := fmt.Sprintf("<a href=%q>%s</a>", href, obj.Hash)
+
+ if obj.Depth > 0 {
+ baseHref := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.BaseHash, key)
+ baseLink := fmt.Sprintf("<a href=%q>%s</a>", baseHref, obj.BaseHash)
+ fmt.Fprintf(w, "%s %-6s %d %d %d %d %s\n", hashLink, obj.ResolvedType, obj.Size, obj.EncodedSize, obj.Offset, obj.Depth, baseLink)
+ } else {
+ fmt.Fprintf(w, "%s %-6s %d %d %d\n", hashLink, obj.ResolvedType, obj.Size, obj.EncodedSize, obj.Offset)
+ }
+ }
+
+ // Summary.
+ fmt.Fprintf(w, "non delta: %d objects\n", nonDelta)
+ maxDepth := 0
+ for d := range chainLengths {
+ if d > maxDepth {
+ maxDepth = d
+ }
+ }
+ for d := 1; d <= maxDepth; d++ {
+ if c, ok := chainLengths[d]; ok {
+ href := fmt.Sprintf("/?pack=%s&type=depth-%d", url.QueryEscape(repoURL), d)
+ fmt.Fprintf(w, "chain length = %d: %d objects\n", d, href, c)
+ }
+ }
+
+ fmt.Fprintf(w, "</pre>\n")
+ fmt.Fprintf(w, footer)
+ return nil
+}
+
+func (h *handler) renderPackObject(w http.ResponseWriter, r *http.Request) error {
+ ctx := r.Context()
+
+ // Path: /pack//
+ p := strings.TrimPrefix(r.URL.Path, "/pack/")
+ lastSlash := strings.LastIndex(p, "/")
+ if lastSlash < 0 {
+ return fmt.Errorf("invalid pack object path: %s", r.URL.Path)
+ }
+ repo := p[:lastSlash]
+ hash := p[lastSlash+1:]
+ key := r.URL.Query().Get("key")
+ if key == "" {
+ key = cacheKey(repo)
+ }
+
+ // Get the index to find the object.
+ idx, err := h.packCache.GetIndex(ctx, key)
+ if err != nil {
+ // Try fetching.
+ repoURL := repo
+ if !strings.Contains(repoURL, "://") {
+ repoURL = "https://" + repoURL
+ }
+ idx, key, err = h.getOrFetchPack(ctx, repoURL)
+ if err != nil {
+ return fmt.Errorf("get pack index: %w", err)
+ }
+ }
+
+ // Find the object by hash.
+ var obj *PackObject
+ for i := range idx.Objects {
+ if idx.Objects[i].Hash == hash {
+ obj = &idx.Objects[i]
+ break
+ }
+ }
+ if obj == nil {
+ return fmt.Errorf("object %s not found in pack", hash)
+ }
+
+ // Get the packfile data to decompress the object.
+ data := h.packData.Get(key)
+ if data == nil {
+ data, err = h.packCache.GetPack(ctx, key)
+ if err != nil {
+ return fmt.Errorf("get pack data: %w", err)
+ }
+ h.packData.Put(key, data)
+ }
+
+ objType, content, err := DecompressObject(data, obj.Hash)
+ if err != nil {
+ return fmt.Errorf("decompress: %w", err)
+ }
+
+ if err := headerTmpl.Execute(w, TitleData{hash[:12] + " - Pack Object"}); err != nil {
+ return err
+ }
+ hd := HeaderData{
+ Repo: repo,
+ RepoLink: fmt.Sprintf("/?pack=%s", url.QueryEscape(repo)),
+ JQ: fmt.Sprintf("git cat-file -p %s", hash),
+ }
+ if err := bodyTmpl.Execute(w, hd); err != nil {
+ return err
+ }
+
+ fmt.Fprintf(w, "<pre>\n")
+ if obj.Type != objType {
+ fmt.Fprintf(w, "type: %s (resolves to %s)\n", obj.Type, objType)
+ } else {
+ fmt.Fprintf(w, "type: %s\n", objType)
+ }
+ fmt.Fprintf(w, "size: %s (%d bytes)\n", formatBytes(int64(obj.Size)), obj.Size)
+ fmt.Fprintf(w, "offset: %d\n", obj.Offset)
+ fmt.Fprintf(w, "encoded: %d bytes\n", obj.EncodedSize)
+ if obj.BaseHash != "" {
+ baseHref := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), obj.BaseHash, key)
+ fmt.Fprintf(w, "base: %s (%s", baseHref, obj.BaseHash, obj.Type)
+ if obj.Type == "ofs-delta" {
+ fmt.Fprintf(w, ", offset %d", obj.BaseOffset)
+ }
+ fmt.Fprintf(w, ")\n")
+ fmt.Fprintf(w, "depth: %d\n", obj.Depth)
+ }
+ fmt.Fprintf(w, "\n")
+
+ // For delta objects, show the raw delta instructions.
+ if obj.Type == "ref-delta" || obj.Type == "ofs-delta" {
+ rawDelta, err := RawDelta(data, obj.Offset)
+ if err == nil {
+ deltaInfo, err := ParseDelta(rawDelta)
+ if err == nil {
+ h.renderDeltaOps(w, deltaInfo, repo, key, idx)
+ fmt.Fprintf(w, "\n")
+ }
+ }
+ fmt.Fprintf(w, "--- resolved content (%s) ---\n\n", objType)
+ }
+
+ // Render resolved content based on type.
+ switch objType {
+ case "commit":
+ h.renderPackCommit(w, content, repo, key)
+ case "tree":
+ h.renderPackTree(w, content, repo, key, idx)
+ case "blob":
+ size := min(int64(len(content)), tooBig)
+ esc := &dumbEscaper{buf: bufio.NewWriter(w)}
+ io.CopyN(esc, bytes.NewReader(content), size)
+ if int64(len(content)) > tooBig {
+ fmt.Fprintf(w, "\n... truncated (%s total)", formatBytes(int64(len(content))))
+ }
+ case "tag":
+ h.renderPackTag(w, content, repo, key)
+ default:
+ fmt.Fprintf(w, "(raw %d bytes)\n", len(content))
+ }
+
+ fmt.Fprintf(w, "</pre>\n")
+ fmt.Fprintf(w, footer)
+ return nil
+}
+
+func (h *handler) renderPackCommit(w io.Writer, content []byte, repo, key string) {
+ scanner := bufio.NewScanner(bytes.NewReader(content))
+ for scanner.Scan() {
+ line := scanner.Text()
+ hdr, val, ok := strings.Cut(line, " ")
+ if !ok {
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ continue
+ }
+ switch hdr {
+ case "tree", "parent":
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), val, key)
+ fmt.Fprintf(w, "%s %s\n", hdr, href, val)
+ default:
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ }
+ }
+}
+
+func (h *handler) renderPackTree(w io.Writer, content []byte, repo, key string, idx *PackIndex) {
+ // Build hash lookup.
+ hashSet := map[string]bool{}
+ for _, obj := range idx.Objects {
+ hashSet[obj.Hash] = true
+ }
+
+ data := content
+ for len(data) > 0 {
+ e, size := gitfs.ParseDirEntry(data)
+ if size == 0 {
+ break
+ }
+ data = data[size:]
+
+ hashStr := e.Hash.String()
+ typeStr := "blob"
+ if e.Mode == 0o40000 {
+ typeStr = "tree"
+ } else if e.Mode == 0o160000 {
+ typeStr = "commit"
+ }
+
+ name := htmlEscape(string(e.Name))
+ if hashSet[hashStr] {
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key)
+ fmt.Fprintf(w, "%06o %s %s\t%s\n", e.Mode, typeStr, href, hashStr, name)
+ } else {
+ fmt.Fprintf(w, "%06o %s %s\t%s\n", e.Mode, typeStr, hashStr, name)
+ }
+ }
+}
+
+func (h *handler) renderPackTag(w io.Writer, content []byte, repo, key string) {
+ scanner := bufio.NewScanner(bytes.NewReader(content))
+ for scanner.Scan() {
+ line := scanner.Text()
+ hdr, val, ok := strings.Cut(line, " ")
+ if !ok {
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ continue
+ }
+ switch hdr {
+ case "object":
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), val, key)
+ fmt.Fprintf(w, "%s %s\n", hdr, href, val)
+ default:
+ fmt.Fprintf(w, "%s\n", htmlEscape(line))
+ }
+ }
+}
+
+func (h *handler) renderDeltaOps(w io.Writer, info *DeltaInfo, repo, key string, idx *PackIndex) {
+ // Build hash lookup for linking.
+ hashSet := map[string]bool{}
+ for _, obj := range idx.Objects {
+ hashSet[obj.Hash] = true
+ }
+
+ fmt.Fprintf(w, "--- delta instructions ---\n\n")
+ fmt.Fprintf(w, "base size: %s (%d bytes)\n", formatBytes(int64(info.BaseSize)), info.BaseSize)
+ fmt.Fprintf(w, "target size: %s (%d bytes)\n", formatBytes(int64(info.TargetSize)), info.TargetSize)
+ fmt.Fprintf(w, "operations: %d\n\n", len(info.Ops))
+
+ for i, op := range info.Ops {
+ switch op.Kind {
+ case "copy":
+ fmt.Fprintf(w, "%4d copy base[%d:%d] (%d bytes)\n",
+ i, op.Offset, op.Offset+op.Size, op.Size)
+ case "insert":
+ if isBinary(op.Data) {
+ if entries, prefix, suffix := tryParseTreeInsert(op.Data); len(entries) > 0 || len(prefix) > 0 {
+ fmt.Fprintf(w, "%4d insert %d bytes (tree data)\n", i, op.Size)
+ if len(prefix) > 0 {
+ writeHashFragment(w, prefix, hashSet, repo, key)
+ }
+ for _, e := range entries {
+ hashStr := e.Hash.String()
+ typeStr := "blob"
+ if e.Mode == 0o40000 {
+ typeStr = "tree"
+ } else if e.Mode == 0o160000 {
+ typeStr = "commit"
+ }
+ name := htmlEscape(string(e.Name))
+ if hashSet[hashStr] {
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key)
+ fmt.Fprintf(w, " %06o %s %s\t%s\n", e.Mode, typeStr, href, hashStr, name)
+ } else {
+ fmt.Fprintf(w, " %06o %s %s\t%s\n", e.Mode, typeStr, hashStr, name)
+ }
+ }
+ if len(suffix) > 0 {
+ writeHashFragment(w, suffix, hashSet, repo, key)
+ }
+ } else {
+ fmt.Fprintf(w, "%4d insert %d bytes\n", i, op.Size)
+ writeHexDump(w, op.Data)
+ }
+ } else {
+ fmt.Fprintf(w, "%4d insert %d bytes: ", i, op.Size)
+ show := op.Data
+ truncated := false
+ if len(show) > 128 {
+ show = show[:128]
+ truncated = true
+ }
+ esc := &dumbEscaper{buf: bufio.NewWriter(w)}
+ esc.Write(show)
+ if truncated {
+ fmt.Fprintf(w, "...")
+ }
+ fmt.Fprintf(w, "\n")
+ }
+ }
+ }
+}
+
+// tryParseTreeInsert tries to interpret binary insert data as tree entry fragments.
+// It returns any complete tree entries parsed, plus any leading prefix (trailing
+// hash bytes from a previous entry) and trailing suffix that didn't form a complete entry.
+func tryParseTreeInsert(data []byte) (entries []gitfs.DirEntry, prefix, suffix []byte) {
+ // The insert might start mid-entry — the leading bytes could be the tail
+ // of a previous entry's 20-byte hash. Look for the start of a tree entry:
+ // an octal digit followed eventually by ' ', name, '\0', 20 bytes.
+ start := 0
+ for start < len(data) {
+ if data[start] >= '1' && data[start] <= '7' {
+ // Might be the start of a mode. Try parsing.
+ e, size := gitfs.ParseDirEntry(data[start:])
+ if size > 0 {
+ // Found a valid entry start. Everything before it is prefix.
+ if start > 0 {
+ prefix = data[:start]
+ }
+ entries = append(entries, e)
+ pos := start + size
+ // Parse remaining entries.
+ for pos < len(data) {
+ e, size := gitfs.ParseDirEntry(data[pos:])
+ if size == 0 {
+ break
+ }
+ entries = append(entries, e)
+ pos += size
+ }
+ if pos < len(data) {
+ suffix = data[pos:]
+ }
+ return entries, prefix, suffix
+ }
+ }
+ start++
+ }
+ // Couldn't parse any entries. Might be a pure hash fragment.
+ if len(data) <= 20 {
+ return nil, data, nil
+ }
+ return nil, nil, nil
+}
+
+func writeHashFragment(w io.Writer, data []byte, hashSet map[string]bool, repo, key string) {
+ if len(data) == 20 {
+ hashStr := fmt.Sprintf("%x", data)
+ if hashSet[hashStr] {
+ href := fmt.Sprintf("/pack/%s/%s?key=%s", url.PathEscape(repo), hashStr, key)
+ fmt.Fprintf(w, " hash %s\n", href, hashStr)
+ } else {
+ fmt.Fprintf(w, " hash %s\n", hashStr)
+ }
+ } else {
+ fmt.Fprintf(w, " (%d bytes) %x\n", len(data), data)
+ }
+}
+
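+// isBinary reports whether data looks like binary rather than text:
+// any NUL, any byte >= 0x7f, or a control character other than
+// \n, \r, or \t.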
+func isBinary(data []byte) bool {
+ for _, b := range data {
+ if b == 0 || b >= 0x7f {
+ return true
+ }
+ if b < 0x20 && b != '\n' && b != '\r' && b != '\t' {
+ return true
+ }
+ }
+ return false
+}
+
+func writeHexDump(w io.Writer, data []byte) {
+ for i := 0; i < len(data); i += 16 {
+ end := i + 16
+ if end > len(data) {
+ end = len(data)
+ }
+ line := data[i:end]
+
+ // Offset.
+ fmt.Fprintf(w, " %04x ", i)
+
+ // Hex bytes.
+ for j, b := range line {
+ if j == 8 {
+ fmt.Fprintf(w, " ")
+ }
+ fmt.Fprintf(w, "%02x ", b)
+ }
+ // Pad if short line.
+ for j := len(line); j < 16; j++ {
+ if j == 8 {
+ fmt.Fprintf(w, " ")
+ }
+ fmt.Fprintf(w, " ")
+ }
+
+ // ASCII.
+ fmt.Fprintf(w, " |")
+ for _, b := range line {
+ if b >= 0x20 && b < 0x7f {
+ fmt.Fprintf(w, "%c", b)
+ } else {
+ fmt.Fprintf(w, ".")
+ }
+ }
+ fmt.Fprintf(w, "|\n")
+ }
+}
+
+func formatBytes(b int64) string {
+ switch {
+ case b >= 1<<30:
+ return fmt.Sprintf("%.1f GiB", float64(b)/(1<<30))
+ case b >= 1<<20:
+ return fmt.Sprintf("%.1f MiB", float64(b)/(1<<20))
+ case b >= 1<<10:
+ return fmt.Sprintf("%.1f KiB", float64(b)/(1<<10))
+ default:
+ return fmt.Sprintf("%d B", b)
+ }
+}
diff --git a/internal/git/packfile.go b/internal/git/packfile.go
new file mode 100644
index 00000000..38501aa5
--- /dev/null
+++ b/internal/git/packfile.go
@@ -0,0 +1,442 @@
+package git
+
+import (
+ "bytes"
+ "compress/zlib"
+ "crypto/sha1"
+ "encoding/binary"
+ "encoding/hex"
+ "fmt"
+ "io"
+
+ "github.com/jonjohnsonjr/dagdotdev/internal/forks/rsc.io/gitfs"
+)
+
+type PackIndex struct {
+ Version uint32 `json:"version"`
+ NumObjects uint32 `json:"numObjects"`
+ Size int64 `json:"size"`
+ Checksum string `json:"checksum"`
+ Objects []PackObject `json:"objects"`
+}
+
+type PackObject struct {
+ Offset int `json:"offset"`
+ EncodedSize int `json:"encodedSize"`
+ Type string `json:"type"` // raw type: commit, tree, blob, tag, ofs-delta, ref-delta
+ ResolvedType string `json:"resolvedType"` // resolved type after delta resolution
+ Size int `json:"size"`
+ Hash string `json:"hash"`
+
+ // Delta info (only for ofs-delta and ref-delta objects)
+ DeltaBase string `json:"deltaBase,omitempty"` // raw base ref (hash for ref-delta, offset string for ofs-delta)
+ BaseHash string `json:"baseHash,omitempty"` // resolved base object hash
+ BaseOffset int `json:"baseOffset,omitempty"`
+ Depth int `json:"depth,omitempty"` // delta chain depth (0 for non-delta)
+}
+
+// BuildPackIndex parses a raw packfile and builds an index of all objects.
+func BuildPackIndex(data []byte) (*PackIndex, error) {
+ if len(data) < 12+20 {
+ return nil, fmt.Errorf("packfile too short")
+ }
+
+ hdr := data[:12]
+ vers := binary.BigEndian.Uint32(hdr[4:8])
+ nobj := binary.BigEndian.Uint32(hdr[8:12])
+ if string(hdr[:4]) != "PACK" || (vers != 2 && vers != 3) {
+ return nil, fmt.Errorf("not a packfile")
+ }
+ if vers == 3 {
+ return nil, fmt.Errorf("packfile v3 not supported")
+ }
+
+ sum := sha1.Sum(data[:len(data)-20])
+ if !bytes.Equal(sum[:], data[len(data)-20:]) {
+ return nil, fmt.Errorf("packfile checksum mismatch")
+ }
+
+ idx := &PackIndex{
+ Version: vers,
+ NumObjects: nobj,
+ Size: int64(len(data)),
+ Checksum: hex.EncodeToString(data[len(data)-20:]),
+ Objects: make([]PackObject, 0, nobj),
+ }
+
+ // We need a store to resolve delta chains and compute hashes.
+ var s gitfs.Store
+ objs := data[12 : len(data)-20]
+ off := 0
+
+ for i := 0; i < int(nobj); i++ {
+ obj, encSize, err := indexObject(&s, objs, off)
+ if err != nil {
+ return nil, fmt.Errorf("object %d at offset %d: %v", i, off+12, err)
+ }
+ obj.Offset = off + 12 // offset from start of packfile
+ obj.EncodedSize = encSize
+ idx.Objects = append(idx.Objects, obj)
+ off += encSize
+ }
+
+ // Compute delta chain depths.
+ byHash := map[string]int{} // hash -> index into Objects
+ for i, obj := range idx.Objects {
+ byHash[obj.Hash] = i
+ }
+ for i := range idx.Objects {
+ if idx.Objects[i].BaseHash == "" {
+ continue
+ }
+ depth := 1
+ baseHash := idx.Objects[i].BaseHash
+ for {
+ bi, ok := byHash[baseHash]
+ if !ok || idx.Objects[bi].BaseHash == "" {
+ break
+ }
+ depth++
+ baseHash = idx.Objects[bi].BaseHash
+ }
+ idx.Objects[i].Depth = depth
+ }
+
+ return idx, nil
+}
+
+// indexObject parses the object at objs[off:] and returns structural info.
+func indexObject(s *gitfs.Store, objs []byte, off int) (PackObject, int, error) {
+ if off < 0 || off >= len(objs) {
+ return PackObject{}, 0, fmt.Errorf("invalid offset")
+ }
+
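+ // The object header is a varint: bits 4-6 of the first byte hold
+ // the type, bits 0-3 the low bits of the size, and each continuation
+ // byte contributes seven more size bits. binary.Uvarint decodes the
+ // little-endian groups; the shifts below re-split type and size.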
+ u, size := binary.Uvarint(objs[off:])
+ if size <= 0 {
+ return PackObject{}, 0, fmt.Errorf("bad varint")
+ }
+ typ := gitfs.ObjType((u >> 4) & 7)
+ n := int(u&15 | u>>7<<4)
+
+ obj := PackObject{}
+
+ switch typ {
+ case gitfs.ObjRefDelta:
+ if len(objs)-(off+size) < 20 {
+ return PackObject{}, 0, fmt.Errorf("bad ref-delta")
+ }
+ var h gitfs.Hash
+ copy(h[:], objs[off+size:])
+ size += 20
+ obj.Type = "ref-delta"
+ obj.DeltaBase = h.String()
+
+ case gitfs.ObjOfsDelta:
+ i := off + size
+ if len(objs)-i < 20 {
+ return PackObject{}, 0, fmt.Errorf("bad ofs-delta")
+ }
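+ // The negative offset is a big-endian base-128 varint with a bias:
+ // each continuation step adds 1<<7, so every encoding is unique.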
+ d := int64(objs[i] & 0x7f)
+ for objs[i]&0x80 != 0 {
+ i++
+ d = d<<7 | int64(objs[i]&0x7f)
+ d += 1 << 7
+ }
+ i++
+ size = i - off
+ obj.Type = "ofs-delta"
+ obj.BaseOffset = off - int(d) + 12 // offset from start of packfile
+ obj.DeltaBase = fmt.Sprintf("-%d", int(d))
+
+ case gitfs.ObjCommit:
+ obj.Type = "commit"
+ case gitfs.ObjTree:
+ obj.Type = "tree"
+ case gitfs.ObjBlob:
+ obj.Type = "blob"
+ case gitfs.ObjTag:
+ obj.Type = "tag"
+ default:
+ return PackObject{}, 0, fmt.Errorf("unknown type %d", typ)
+ }
+
+ // Decompress to get the actual size and compute hash.
+ br := bytes.NewReader(objs[off+size:])
+ zr, err := zlib.NewReader(br)
+ if err != nil {
+ return PackObject{}, 0, fmt.Errorf("zlib: %v", err)
+ }
+ content, err := io.ReadAll(zr)
+ if err != nil {
+ return PackObject{}, 0, fmt.Errorf("zlib read: %v", err)
+ }
+ if len(content) != n {
+ return PackObject{}, 0, fmt.Errorf("size mismatch: %d != %d", len(content), n)
+ }
+ encSize := len(objs[off:]) - br.Len()
+
+ // For non-delta objects, the hash is straightforward.
+ // For delta objects, we need to resolve the chain via the store.
+ switch typ {
+ case gitfs.ObjCommit, gitfs.ObjTree, gitfs.ObjBlob, gitfs.ObjTag:
+ h, _ := s.Add(typ, content)
+ obj.Hash = h.String()
+ obj.Size = len(content)
+ obj.ResolvedType = obj.Type
+
+ case gitfs.ObjRefDelta:
+ baseTyp, baseData := s.Object(gitfs.Hash(mustParseHash(obj.DeltaBase)))
+ if baseTyp == gitfs.ObjNone {
+ return PackObject{}, 0, fmt.Errorf("unknown ref-delta base %s", obj.DeltaBase)
+ }
+ resolved, err := applyPackDelta(baseData, content)
+ if err != nil {
+ return PackObject{}, 0, fmt.Errorf("apply ref-delta: %v", err)
+ }
+ h, _ := s.Add(baseTyp, resolved)
+ obj.Hash = h.String()
+ obj.Size = len(resolved)
+ obj.ResolvedType = baseTyp.String()
+ obj.BaseHash = obj.DeltaBase
+
+ case gitfs.ObjOfsDelta:
+ baseOff := off - mustParseOfsOffset(obj.DeltaBase)
+ baseTyp, baseHash, baseContent, _, err := gitfs.UnpackObject(s, objs, baseOff)
+ if err != nil {
+ return PackObject{}, 0, fmt.Errorf("resolve ofs-delta base: %v", err)
+ }
+ resolved, err := applyPackDelta(baseContent, content)
+ if err != nil {
+ return PackObject{}, 0, fmt.Errorf("apply ofs-delta: %v", err)
+ }
+ h, _ := s.Add(baseTyp, resolved)
+ obj.Hash = h.String()
+ obj.Size = len(resolved)
+ obj.ResolvedType = baseTyp.String()
+ obj.BaseHash = baseHash.String()
+ }
+
+ return obj, encSize, nil
+}
+
+func mustParseHash(s string) [20]byte {
+ b, _ := hex.DecodeString(s)
+ var h [20]byte
+ copy(h[:], b)
+ return h
+}
+
+func mustParseOfsOffset(s string) int {
+ // s is like "-1234"
+ var n int
+ fmt.Sscanf(s, "-%d", &n)
+ return n
+}
+
+// resolveBaseType follows the delta chain to find the base object type.
+func resolveBaseType(s *gitfs.Store, objs []byte, off int) (gitfs.ObjType, error) {
+ u, size := binary.Uvarint(objs[off:])
+ if size <= 0 {
+ return 0, fmt.Errorf("bad varint")
+ }
+ typ := gitfs.ObjType((u >> 4) & 7)
+ switch typ {
+ case gitfs.ObjCommit, gitfs.ObjTree, gitfs.ObjBlob, gitfs.ObjTag:
+ return typ, nil
+ case gitfs.ObjOfsDelta:
+ i := off + size
+ d := int64(objs[i] & 0x7f)
+ for objs[i]&0x80 != 0 {
+ i++
+ d = d<<7 | int64(objs[i]&0x7f)
+ d += 1 << 7
+ }
+ return resolveBaseType(s, objs, off-int(d))
+ case gitfs.ObjRefDelta:
+ var h gitfs.Hash
+ copy(h[:], objs[off+size:])
+ baseTyp, _ := s.Object(h)
+ return baseTyp, nil
+ }
+ return 0, fmt.Errorf("unknown type %d", typ)
+}
+
+// applyPackDelta applies a delta to a base to produce the target.
+func applyPackDelta(base, delta []byte) ([]byte, error) {
+ // Delta starts with base size and target size as varints.
+ baseSize, s := binary.Uvarint(delta)
+ delta = delta[s:]
+ if baseSize != uint64(len(base)) {
+ return nil, fmt.Errorf("base size mismatch: %d != %d", baseSize, len(base))
+ }
+ targSize, s := binary.Uvarint(delta)
+ delta = delta[s:]
+
+ targ := make([]byte, targSize)
+ dst := targ
+ for len(delta) > 0 {
+ cmd := delta[0]
+ delta = delta[1:]
+ switch {
+ case cmd == 0:
+ return nil, fmt.Errorf("invalid delta cmd")
+ case cmd&0x80 != 0:
+ // Copy from base: the low four cmd bits select offset bytes,
+ // the next three select size bytes.
+ var off, size int64
+ for i := uint(0); i < 4; i++ {
+ if cmd&(1<<i) != 0 {
+ off |= int64(delta[0]) << (8 * i)
+ delta = delta[1:]
+ }
+ }
+ for i := uint(0); i < 3; i++ {
+ if cmd&(0x10<<i) != 0 {
+ size |= int64(delta[0]) << (8 * i)
+ delta = delta[1:]
+ }
+ }
+ if size == 0 {
+ size = 0x10000
+ }
+ if off+size > int64(len(base)) || int(size) > len(dst) {
+ return nil, fmt.Errorf("invalid delta copy range")
+ }
+ copy(dst, base[off:off+size])
+ dst = dst[size:]
+ default:
+ // Insert literal bytes; cmd (1..127) is the length.
+ n := int(cmd)
+ if n > len(delta) || n > len(dst) {
+ return nil, fmt.Errorf("invalid delta insert length")
+ }
+ copy(dst, delta[:n])
+ delta = delta[n:]
+ dst = dst[n:]
+ }
+ }
+ if len(dst) != 0 {
+ return nil, fmt.Errorf("delta produced %d of %d bytes", int(targSize)-len(dst), targSize)
+ }
+ return targ, nil
+}
+
+// A DeltaOp is a single delta instruction: a copy from the base object
+// or an insert of literal bytes.
+type DeltaOp struct {
+ Kind string // "copy" or "insert"
+ Offset int // copy only: offset into base
+ Size int
+ Data []byte // insert only: literal bytes
+}
+
+// DeltaInfo describes a parsed delta: the declared base and target
+// sizes and the ordered list of copy/insert operations.
+type DeltaInfo struct {
+ BaseSize int
+ TargetSize int
+ Ops []DeltaOp
+}
+
+// ParseDelta decodes a raw delta payload into its instruction list
+// without applying it to a base.
+func ParseDelta(delta []byte) (*DeltaInfo, error) {
+ baseSize, s := binary.Uvarint(delta)
+ delta = delta[s:]
+ targSize, s := binary.Uvarint(delta)
+ delta = delta[s:]
+
+ info := &DeltaInfo{BaseSize: int(baseSize), TargetSize: int(targSize)}
+ for len(delta) > 0 {
+ cmd := delta[0]
+ delta = delta[1:]
+
+ switch {
+ case cmd == 0:
+ return nil, fmt.Errorf("invalid delta cmd 0")
+
+ case cmd&0x80 != 0:
+ // Copy from base.
+ var off, size int64
+ for i := uint(0); i < 4; i++ {
+ if cmd&(1<<i) != 0 {
+ off |= int64(delta[0]) << (8 * i)
+ delta = delta[1:]
+ }
+ }
+ for i := uint(0); i < 3; i++ {
+ if cmd&(0x10<<i) != 0 {
+ size |= int64(delta[0]) << (8 * i)
+ delta = delta[1:]
+ }
+ }
+ if size == 0 {
+ size = 0x10000
+ }
+ info.Ops = append(info.Ops, DeltaOp{Kind: "copy", Offset: int(off), Size: int(size)})
+
+ default:
+ // Insert literal bytes.
+ n := int(cmd)
+ if n > len(delta) {
+ return nil, fmt.Errorf("invalid delta insert length")
+ }
+ info.Ops = append(info.Ops, DeltaOp{Kind: "insert", Size: n, Data: delta[:n]})
+ delta = delta[n:]
+ }
+ }
+ return info, nil
+}
+
+// RawDelta returns the decompressed but unapplied delta payload of the
+// object at the given packfile offset. The object must be a delta.
+func RawDelta(data []byte, offset int) ([]byte, error) {
+ if offset < 12 || offset >= len(data)-20 {
+ return nil, fmt.Errorf("invalid offset %d", offset)
+ }
+ objs := data[12 : len(data)-20]
+ off := offset - 12
+
+ u, size := binary.Uvarint(objs[off:])
+ if size <= 0 {
+ return nil, fmt.Errorf("bad varint")
+ }
+ typ := gitfs.ObjType((u >> 4) & 7)
+
+ switch typ {
+ case gitfs.ObjRefDelta:
+ size += 20
+ case gitfs.ObjOfsDelta:
+ i := off + size
+ for objs[i]&0x80 != 0 {
+ i++
+ }
+ i++
+ size = i - off
+ default:
+ return nil, fmt.Errorf("not a delta object (type %s)", typ)
+ }
+
+ br := bytes.NewReader(objs[off+size:])
+ zr, err := zlib.NewReader(br)
+ if err != nil {
+ return nil, fmt.Errorf("zlib: %v", err)
+ }
+ return io.ReadAll(zr)
+}
+
+// DecompressObject decompresses a single object from packfile data, identified by hash.
+// It does a full unpack to populate the store (required for ref-delta resolution),
+// then looks up the object by hash.
+func DecompressObject(data []byte, hash string) (objType string, content []byte, err error) {
+ if len(data) < 12+20 {
+ return "", nil, fmt.Errorf("packfile too short")
+ }
+
+ var s gitfs.Store
+ if err := gitfs.Unpack(&s, data); err != nil {
+ return "", nil, err
+ }
+
+ h, err := gitfs.ParseHash(hash)
+ if err != nil {
+ return "", nil, fmt.Errorf("invalid hash %q: %v", hash, err)
+ }
+
+ typ, objData := s.Object(h)
+ if typ == gitfs.ObjNone {
+ return "", nil, fmt.Errorf("object %s not found", hash)
+ }
+
+ return typ.String(), objData, nil
+}
diff --git a/internal/git/templates.go b/internal/git/templates.go
index a221125b..120c83b6 100644
--- a/internal/git/templates.go
+++ b/internal/git/templates.go
@@ -55,6 +55,19 @@ body {
github.com/wolfi-dev/os
+<h2>Pack Explorer</h2>
+<p>Explore the raw packfile format of a git repo:</p>
+<form action="/" method="GET">
+<input size="100" type="text" name="pack" value="github.com/wolfi-dev/os"/>
+<input type="submit" />
+</form>
Examples
+
+
FAQ
How does this work?