Skip to content

Commit 97d95bf

Browse files
feat: dag diff with partial construction and reconstruction helper functions
1 parent 7c84a27 commit 97d95bf

4 files changed

Lines changed: 1358 additions & 0 deletions

File tree

dag/dag.go

Lines changed: 341 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,3 +1638,344 @@ func (d *Dag) RecomputeLabels() error {
16381638

16391639
return nil
16401640
}
1641+
1642+
// Diff compares this DAG with another DAG and returns all differences.
1643+
func (d *Dag) Diff(other *Dag) (*DagDiff, error) {
1644+
if d == nil {
1645+
return nil, fmt.Errorf("cannot diff: source DAG is nil")
1646+
}
1647+
if other == nil {
1648+
return nil, fmt.Errorf("cannot diff: target DAG is nil")
1649+
}
1650+
1651+
diff := &DagDiff{
1652+
Diffs: make(map[string]*LeafDiff),
1653+
Summary: DiffSummary{
1654+
Added: 0,
1655+
Removed: 0,
1656+
Total: 0,
1657+
},
1658+
}
1659+
1660+
// Create maps of bare hash -> leaf for both DAGs
1661+
oldLeafs := make(map[string]*DagLeaf)
1662+
for hash, leaf := range d.Leafs {
1663+
bareHash := StripLabel(hash)
1664+
oldLeafs[bareHash] = leaf
1665+
}
1666+
1667+
newLeafs := make(map[string]*DagLeaf)
1668+
for hash, leaf := range other.Leafs {
1669+
bareHash := StripLabel(hash)
1670+
newLeafs[bareHash] = leaf
1671+
}
1672+
1673+
// Find added leaves
1674+
for bareHash, newLeaf := range newLeafs {
1675+
if _, existsInOld := oldLeafs[bareHash]; !existsInOld {
1676+
// Leaf was added
1677+
diff.Diffs[bareHash] = &LeafDiff{
1678+
Type: DiffTypeAdded,
1679+
BareHash: bareHash,
1680+
Leaf: newLeaf,
1681+
}
1682+
diff.Summary.Added++
1683+
diff.Summary.Total++
1684+
}
1685+
}
1686+
1687+
// Find removed leaves
1688+
for bareHash, oldLeaf := range oldLeafs {
1689+
if _, existsInNew := newLeafs[bareHash]; !existsInNew {
1690+
// Leaf was removed
1691+
diff.Diffs[bareHash] = &LeafDiff{
1692+
Type: DiffTypeRemoved,
1693+
BareHash: bareHash,
1694+
Leaf: oldLeaf,
1695+
}
1696+
diff.Summary.Removed++
1697+
diff.Summary.Total++
1698+
}
1699+
}
1700+
1701+
return diff, nil
1702+
}
1703+
1704+
// DiffFromNewLeaves creates a DagDiff by comparing an old DAG with a set of new leaves.
1705+
func (d *Dag) DiffFromNewLeaves(newLeaves map[string]*DagLeaf) (*DagDiff, error) {
1706+
if d == nil {
1707+
return nil, fmt.Errorf("cannot diff: source DAG is nil")
1708+
}
1709+
if newLeaves == nil {
1710+
return nil, fmt.Errorf("cannot diff: new leaves map is nil")
1711+
}
1712+
1713+
diff := &DagDiff{
1714+
Diffs: make(map[string]*LeafDiff),
1715+
Summary: DiffSummary{
1716+
Added: 0,
1717+
Removed: 0,
1718+
Total: 0,
1719+
},
1720+
}
1721+
1722+
// Create map of bare hash -> leaf for old DAG
1723+
oldLeafs := make(map[string]*DagLeaf)
1724+
for hash, leaf := range d.Leafs {
1725+
bareHash := StripLabel(hash)
1726+
oldLeafs[bareHash] = leaf
1727+
}
1728+
1729+
// Create map of bare hash -> leaf for new leaves
1730+
newLeafsMap := make(map[string]*DagLeaf)
1731+
for hash, leaf := range newLeaves {
1732+
bareHash := StripLabel(hash)
1733+
newLeafsMap[bareHash] = leaf
1734+
}
1735+
1736+
// Find added leaves (in new but not in old)
1737+
for bareHash, newLeaf := range newLeafsMap {
1738+
if _, existsInOld := oldLeafs[bareHash]; !existsInOld {
1739+
diff.Diffs[bareHash] = &LeafDiff{
1740+
Type: DiffTypeAdded,
1741+
BareHash: bareHash,
1742+
Leaf: newLeaf,
1743+
}
1744+
diff.Summary.Added++
1745+
diff.Summary.Total++
1746+
}
1747+
}
1748+
1749+
// Find removed leaves (in old but not in new)
1750+
for bareHash, oldLeaf := range oldLeafs {
1751+
if _, existsInNew := newLeafsMap[bareHash]; !existsInNew {
1752+
diff.Diffs[bareHash] = &LeafDiff{
1753+
Type: DiffTypeRemoved,
1754+
BareHash: bareHash,
1755+
Leaf: oldLeaf,
1756+
}
1757+
diff.Summary.Removed++
1758+
diff.Summary.Total++
1759+
}
1760+
}
1761+
1762+
return diff, nil
1763+
}
1764+
1765+
// GetAddedLeaves returns a map of all added leaves from the diff.
1766+
func (diff *DagDiff) GetAddedLeaves() map[string]*DagLeaf {
1767+
addedLeaves := make(map[string]*DagLeaf)
1768+
1769+
for bareHash, leafDiff := range diff.Diffs {
1770+
if leafDiff.Type == DiffTypeAdded {
1771+
addedLeaves[bareHash] = leafDiff.Leaf
1772+
}
1773+
}
1774+
1775+
return addedLeaves
1776+
}
1777+
1778+
// GetRemovedLeaves returns a map of all removed leaves from the diff.
1779+
func (diff *DagDiff) GetRemovedLeaves() map[string]*DagLeaf {
1780+
removedLeaves := make(map[string]*DagLeaf)
1781+
1782+
for bareHash, leafDiff := range diff.Diffs {
1783+
if leafDiff.Type == DiffTypeRemoved {
1784+
removedLeaves[bareHash] = leafDiff.Leaf
1785+
}
1786+
}
1787+
1788+
return removedLeaves
1789+
}
1790+
1791+
// ApplyToDAG applies the diff to a DAG, creating a new DAG with the changes.
1792+
// This works by:
1793+
// 1. Creating a pool of all available leaves (old leaves + new leaves from diff)
1794+
// 2. Finding the new root (which will be one of the added leaves)
1795+
// 3. Traversing from the new root to collect only referenced leaves
1796+
// 4. Recomputing labels for consistency
1797+
//
1798+
// Leaves from the old DAG that aren't referenced by the new root are naturally excluded.
1799+
func (diff *DagDiff) ApplyToDAG(oldDag *Dag) (*Dag, error) {
1800+
if oldDag == nil {
1801+
return nil, fmt.Errorf("cannot apply diff: old DAG is nil")
1802+
}
1803+
if diff == nil {
1804+
return nil, fmt.Errorf("cannot apply diff: diff is nil")
1805+
}
1806+
1807+
// If no additions, the DAG structure hasn't changed
1808+
if diff.Summary.Added == 0 {
1809+
// Return a copy of the old DAG
1810+
newDag := &Dag{
1811+
Root: oldDag.Root,
1812+
Leafs: make(map[string]*DagLeaf),
1813+
}
1814+
for hash, leaf := range oldDag.Leafs {
1815+
newDag.Leafs[hash] = leaf
1816+
}
1817+
return newDag, nil
1818+
}
1819+
1820+
// Build a complete pool of available leaves using bare hashes as keys
1821+
leafPool := make(map[string]*DagLeaf)
1822+
1823+
// Add all leaves from old DAG
1824+
for hash, leaf := range oldDag.Leafs {
1825+
bareHash := StripLabel(hash)
1826+
leafPool[bareHash] = leaf
1827+
}
1828+
1829+
// Add all new leaves from diff (these will override if same bare hash exists)
1830+
for bareHash, leafDiff := range diff.Diffs {
1831+
if leafDiff.Type == DiffTypeAdded {
1832+
leafPool[bareHash] = leafDiff.Leaf
1833+
}
1834+
}
1835+
1836+
// Find the new root - it must be one of the added leaves
1837+
// The root is the leaf that's not referenced by any other leaf
1838+
addedLeaves := diff.GetAddedLeaves()
1839+
1840+
// Build a set of all child hashes referenced by ALL leaves in the pool
1841+
childHashes := make(map[string]bool)
1842+
for _, leaf := range leafPool {
1843+
for _, childHash := range leaf.Links {
1844+
bareChildHash := StripLabel(childHash)
1845+
childHashes[bareChildHash] = true
1846+
}
1847+
}
1848+
1849+
// Find the new root among added leaves (not referenced by any leaf)
1850+
var newRootHash string
1851+
for bareHash, leaf := range addedLeaves {
1852+
if !childHashes[bareHash] {
1853+
// verify it has root characteristics
1854+
if leaf.Type == DirectoryLeafType || leaf.LeafCount > 0 || leaf.LatestLabel != "" {
1855+
newRootHash = bareHash
1856+
break
1857+
}
1858+
}
1859+
}
1860+
1861+
if newRootHash == "" {
1862+
return nil, fmt.Errorf("cannot find new root among added leaves")
1863+
}
1864+
1865+
// Now traverse from the new root to collect all referenced leaves
1866+
newDagLeaves := make(map[string]*DagLeaf)
1867+
visited := make(map[string]bool)
1868+
1869+
var traverse func(bareHash string) error
1870+
traverse = func(bareHash string) error {
1871+
if visited[bareHash] {
1872+
return nil
1873+
}
1874+
visited[bareHash] = true
1875+
1876+
leaf, exists := leafPool[bareHash]
1877+
if !exists {
1878+
return fmt.Errorf("missing leaf in pool: %s", bareHash)
1879+
}
1880+
1881+
// Add this leaf to the new DAG
1882+
newDagLeaves[bareHash] = leaf
1883+
1884+
// Traverse all children
1885+
for _, childHash := range leaf.Links {
1886+
bareChildHash := StripLabel(childHash)
1887+
if err := traverse(bareChildHash); err != nil {
1888+
return err
1889+
}
1890+
}
1891+
1892+
return nil
1893+
}
1894+
1895+
// Start traversal from new root
1896+
if err := traverse(newRootHash); err != nil {
1897+
return nil, fmt.Errorf("failed to traverse from new root: %w", err)
1898+
}
1899+
1900+
// Create the new DAG
1901+
newDag := &Dag{
1902+
Root: newRootHash,
1903+
Leafs: newDagLeaves,
1904+
}
1905+
1906+
// Recompute labels to ensure consistency
1907+
if err := newDag.RecomputeLabels(); err != nil {
1908+
return nil, fmt.Errorf("failed to recompute labels: %w", err)
1909+
}
1910+
1911+
return newDag, nil
1912+
}
1913+
1914+
// CreatePartialDAGFromAdded creates a partial DAG containing only the added leaves.
1915+
func (diff *DagDiff) CreatePartialDag() (*Dag, error) {
1916+
if diff == nil {
1917+
return nil, fmt.Errorf("cannot create partial DAG: diff is nil")
1918+
}
1919+
1920+
addedLeaves := diff.GetAddedLeaves()
1921+
if len(addedLeaves) == 0 {
1922+
return nil, fmt.Errorf("no added leaves to create partial DAG")
1923+
}
1924+
1925+
// Create DAG with added leaves
1926+
partialDag := &Dag{
1927+
Leafs: make(map[string]*DagLeaf),
1928+
}
1929+
1930+
// Add all leaves using bare hash as key initially
1931+
for bareHash, leaf := range addedLeaves {
1932+
partialDag.Leafs[bareHash] = leaf
1933+
}
1934+
1935+
// Find the root among added leaves (leaf not referenced by other added leaves)
1936+
childHashes := make(map[string]bool)
1937+
for _, leaf := range addedLeaves {
1938+
for _, childHash := range leaf.Links {
1939+
bareChildHash := StripLabel(childHash)
1940+
// Only count if the child is also in our added leaves
1941+
if _, exists := addedLeaves[bareChildHash]; exists {
1942+
childHashes[bareChildHash] = true
1943+
}
1944+
}
1945+
}
1946+
1947+
// Find the root (not a child of any added leaf)
1948+
var rootBareHash string
1949+
for bareHash := range addedLeaves {
1950+
if !childHashes[bareHash] {
1951+
rootBareHash = bareHash
1952+
break
1953+
}
1954+
}
1955+
1956+
if rootBareHash == "" {
1957+
// If all leaves reference each other, pick the directory leaf or first one
1958+
for bareHash, leaf := range addedLeaves {
1959+
if leaf.Type == DirectoryLeafType {
1960+
rootBareHash = bareHash
1961+
break
1962+
}
1963+
}
1964+
if rootBareHash == "" {
1965+
// Just pick the first one
1966+
for bareHash := range addedLeaves {
1967+
rootBareHash = bareHash
1968+
break
1969+
}
1970+
}
1971+
}
1972+
1973+
partialDag.Root = rootBareHash
1974+
1975+
// Recompute labels for consistency
1976+
if err := partialDag.RecomputeLabels(); err != nil {
1977+
return nil, fmt.Errorf("failed to recompute labels: %w", err)
1978+
}
1979+
1980+
return partialDag, nil
1981+
}

0 commit comments

Comments
 (0)