Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 79 additions & 22 deletions internal/setup/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import (
"errors"
"fmt"
"maps"
"os"
"path/filepath"
"slices"
"strings"
"sync"
"time"

"golang.org/x/crypto/openpgp/packet"
Expand Down Expand Up @@ -273,35 +275,90 @@
}

// Check for glob and generate conflicts.
//
// This is the most time-consuming part of every command because checking
// each pair of paths uses an n^2 algorithm; specifically due to glob
// handling. We can speed up the process by exploiting the fact that
// conflicts are not allowed which means that in valid releases globs will
// appear at only the specific parts of the paths. There will always be a
// non-trivial prefix of the path that doesn't contain globs.
//
// We will speed up the search by only looking for conflicts in paths that
// share this same prefix. By collecting all paths and doing binary search
// we can find the start of the prefix. This is much simpler than other
// data structures like tries or radix trees which have similar algorithmic
// complexities.
allPaths := slices.Collect(maps.Keys(paths))
slices.Sort(allPaths)
sem := make(chan struct{}, 4)
var mu sync.Mutex
err = nil
for oldPath, oldSlices := range paths {
for _, old := range oldSlices {
oldInfo := old.Contents[oldPath]
if oldInfo.Kind != GeneratePath && oldInfo.Kind != GlobPath {
break
}
for newPath, newSlices := range paths {
if oldPath == newPath {
// Identical paths have been filtered earlier.
continue
sem <- struct{}{}
go func() {
defer func() {
<-sem
}()
for _, old := range oldSlices {
oldInfo := old.Contents[oldPath]
if oldInfo.Kind != GeneratePath && oldInfo.Kind != GlobPath {
break
}
for _, new := range newSlices {
newInfo := new.Contents[newPath]
if oldInfo.Kind == GlobPath && (newInfo.Kind == GlobPath || newInfo.Kind == CopyPath) {
if new.Package == old.Package {
continue
}

prefixLen := strings.IndexAny(oldPath, "*?")
if prefixLen == -1 {

Check failure on line 309 in internal/setup/setup.go

View workflow job for this annotation

GitHub Actions / Lint

SA9003: empty branch (staticcheck)
// return fmt.Errorf("internal error: invalid path: generate or glob path does not contain ' ?' or '*': %q", oldPath)
}
searchKey := oldPath[:prefixLen]
// startIndex is the position of the prefix or the position where
// the prefix would have been found.
startIndex, _ := slices.BinarySearch(allPaths, searchKey)
for i := startIndex; i < len(allPaths); i++ {
newPath := allPaths[i]
if !strings.HasPrefix(newPath, searchKey) {
// Iterate until the prefix no longer matches, which means
// all other paths will fail the comparison below.
break
}
if strdist.GlobPath(newPath, oldPath) {
if (old.Package > new.Package) || (old.Package == new.Package && old.Name > new.Name) ||
(old.Package == new.Package && old.Name == new.Name && oldPath > newPath) {
old, new = new, old
oldPath, newPath = newPath, oldPath
newSlices := paths[newPath]
// We know prefixes match, we can skip that part of the string.
if oldPath[len(searchKey)-1:] == newPath[len(searchKey)-1:] {
// Identical paths have been filtered earlier.
continue
}
for _, new := range newSlices {
newInfo := new.Contents[newPath]
if oldInfo.Kind == GlobPath && (newInfo.Kind == GlobPath || newInfo.Kind == CopyPath) {
if new.Package == old.Package {
continue
}
}
// We know prefixes match, we can skip that part of the string.
if strdist.GlobPath(newPath[len(searchKey)-1:], oldPath[len(searchKey)-1:]) {
if (old.Package > new.Package) || (old.Package == new.Package && old.Name > new.Name) ||
(old.Package == new.Package && old.Name == new.Name && oldPath > newPath) {
old, new = new, old
oldPath, newPath = newPath, oldPath
}
mu.Lock()
if err == nil {
err = fmt.Errorf("slices %s and %s conflict on %s and %s", old, new, oldPath, newPath)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Sort the errors lexicographically by comparing strings to give out a deterministic error. Thanks Paul for the suggestion to make it better

}
mu.Unlock()
return
} else {
break
}
return fmt.Errorf("slices %s and %s conflict on %s and %s", old, new, oldPath, newPath)
}
}
}
}
}()
}
for range 4 {
sem <- struct{}{}
}
if err != nil {
return err
}

// Check for cycles.
Expand Down
99 changes: 99 additions & 0 deletions internal/setup/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,86 @@ var setupTests = []setupTest{{
`,
},
relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path1",
}, {
summary: "When multiple prefixes match all are tested",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice1:
contents:
/path/*/foo:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice1:
contents:
/path/bar/foo1:
/path/bar/f*:
`,
},
relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path/\\*/foo and /path/bar/f\\*",
}, {
summary: "Later matching paths are tested after several non-matches",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice1:
contents:
/path/*/foo:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice1:
contents:
/path/bar/f:
/path/bar/fa:
/path/bar/fb:
/path/bar/f*:
`,
},
relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path/\\*/foo and /path/bar/f\\*",
}, {
summary: "Conflicting globs with wildcard in the middle of the segment",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice:
contents:
/path/ab*cd/file:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice:
contents:
/path/ab12cd/file:
`,
},
relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/ab\*cd/file and /path/ab12cd/file`,
}, {
summary: "Conflicting globs when one side uses double-star",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice:
contents:
/path/**/file:
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice:
contents:
/path/*/file:
`,
},
relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/\*\*/file and /path/\*/file`,
}, {
summary: "Directories must be suffixed with /",
input: map[string]string{
Expand Down Expand Up @@ -1943,6 +2023,25 @@ var setupTests = []setupTest{{
`,
},
relerror: `slices mypkg_myslice1 and mypkg_myslice2 conflict on /path/subdir/\*\* and /path/\*\*`,
}, {
summary: "Generate paths conflict with glob paths sharing the prefix",
input: map[string]string{
"slices/mydir/mypkg1.yaml": `
package: mypkg1
slices:
myslice:
contents:
/path/subdir/**: {generate: manifest}
`,
"slices/mydir/mypkg2.yaml": `
package: mypkg2
slices:
myslice:
contents:
/path/subdir/f*:
`,
},
relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/subdir/\*\* and /path/subdir/f\*`,
}, {
summary: `No other options in "generate" paths`,
input: map[string]string{
Expand Down
124 changes: 74 additions & 50 deletions internal/strdist/strdist.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
}
lst := make([]CostInt, len(b)+1)
bl := 0
for bi, br := range b {
bl++
for _, br := range b {
cost := f(-1, br)
if cost.InsertB == Inhibit || lst[bi] == Inhibit {
lst[bi+1] = Inhibit
if cost.InsertB == Inhibit || lst[bl] == Inhibit {
lst[bl+1] = Inhibit
} else {
lst[bi+1] = lst[bi] + cost.InsertB
lst[bl+1] = lst[bl] + cost.InsertB
}
bl++
}
lst = lst[:bl+1]
// Not required, but caching means preventing the fast path
Expand Down Expand Up @@ -87,8 +87,7 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
if debug {
debugf("... %v", lst)
}
_ = stop
if cut != 0 && stop {
if cut != 0 && len(b) > 0 && stop {
break
}
}
Expand All @@ -105,18 +104,77 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
// * - Any zero or more characters, except for /
// ** - Any zero or more characters, including /
func GlobPath(a, b string) bool {
if !wildcardPrefixMatch(a, b) {
// Fast path.
return false
// Computing the actual distance is slow as its complexity is
// O(len(a) * len(b)). If the paths contain globs there is no way around
// it, but we can be clever about creating segments from the paths and only
// calling the distance function when necessary. If there is no glob the
// complexity becomes O(len(a) + len(b)).
//
// The algorithm works by separating the path into segments delimited by
// '/'. We compare the segments in order for a and b, we have three cases:
// 1) No segment uses globs, comparison is memcmp of both strings.
// 2) One of the strings uses a single "*" or "?". We call the distance
// function only with the segment which reduces the algorithmic
// complexity by reducing the length.
// 3) One of the strings uses "**". We need to call distance on the
// the rest of both strings and we can no longer rely on segments.
//
// Crucially, this optimization works because:
// * There are few paths in the releases that use "**", because it results
// in conflict easily. In fact, it is usually used to extract a whole
// directory, meaning the prefix segements should be unique and we can
// avoid computing "**" completely.
distance := func(a, b string) bool {
a = strings.ReplaceAll(a, "**", "⁑")
b = strings.ReplaceAll(b, "**", "⁑")
return Distance(a, b, globCost, 1) == 0
}
if !wildcardSuffixMatch(a, b) {
// Fast path.
return false

// Returns the index where the segment ends (next '/' or the end of the
// string) and whether the string has a "*" or "?". This function will only
// traverse the string once.
segmentEnd := func(s string) (end int, hasGlob bool) {
end = strings.IndexAny(s, "*?/")
if end == -1 {
end = len(s) - 1
} else if s[end] == '*' || s[end] == '?' {
slash := strings.IndexRune(s[end:], '/')
if slash != -1 {
end = end + slash
} else {
end = len(s) - 1
}
hasGlob = true
}
return end, hasGlob
}

a = strings.ReplaceAll(a, "**", "⁑")
b = strings.ReplaceAll(b, "**", "⁑")
return Distance(a, b, globCost, 1) == 0
for len(a) > 0 && len(b) > 0 {
endA, globA := segmentEnd(a)
endB, globB := segmentEnd(b)

segmentA := a[:endA+1]
segmentB := b[:endB+1]
if strings.Contains(segmentA, "**") || strings.Contains(segmentB, "**") {
// We need to match the rest of the string with the slow path, no
// other way around it.
return distance(a, b)
} else if globA || globB {
if !distance(segmentA, segmentB) {
return false
}
} else {
if segmentA != segmentB {
return false
}
}

a = a[endA+1:]
b = b[endB+1:]
}

// One string is empty, this call is linear.
return distance(a, b)
}

func globCost(ar, br rune) Cost {
Expand All @@ -139,37 +197,3 @@ func globCost(ar, br rune) Cost {
}
return Cost{SwapAB: 1, DeleteA: 1, InsertB: 1}
}

// wildcardPrefixMatch compares whether the prefixes of a and b are equal up
// to the shortest one. The prefix is defined as the longest substring that
// starts at index 0 and does not contain a wildcard.
func wildcardPrefixMatch(a, b string) bool {
ai := strings.IndexAny(a, "*?")
bi := strings.IndexAny(b, "*?")
if ai == -1 {
ai = len(a)
}
if bi == -1 {
bi = len(b)
}
mini := min(ai, bi)
return a[:mini] == b[:mini]
}

// wildcardSuffixMatch compares whether the suffixes of a and b are equal up
// to the shortest one. The suffix is defined as the longest substring that ends
// at the string length and does not contain a wildcard.
func wildcardSuffixMatch(a, b string) bool {
ai := strings.LastIndexAny(a, "*?")
la := 0
if ai != -1 {
la = len(a) - ai - 1
}
lb := 0
bi := strings.LastIndexAny(b, "*?")
if bi != -1 {
lb = len(b) - bi - 1
}
minl := min(la, lb)
return a[len(a)-minl:] == b[len(b)-minl:]
}
Loading
Loading