From fd232186d0f9f80fec4b77dbf2dadf30444453d5 Mon Sep 17 00:00:00 2001 From: Alberto Carretero Date: Thu, 2 Apr 2026 11:50:05 +0200 Subject: [PATCH 1/5] fix: unicode is handled properly in strdist --- internal/strdist/strdist.go | 13 ++++++------- internal/strdist/strdist_test.go | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/internal/strdist/strdist.go b/internal/strdist/strdist.go index d5b640ef..17a9a0ac 100644 --- a/internal/strdist/strdist.go +++ b/internal/strdist/strdist.go @@ -34,14 +34,14 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { } lst := make([]CostInt, len(b)+1) bl := 0 - for bi, br := range b { - bl++ + for _, br := range b { cost := f(-1, br) - if cost.InsertB == Inhibit || lst[bi] == Inhibit { - lst[bi+1] = Inhibit + if cost.InsertB == Inhibit || lst[bl] == Inhibit { + lst[bl+1] = Inhibit } else { - lst[bi+1] = lst[bi] + cost.InsertB + lst[bl+1] = lst[bl] + cost.InsertB } + bl++ } lst = lst[:bl+1] // Not required, but caching means preventing the fast path @@ -87,8 +87,7 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { if debug { debugf("... %v", lst) } - _ = stop - if cut != 0 && stop { + if cut != 0 && len(b) > 0 && stop { break } } diff --git a/internal/strdist/strdist_test.go b/internal/strdist/strdist_test.go index 7f8975a0..8be71800 100644 --- a/internal/strdist/strdist_test.go +++ b/internal/strdist/strdist_test.go @@ -58,10 +58,12 @@ var distanceTests = []distanceTest{ {f: strdist.GlobCost, r: 1, a: "a**f/hij", b: "abc/def/hik"}, {f: strdist.GlobCost, r: 2, a: "a**fg", b: "abc/def/hik"}, {f: strdist.GlobCost, r: 0, a: "a**f/hij/klm", b: "abc/d**m"}, + {f: strdist.GlobCost, r: 1, a: "**a", b: ""}, } func (s *S) TestDistance(c *C) { for _, test := range distanceTests { + // TODO: test both permutations, at the moment the function is not symmetrical. 
c.Logf("Test: %v", test) if strings.Contains(test.a, "*") || strings.Contains(test.b, "*") { c.Assert(strdist.GlobPath(test.a, test.b), Equals, test.r == 0) From 548b57326d6939ec057a58e54d0879ddc051211a Mon Sep 17 00:00:00 2001 From: Alberto Carretero Date: Thu, 2 Apr 2026 12:26:07 +0200 Subject: [PATCH 2/5] remove unneeded comment --- internal/strdist/strdist_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/strdist/strdist_test.go b/internal/strdist/strdist_test.go index 8be71800..187eb11f 100644 --- a/internal/strdist/strdist_test.go +++ b/internal/strdist/strdist_test.go @@ -63,7 +63,6 @@ var distanceTests = []distanceTest{ func (s *S) TestDistance(c *C) { for _, test := range distanceTests { - // TODO: test both permutations, at the moment the function is not symmetrical. c.Logf("Test: %v", test) if strings.Contains(test.a, "*") || strings.Contains(test.b, "*") { c.Assert(strdist.GlobPath(test.a, test.b), Equals, test.r == 0) From b6783cd62dfa99a08fcbafadc6c105c7eefceb04 Mon Sep 17 00:00:00 2001 From: Alberto Carretero Date: Mon, 27 Apr 2026 17:09:33 +0200 Subject: [PATCH 3/5] address review --- internal/strdist/strdist.go | 5 ++++- internal/strdist/strdist_test.go | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/strdist/strdist.go b/internal/strdist/strdist.go index a4b2ed4f..c725d98c 100644 --- a/internal/strdist/strdist.go +++ b/internal/strdist/strdist.go @@ -59,6 +59,9 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { lst[0] = last + cost.DeleteA } stop := true + if lst[0] < CostInt(cut) { + stop = false + } i := 0 for _, br := range b { i++ @@ -87,7 +90,7 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { if debug { debugf("... 
%v", lst) } - if cut != 0 && len(b) > 0 && stop { + if cut != 0 && stop { break } } diff --git a/internal/strdist/strdist_test.go b/internal/strdist/strdist_test.go index 4973b5d2..157d85c5 100644 --- a/internal/strdist/strdist_test.go +++ b/internal/strdist/strdist_test.go @@ -60,6 +60,9 @@ var distanceTests = []distanceTest{ {f: strdist.GlobCost, r: 0, a: "a**f/hij/klm", b: "abc/d**m"}, {f: strdist.GlobCost, r: 1, a: "**a", b: ""}, {f: strdist.GlobCost, r: 0, a: "/*a/", b: "/a/"}, + {f: strdist.GlobCost, r: 3, a: "abc", b: ""}, + {f: strdist.GlobCost, r: 1, cut: 1, a: "abc", b: ""}, + {f: strdist.GlobCost, r: 2, cut: 3, a: "ab", b: ""}, } func (s *S) TestDistance(c *C) { From 77ec1629a9a8f5d0ee887026767f1b7366d28e1d Mon Sep 17 00:00:00 2001 From: Alberto Carretero Date: Mon, 27 Apr 2026 17:17:43 +0200 Subject: [PATCH 4/5] more tests --- internal/strdist/strdist_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/strdist/strdist_test.go b/internal/strdist/strdist_test.go index 157d85c5..c913805b 100644 --- a/internal/strdist/strdist_test.go +++ b/internal/strdist/strdist_test.go @@ -36,6 +36,7 @@ var distanceTests = []distanceTest{ {f: uniqueCost, r: 6, a: "abc", b: "b"}, {f: uniqueCost, r: 6, a: "abc", b: "c"}, {f: uniqueCost, r: 9, a: "abc", b: ""}, + {f: uniqueCost, r: 6, cut: 6, a: "abc", b: ""}, {f: uniqueCost, r: 5, a: "abc", b: "abcd"}, {f: uniqueCost, r: 5, a: "abc", b: "dabc"}, {f: uniqueCost, r: 10, a: "abc", b: "adbdc"}, From 66017cf01492468d40ec2467e009e4718f2089f3 Mon Sep 17 00:00:00 2001 From: Alberto Carretero Date: Wed, 29 Apr 2026 17:08:45 +0200 Subject: [PATCH 5/5] yet another condition ^ tm --- internal/strdist/strdist.go | 8 ++++++++ internal/strdist/strdist_test.go | 2 ++ 2 files changed, 10 insertions(+) diff --git a/internal/strdist/strdist.go b/internal/strdist/strdist.go index c725d98c..8bcd8ac3 100644 --- a/internal/strdist/strdist.go +++ b/internal/strdist/strdist.go @@ -28,6 +28,14 @@ func StandardCost(ar, br rune) Cost { 
return Cost{SwapAB: 1, DeleteA: 1, InsertB: 1} } +// Distance returns the edit distance between two strings. The cost per edit is +// given by the f argument. +// +// There is an optional cut argument that, when non-zero, ends the computation +// as early as possible once the final cost is certain to be >= cut. There is +// no guarantee about the exact cost returned when this is the case other than +// being >= cut. In particular, when cut is used, the function is not symmetric +// in a and b. func Distance(a, b string, f CostFunc, cut int64) int64 { if a == b { return 0 diff --git a/internal/strdist/strdist_test.go b/internal/strdist/strdist_test.go index c913805b..aab85310 100644 --- a/internal/strdist/strdist_test.go +++ b/internal/strdist/strdist_test.go @@ -63,7 +63,9 @@ var distanceTests = []distanceTest{ {f: strdist.GlobCost, r: 0, a: "/*a/", b: "/a/"}, {f: strdist.GlobCost, r: 3, a: "abc", b: ""}, {f: strdist.GlobCost, r: 1, cut: 1, a: "abc", b: ""}, + // Not symmetric. {f: strdist.GlobCost, r: 2, cut: 3, a: "ab", b: ""}, + {f: strdist.GlobCost, r: 2, cut: 1, a: "", b: "ab"}, } func (s *S) TestDistance(c *C) {