Skip to content

Commit 30745b1

Browse files
authored
Merge pull request #7 from HappyHackingSpace/feat/url_dedupe
feat: add url deduping
2 parents 441a9ad + bf3e228 commit 30745b1

4 files changed

Lines changed: 142 additions & 140 deletions

File tree

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ funURL decode "hello%20world"
7777
funURL decode -c "param%3Dvalue%20with%20spaces"
7878
```
7979

80+
### Deduping URLs
81+
82+
```bash
83+
funURL dedupe 'https://google.com' 'https://google.com/home?qs=value' 'https://google.com/home?qs=secondValue' 'https://google.com/home?qs=newValue&secondQs=anotherValue' 'https://google.com/home?qs=asd&secondQs=das' 'https://site.com/api/users/123' 'https://site.com/api/users/222' 'https://site.com/api/users/412/profile' 'https://site.com/users/photos/photo.jpg' 'https://site.com/users/photos/myPhoto.jpg' 'https://site.com/users/photos/photo.png' 'https://google.com/home/?q=2&d=asd' 'https://my.site/profile?param1=1&param2=2' 'https://my.site/profile?param3=3'
84+
85+
cat urls.txt | funURL dedupe
86+
```
87+
8088
## Input Methods
8189

8290
funURL supports multiple ways to provide input:

internal/cli/dedupe.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package cli
2+
3+
import (
	"bufio"
	"fmt"
	"iter"
	"net/url"
	"os"
	"slices"
	"strings"

	"github.com/HappyHackingSpace/funURL/internal/utils"
	"github.com/spf13/cobra"
)
14+
15+
var dedupeCmd = &cobra.Command{
16+
Use: "dedupe",
17+
Short: "Dedupe urls",
18+
Run: func(cmd *cobra.Command, args []string) {
19+
var urls []*url.URL
20+
seen := make(map[string]bool)
21+
22+
if len(args) > 0 {
23+
for _, value := range args {
24+
if value == "" {
25+
continue
26+
}
27+
url := utils.CreateURL(value)
28+
urlStr := url.String()
29+
if !seen[urlStr] {
30+
seen[urlStr] = true
31+
urls = append(urls, url)
32+
}
33+
}
34+
} else {
35+
scanner := bufio.NewScanner(os.Stdin)
36+
for scanner.Scan() {
37+
value := scanner.Text()
38+
if value == "" {
39+
continue
40+
}
41+
url := utils.CreateURL(value)
42+
urlStr := url.String()
43+
if !seen[urlStr] {
44+
seen[urlStr] = true
45+
urls = append(urls, url)
46+
}
47+
}
48+
}
49+
50+
if len(urls) == 0 {
51+
fmt.Println("Error: No input provided")
52+
os.Exit(1)
53+
return
54+
}
55+
56+
// Convert slice to iterator
57+
urlIterator := func(yield func(*url.URL) bool) {
58+
for _, u := range urls {
59+
if !yield(u) {
60+
return
61+
}
62+
}
63+
}
64+
65+
dedupeUrls(urlIterator)
66+
},
67+
}
68+
69+
func init() {
70+
rootCmd.AddCommand(dedupeCmd)
71+
}
72+
73+
func dedupeUrls(urls iter.Seq[*url.URL]) {
74+
seen := make(map[string]*url.URL)
75+
var result []*url.URL
76+
77+
for u := range urls {
78+
base := u.Scheme + "://" + u.Host
79+
80+
pathParts := strings.Split(strings.Trim(u.Path, "/"), "/")
81+
var normalizedParts []string
82+
83+
for _, part := range pathParts {
84+
if part == "" {
85+
continue
86+
}
87+
88+
if isNumeric(part) || strings.Contains(part, ".") {
89+
normalizedParts = append(normalizedParts, "{param}")
90+
} else {
91+
normalizedParts = append(normalizedParts, part)
92+
}
93+
}
94+
95+
normalizedPath := "/" + strings.Join(normalizedParts, "/")
96+
97+
params := u.Query()
98+
var paramKeys []string
99+
for key := range params {
100+
paramKeys = append(paramKeys, key)
101+
}
102+
103+
for i := 0; i < len(paramKeys); i++ {
104+
for j := i + 1; j < len(paramKeys); j++ {
105+
if paramKeys[i] > paramKeys[j] {
106+
paramKeys[i], paramKeys[j] = paramKeys[j], paramKeys[i]
107+
}
108+
}
109+
}
110+
111+
signature := base + normalizedPath + "|"
112+
for _, key := range paramKeys {
113+
signature += key + ","
114+
}
115+
116+
if _, exists := seen[signature]; !exists {
117+
seen[signature] = u
118+
result = append(result, u)
119+
}
120+
}
121+
122+
for _, u := range result {
123+
fmt.Println(u.String())
124+
}
125+
}
126+
127+
// isNumeric reports whether s is non-empty and made up exclusively of the
// ASCII digits '0' through '9'.
func isNumeric(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}

internal/cli/encode.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ var encodeCmd = &cobra.Command{
1717
urlString, _ := cmd.Flags().GetString("url")
1818
component, _ := cmd.Flags().GetBool("component")
1919
doubled, _ := cmd.Flags().GetBool("double")
20-
2120
if urlString != "" {
2221
encodeURL(urlString, component, doubled)
2322
} else if len(args) > 0 {

test.sh

Lines changed: 0 additions & 139 deletions
This file was deleted.

0 commit comments

Comments
 (0)