Skip to content

Commit 06457ae

Browse files
authored
Merge pull request #34 from Muneer320/main
feat: Add sensitive data scrubbing to protect secrets before sending to LLMs
2 parents 8a1263f + bd95f7b commit 06457ae

3 files changed

Lines changed: 670 additions & 2 deletions

File tree

src/internal/git/operations.go

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import (
77
"path/filepath"
88
"strings"
99

10+
"github.com/dfanso/commit-msg/src/internal/scrubber"
1011
"github.com/dfanso/commit-msg/src/internal/utils"
1112
"github.com/dfanso/commit-msg/src/types"
1213
)
@@ -100,7 +101,14 @@ func GetChanges(config *types.RepoConfig) (string, error) {
100101
continue
101102
}
102103
changes.WriteString(fmt.Sprintf("Content of new file %s:\n", file))
103-
changes.WriteString(string(fileContent))
104+
105+
// Use special scrubbing for .env files
106+
if strings.HasSuffix(strings.ToLower(file), ".env") ||
107+
strings.Contains(strings.ToLower(file), ".env.") {
108+
changes.WriteString(scrubber.ScrubEnvFile(string(fileContent)))
109+
} else {
110+
changes.WriteString(string(fileContent))
111+
}
104112
changes.WriteString("\n\n")
105113
}
106114
}
@@ -115,5 +123,8 @@ func GetChanges(config *types.RepoConfig) (string, error) {
115123
changes.WriteString("\n")
116124
}
117125

118-
return changes.String(), nil
126+
// Scrub sensitive data before returning
127+
scrubbedChanges := scrubber.ScrubDiff(changes.String())
128+
129+
return scrubbedChanges, nil
119130
}

src/internal/scrubber/scrubber.go

Lines changed: 231 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,231 @@
1+
package scrubber
2+
3+
import (
4+
"regexp"
5+
"strings"
6+
)
7+
8+
// SensitivePattern couples a compiled detection regex with the text that
// replaces whatever the regex matches.
type SensitivePattern struct {
	// Name is a human-readable label for the kind of secret being detected.
	Name string
	// Pattern is the compiled expression used to locate the secret.
	Pattern *regexp.Regexp
	// Redact is the replacement template passed to ReplaceAllString; it may
	// refer to capture groups of Pattern (for example "${1}").
	Redact string
}
14+
15+
var (
16+
// Common patterns for sensitive data
17+
sensitivePatterns = []SensitivePattern{
18+
// API Keys and Tokens
19+
{
20+
Name: "Generic API Key",
21+
Pattern: regexp.MustCompile(`(?i)(api[_-]?key|apikey|api[_-]?token)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?`),
22+
Redact: "${1}=\"[REDACTED_API_KEY]\"",
23+
},
24+
{
25+
Name: "Bearer Token",
26+
Pattern: regexp.MustCompile(`(?i)(bearer\s+)([a-zA-Z0-9_\-\.]{20,})`),
27+
Redact: "${1}[REDACTED_BEARER_TOKEN]",
28+
},
29+
{
30+
Name: "Authorization Header",
31+
Pattern: regexp.MustCompile(`(?i)(authorization\s*[=:]\s*["\']?)([a-zA-Z0-9_\-\.]{20,})["\']?`),
32+
Redact: "${1}[REDACTED_AUTH_TOKEN]\"",
33+
},
34+
35+
// AWS Credentials
36+
{
37+
Name: "AWS Access Key",
38+
Pattern: regexp.MustCompile(`(?i)(aws[_-]?access[_-]?key[_-]?id|AWS_ACCESS_KEY_ID)\s*[=:]\s*["\']?(AKIA[0-9A-Z]{16})["\']?`),
39+
Redact: "${1}=\"[REDACTED_AWS_KEY]\"",
40+
},
41+
{
42+
Name: "AWS Secret Key",
43+
Pattern: regexp.MustCompile(`(?i)(aws[_-]?secret[_-]?access[_-]?key|AWS_SECRET_ACCESS_KEY)\s*[=:]\s*["\']?([a-zA-Z0-9/+=]{40})["\']?`),
44+
Redact: "${1}=\"[REDACTED_AWS_SECRET]\"",
45+
},
46+
47+
// Database Credentials
48+
{
49+
Name: "Database URL with Password",
50+
Pattern: regexp.MustCompile(`(?i)(postgres|mysql|mongodb|redis)://([^:]+):([^@]+)@`),
51+
Redact: "${1}://${2}:[REDACTED_DB_PASSWORD]@",
52+
},
53+
{
54+
Name: "Database Password",
55+
Pattern: regexp.MustCompile(`(?i)(db[_-]?password|database[_-]?password|DB_PASSWORD)\s*[=:]\s*["\']?([^\s"']+)["\']?`),
56+
Redact: "${1}=\"[REDACTED_DB_PASSWORD]\"",
57+
},
58+
59+
// OAuth and Social Media
60+
{
61+
Name: "GitHub Token",
62+
Pattern: regexp.MustCompile(`(?i)(github[_-]?token|gh[_-]?token|GITHUB_TOKEN)\s*[=:]\s*["\']?(gh[ps]_[a-zA-Z0-9_\-]{20,})["\']?`),
63+
Redact: "${1}=\"[REDACTED_GITHUB_TOKEN]\"",
64+
},
65+
{
66+
Name: "Google API Key",
67+
Pattern: regexp.MustCompile(`(?i)(google[_-]?api[_-]?key|GOOGLE_API_KEY|GEMINI_API_KEY)\s*[=:]\s*["\']?(AIza[a-zA-Z0-9_\-]{35})["\']?`),
68+
Redact: "${1}=\"[REDACTED_GOOGLE_API_KEY]\"",
69+
},
70+
{
71+
Name: "OpenAI API Key",
72+
Pattern: regexp.MustCompile(`(?i)(openai[_-]?api[_-]?key|OPENAI_API_KEY)\s*[=:]\s*["\']?(sk-[a-zA-Z0-9\-]{10,})["\']?`),
73+
Redact: "${1}=\"[REDACTED_OPENAI_KEY]\"",
74+
},
75+
{
76+
Name: "Anthropic/Claude API Key",
77+
Pattern: regexp.MustCompile(`(?i)(claude[_-]?api[_-]?key|anthropic[_-]?api[_-]?key|CLAUDE_API_KEY)\s*[=:]\s*["\']?(sk-ant-[a-zA-Z0-9\-_]{20,})["\']?`),
78+
Redact: "${1}=\"[REDACTED_CLAUDE_KEY]\"",
79+
},
80+
{
81+
Name: "Grok/X.AI API Key",
82+
Pattern: regexp.MustCompile(`(?i)(grok[_-]?api[_-]?key|xai[_-]?api[_-]?key|GROK_API_KEY)\s*[=:]\s*["\']?(xai-[a-zA-Z0-9\-_]{20,})["\']?`),
83+
Redact: "${1}=\"[REDACTED_GROK_KEY]\"",
84+
},
85+
{
86+
Name: "Slack Token",
87+
Pattern: regexp.MustCompile(`(?i)(slack[_-]?token|SLACK_TOKEN)\s*[=:]\s*["\']?(xox[baprs]-[a-zA-Z0-9\-]{10,})["\']?`),
88+
Redact: "${1}=\"[REDACTED_SLACK_TOKEN]\"",
89+
},
90+
91+
// Private Keys
92+
{
93+
Name: "Private Key",
94+
Pattern: regexp.MustCompile(`(?s)(-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----).*?(-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----)`),
95+
Redact: "${1}\n[REDACTED_PRIVATE_KEY]\n${2}",
96+
},
97+
98+
// JWT Tokens
99+
{
100+
Name: "JWT Token",
101+
Pattern: regexp.MustCompile(`(?i)(jwt|token)\s*[=:]\s*["\']?(eyJ[a-zA-Z0-9_\-]*\.eyJ[a-zA-Z0-9_\-]*\.[a-zA-Z0-9_\-]+)["\']?`),
102+
Redact: "${1}=\"[REDACTED_JWT_TOKEN]\"",
103+
},
104+
105+
// Generic Passwords
106+
{
107+
Name: "Password",
108+
Pattern: regexp.MustCompile(`(?i)(password|passwd|pwd)\s*[=:]\s*["\']([^\s"']{8,})["\']`),
109+
Redact: "${1}=\"[REDACTED_PASSWORD]\"",
110+
},
111+
112+
// Generic Secrets
113+
{
114+
Name: "Secret",
115+
Pattern: regexp.MustCompile(`(?i)(secret|SECRET)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?`),
116+
Redact: "${1}=\"[REDACTED_SECRET]\"",
117+
},
118+
119+
// Environment Variable Assignments (catch-all for .env patterns)
120+
{
121+
Name: "Generic Token",
122+
Pattern: regexp.MustCompile(`(?i)(access[_-]?token|auth[_-]?token|client[_-]?secret|private[_-]?key)\s*[=:]\s*["\']?([a-zA-Z0-9_\-\.]{20,})["\']?`),
123+
Redact: "${1}=\"[REDACTED_TOKEN]\"",
124+
},
125+
126+
// Credit Card Numbers (basic pattern)
127+
{
128+
Name: "Credit Card",
129+
Pattern: regexp.MustCompile(`\b([0-9]{4}[\s\-]?){3}[0-9]{4}\b`),
130+
Redact: "[REDACTED_CREDIT_CARD]",
131+
},
132+
133+
// Email in credentials context
134+
{
135+
Name: "Email in Credentials",
136+
Pattern: regexp.MustCompile(`(?i)(email|user|username)\s*[=:]\s*["\']?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})["\']?`),
137+
Redact: "${1}=\"[REDACTED_EMAIL]\"",
138+
},
139+
}
140+
)
141+
142+
// ScrubDiff removes sensitive information from git diff output
143+
func ScrubDiff(diff string) string {
144+
scrubbed := diff
145+
146+
// Apply each pattern
147+
for _, pattern := range sensitivePatterns {
148+
scrubbed = pattern.Pattern.ReplaceAllString(scrubbed, pattern.Redact)
149+
}
150+
151+
return scrubbed
152+
}
153+
154+
// ScrubLines removes sensitive information line by line
155+
// This is useful for more granular control
156+
func ScrubLines(content string) string {
157+
lines := strings.Split(content, "\n")
158+
scrubbedLines := make([]string, len(lines))
159+
160+
for i, line := range lines {
161+
scrubbedLine := line
162+
for _, pattern := range sensitivePatterns {
163+
scrubbedLine = pattern.Pattern.ReplaceAllString(scrubbedLine, pattern.Redact)
164+
}
165+
scrubbedLines[i] = scrubbedLine
166+
}
167+
168+
return strings.Join(scrubbedLines, "\n")
169+
}
170+
171+
// HasSensitiveData checks if the content contains any sensitive patterns
172+
func HasSensitiveData(content string) bool {
173+
for _, pattern := range sensitivePatterns {
174+
if pattern.Pattern.MatchString(content) {
175+
return true
176+
}
177+
}
178+
return false
179+
}
180+
181+
// GetDetectedPatterns returns names of all detected sensitive patterns
182+
func GetDetectedPatterns(content string) []string {
183+
var detected []string
184+
for _, pattern := range sensitivePatterns {
185+
if pattern.Pattern.MatchString(content) {
186+
detected = append(detected, pattern.Name)
187+
}
188+
}
189+
return detected
190+
}
191+
192+
// ScrubEnvFile specifically handles .env file patterns
193+
func ScrubEnvFile(content string) string {
194+
lines := strings.Split(content, "\n")
195+
scrubbedLines := make([]string, len(lines))
196+
197+
for i, line := range lines {
198+
trimmed := strings.TrimSpace(line)
199+
200+
// Skip comments and empty lines
201+
if trimmed == "" || strings.HasPrefix(trimmed, "#") {
202+
scrubbedLines[i] = line
203+
continue
204+
}
205+
206+
// Check if line contains an assignment
207+
if strings.Contains(line, "=") {
208+
parts := strings.SplitN(line, "=", 2)
209+
if len(parts) == 2 {
210+
key := parts[0]
211+
// Redact the value if it looks like sensitive data
212+
upperKey := strings.ToUpper(strings.TrimSpace(key))
213+
if strings.Contains(upperKey, "KEY") ||
214+
strings.Contains(upperKey, "SECRET") ||
215+
strings.Contains(upperKey, "TOKEN") ||
216+
strings.Contains(upperKey, "PASSWORD") ||
217+
strings.Contains(upperKey, "PASS") ||
218+
strings.Contains(upperKey, "API") ||
219+
strings.Contains(upperKey, "AUTH") {
220+
scrubbedLines[i] = key + "=[REDACTED]"
221+
continue
222+
}
223+
}
224+
}
225+
226+
// Apply normal scrubbing
227+
scrubbedLines[i] = ScrubDiff(line)
228+
}
229+
230+
return strings.Join(scrubbedLines, "\n")
231+
}

0 commit comments

Comments
 (0)