|
| 1 | +package scrubber |
| 2 | + |
| 3 | +import ( |
| 4 | + "regexp" |
| 5 | + "strings" |
| 6 | +) |
| 7 | + |
// SensitivePattern pairs a compiled regular expression that detects one kind
// of sensitive data with the replacement used to redact it.
type SensitivePattern struct {
	// Name is a human-readable label for the kind of data the pattern detects.
	Name string
	// Pattern is the compiled expression matched against the text.
	Pattern *regexp.Regexp
	// Redact is the replacement template handed to Pattern.ReplaceAllString;
	// "${n}" expands to the text of capture group n.
	Redact string
}

var (
	// sensitivePatterns lists the detectors in the order they are applied.
	// Most entries match a "name = value" / "name: value" pair, keep the name
	// (capture group 1) and rewrite the value as a quoted placeholder.
	//
	// NOTE(review): the quoted placeholders written by the two password
	// entries still satisfy the password expressions themselves, so already
	// scrubbed text matches again. Go's regexp has no look-ahead to exclude
	// them without also skipping real values that start with '['.
	sensitivePatterns = []SensitivePattern{
		// API Keys and Tokens
		{
			Name:    "Generic API Key",
			Pattern: regexp.MustCompile(`(?i)(api[_-]?key|apikey|api[_-]?token)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?`),
			Redact:  "${1}=\"[REDACTED_API_KEY]\"",
		},
		{
			Name:    "Bearer Token",
			Pattern: regexp.MustCompile(`(?i)(bearer\s+)([a-zA-Z0-9_\-\.]{20,})`),
			Redact:  "${1}[REDACTED_BEARER_TOKEN]",
		},
		{
			Name: "Authorization Header",
			// Group 1 keeps the header name, separator and optional opening
			// quote; group 3 captures the optional closing quote so the
			// replacement re-emits exactly the quoting that was present
			// instead of always appending a double quote.
			Pattern: regexp.MustCompile(`(?i)(authorization\s*[=:]\s*["\']?)([a-zA-Z0-9_\-\.]{20,})(["\']?)`),
			Redact:  "${1}[REDACTED_AUTH_TOKEN]${3}",
		},

		// AWS Credentials
		{
			Name:    "AWS Access Key",
			Pattern: regexp.MustCompile(`(?i)(aws[_-]?access[_-]?key[_-]?id|AWS_ACCESS_KEY_ID)\s*[=:]\s*["\']?(AKIA[0-9A-Z]{16})["\']?`),
			Redact:  "${1}=\"[REDACTED_AWS_KEY]\"",
		},
		{
			Name:    "AWS Secret Key",
			Pattern: regexp.MustCompile(`(?i)(aws[_-]?secret[_-]?access[_-]?key|AWS_SECRET_ACCESS_KEY)\s*[=:]\s*["\']?([a-zA-Z0-9/+=]{40})["\']?`),
			Redact:  "${1}=\"[REDACTED_AWS_SECRET]\"",
		},

		// Database Credentials
		{
			Name: "Database URL with Password",
			// The user and password classes exclude whitespace (and the user
			// class also excludes '@' and '/') so a match cannot run past the
			// URL's own userinfo part into unrelated text on the same or a
			// later line. The user part may be empty ("redis://:pw@host").
			Pattern: regexp.MustCompile(`(?i)(postgres(?:ql)?|mysql|mongodb(?:\+srv)?|rediss?)://([^:@/\s]*):([^@\s]+)@`),
			Redact:  "${1}://${2}:[REDACTED_DB_PASSWORD]@",
		},
		{
			Name:    "Database Password",
			Pattern: regexp.MustCompile(`(?i)(db[_-]?password|database[_-]?password|DB_PASSWORD)\s*[=:]\s*["\']?([^\s"']+)["\']?`),
			Redact:  "${1}=\"[REDACTED_DB_PASSWORD]\"",
		},

		// OAuth and Social Media
		{
			Name:    "GitHub Token",
			Pattern: regexp.MustCompile(`(?i)(github[_-]?token|gh[_-]?token|GITHUB_TOKEN)\s*[=:]\s*["\']?(gh[ps]_[a-zA-Z0-9_\-]{20,})["\']?`),
			Redact:  "${1}=\"[REDACTED_GITHUB_TOKEN]\"",
		},
		{
			Name:    "Google API Key",
			Pattern: regexp.MustCompile(`(?i)(google[_-]?api[_-]?key|GOOGLE_API_KEY|GEMINI_API_KEY)\s*[=:]\s*["\']?(AIza[a-zA-Z0-9_\-]{35})["\']?`),
			Redact:  "${1}=\"[REDACTED_GOOGLE_API_KEY]\"",
		},
		{
			Name:    "OpenAI API Key",
			Pattern: regexp.MustCompile(`(?i)(openai[_-]?api[_-]?key|OPENAI_API_KEY)\s*[=:]\s*["\']?(sk-[a-zA-Z0-9\-]{10,})["\']?`),
			Redact:  "${1}=\"[REDACTED_OPENAI_KEY]\"",
		},
		{
			Name:    "Anthropic/Claude API Key",
			Pattern: regexp.MustCompile(`(?i)(claude[_-]?api[_-]?key|anthropic[_-]?api[_-]?key|CLAUDE_API_KEY)\s*[=:]\s*["\']?(sk-ant-[a-zA-Z0-9\-_]{20,})["\']?`),
			Redact:  "${1}=\"[REDACTED_CLAUDE_KEY]\"",
		},
		{
			Name:    "Grok/X.AI API Key",
			Pattern: regexp.MustCompile(`(?i)(grok[_-]?api[_-]?key|xai[_-]?api[_-]?key|GROK_API_KEY)\s*[=:]\s*["\']?(xai-[a-zA-Z0-9\-_]{20,})["\']?`),
			Redact:  "${1}=\"[REDACTED_GROK_KEY]\"",
		},
		{
			Name:    "Slack Token",
			Pattern: regexp.MustCompile(`(?i)(slack[_-]?token|SLACK_TOKEN)\s*[=:]\s*["\']?(xox[baprs]-[a-zA-Z0-9\-]{10,})["\']?`),
			Redact:  "${1}=\"[REDACTED_SLACK_TOKEN]\"",
		},

		// Private Keys
		{
			Name: "Private Key",
			// (?s) lets '.' cross newlines so the whole PEM body between the
			// BEGIN and END markers is replaced; the markers are kept.
			Pattern: regexp.MustCompile(`(?s)(-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----).*?(-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----)`),
			Redact:  "${1}\n[REDACTED_PRIVATE_KEY]\n${2}",
		},

		// JWT Tokens
		{
			Name:    "JWT Token",
			Pattern: regexp.MustCompile(`(?i)(jwt|token)\s*[=:]\s*["\']?(eyJ[a-zA-Z0-9_\-]*\.eyJ[a-zA-Z0-9_\-]*\.[a-zA-Z0-9_\-]+)["\']?`),
			Redact:  "${1}=\"[REDACTED_JWT_TOKEN]\"",
		},

		// Generic Passwords (only quoted values of at least 8 characters)
		{
			Name:    "Password",
			Pattern: regexp.MustCompile(`(?i)(password|passwd|pwd)\s*[=:]\s*["\']([^\s"']{8,})["\']`),
			Redact:  "${1}=\"[REDACTED_PASSWORD]\"",
		},

		// Generic Secrets
		{
			Name:    "Secret",
			Pattern: regexp.MustCompile(`(?i)(secret|SECRET)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?`),
			Redact:  "${1}=\"[REDACTED_SECRET]\"",
		},

		// Environment Variable Assignments (catch-all for .env patterns)
		{
			Name:    "Generic Token",
			Pattern: regexp.MustCompile(`(?i)(access[_-]?token|auth[_-]?token|client[_-]?secret|private[_-]?key)\s*[=:]\s*["\']?([a-zA-Z0-9_\-\.]{20,})["\']?`),
			Redact:  "${1}=\"[REDACTED_TOKEN]\"",
		},

		// Credit Card Numbers (basic pattern: four groups of four digits,
		// optionally separated by whitespace or '-')
		{
			Name:    "Credit Card",
			Pattern: regexp.MustCompile(`\b([0-9]{4}[\s\-]?){3}[0-9]{4}\b`),
			Redact:  "[REDACTED_CREDIT_CARD]",
		},

		// Email in credentials context
		{
			Name:    "Email in Credentials",
			Pattern: regexp.MustCompile(`(?i)(email|user|username)\s*[=:]\s*["\']?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})["\']?`),
			Redact:  "${1}=\"[REDACTED_EMAIL]\"",
		},
	}
)
| 141 | + |
| 142 | +// ScrubDiff removes sensitive information from git diff output |
| 143 | +func ScrubDiff(diff string) string { |
| 144 | + scrubbed := diff |
| 145 | + |
| 146 | + // Apply each pattern |
| 147 | + for _, pattern := range sensitivePatterns { |
| 148 | + scrubbed = pattern.Pattern.ReplaceAllString(scrubbed, pattern.Redact) |
| 149 | + } |
| 150 | + |
| 151 | + return scrubbed |
| 152 | +} |
| 153 | + |
| 154 | +// ScrubLines removes sensitive information line by line |
| 155 | +// This is useful for more granular control |
| 156 | +func ScrubLines(content string) string { |
| 157 | + lines := strings.Split(content, "\n") |
| 158 | + scrubbedLines := make([]string, len(lines)) |
| 159 | + |
| 160 | + for i, line := range lines { |
| 161 | + scrubbedLine := line |
| 162 | + for _, pattern := range sensitivePatterns { |
| 163 | + scrubbedLine = pattern.Pattern.ReplaceAllString(scrubbedLine, pattern.Redact) |
| 164 | + } |
| 165 | + scrubbedLines[i] = scrubbedLine |
| 166 | + } |
| 167 | + |
| 168 | + return strings.Join(scrubbedLines, "\n") |
| 169 | +} |
| 170 | + |
| 171 | +// HasSensitiveData checks if the content contains any sensitive patterns |
| 172 | +func HasSensitiveData(content string) bool { |
| 173 | + for _, pattern := range sensitivePatterns { |
| 174 | + if pattern.Pattern.MatchString(content) { |
| 175 | + return true |
| 176 | + } |
| 177 | + } |
| 178 | + return false |
| 179 | +} |
| 180 | + |
| 181 | +// GetDetectedPatterns returns names of all detected sensitive patterns |
| 182 | +func GetDetectedPatterns(content string) []string { |
| 183 | + var detected []string |
| 184 | + for _, pattern := range sensitivePatterns { |
| 185 | + if pattern.Pattern.MatchString(content) { |
| 186 | + detected = append(detected, pattern.Name) |
| 187 | + } |
| 188 | + } |
| 189 | + return detected |
| 190 | +} |
| 191 | + |
| 192 | +// ScrubEnvFile specifically handles .env file patterns |
| 193 | +func ScrubEnvFile(content string) string { |
| 194 | + lines := strings.Split(content, "\n") |
| 195 | + scrubbedLines := make([]string, len(lines)) |
| 196 | + |
| 197 | + for i, line := range lines { |
| 198 | + trimmed := strings.TrimSpace(line) |
| 199 | + |
| 200 | + // Skip comments and empty lines |
| 201 | + if trimmed == "" || strings.HasPrefix(trimmed, "#") { |
| 202 | + scrubbedLines[i] = line |
| 203 | + continue |
| 204 | + } |
| 205 | + |
| 206 | + // Check if line contains an assignment |
| 207 | + if strings.Contains(line, "=") { |
| 208 | + parts := strings.SplitN(line, "=", 2) |
| 209 | + if len(parts) == 2 { |
| 210 | + key := parts[0] |
| 211 | + // Redact the value if it looks like sensitive data |
| 212 | + upperKey := strings.ToUpper(strings.TrimSpace(key)) |
| 213 | + if strings.Contains(upperKey, "KEY") || |
| 214 | + strings.Contains(upperKey, "SECRET") || |
| 215 | + strings.Contains(upperKey, "TOKEN") || |
| 216 | + strings.Contains(upperKey, "PASSWORD") || |
| 217 | + strings.Contains(upperKey, "PASS") || |
| 218 | + strings.Contains(upperKey, "API") || |
| 219 | + strings.Contains(upperKey, "AUTH") { |
| 220 | + scrubbedLines[i] = key + "=[REDACTED]" |
| 221 | + continue |
| 222 | + } |
| 223 | + } |
| 224 | + } |
| 225 | + |
| 226 | + // Apply normal scrubbing |
| 227 | + scrubbedLines[i] = ScrubDiff(line) |
| 228 | + } |
| 229 | + |
| 230 | + return strings.Join(scrubbedLines, "\n") |
| 231 | +} |
0 commit comments