Skip to content

Commit 47be303

Browse files
authored
Merge pull request #22 from arran4/fix-lint-types-sa9003-15754644472053227385
Fix SA9003 empty branch in types.go
2 parents e5269b0 + 94de146 commit 47be303

3 files changed

Lines changed: 258 additions & 23 deletions

File tree

types.go

Lines changed: 84 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ type SeparatorWord string
3636

3737
// String implementations
3838
func (w SingleCaseWord) String() string { return strings.ToLower(string(w)) }
39-
func (w FirstUpperCaseWord) String() string { return upperCaseFirstLower(string(w)) }
39+
func (w FirstUpperCaseWord) String() string {
40+
res, _ := upperCaseFirstLower(string(w), UTF8Replace)
41+
return res
42+
}
4043
func (w AcronymWord) String() string { return string(w) }
4144
func (w UpperCaseWord) String() string { return strings.ToUpper(string(w)) }
4245
func (w SeparatorWord) String() string { return string(w) }
@@ -119,25 +122,31 @@ func MustLowerCaseFirst(s string) string {
119122
}
120123

121124
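// upperCaseFirstLower capitalizes the first character and lowercases the rest; mode selects how invalid UTF-8 bytes are handled (replaced with U+FFFD, rejected with an error, or preserved as-is).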
122-
func upperCaseFirstLower(s string) string {
125+
func upperCaseFirstLower(s string, mode UTF8Mode) (string, error) {
123126
if s == "" {
124-
return ""
127+
return "", nil
125128
}
126129
r, size := utf8.DecodeRuneInString(s)
127130
if r == utf8.RuneError && size == 1 {
128-
// Invalid UTF-8 start byte.
129-
// We want to replace it with RuneError (like strings.ToLower/ToUpper do).
130-
// So we force needChange.
131-
} else if r == utf8.RuneError {
132-
// Valid RuneError (U+FFFD)
131+
if mode == UTF8Strict {
132+
return "", fmt.Errorf("%w: invalid rune", ErrRune)
133+
}
133134
}
134135

135136
u := unicode.ToUpper(r)
136137

137-
// Check if changes are needed
138-
needChange := (r != u) || (r == utf8.RuneError && size == 1)
138+
// Check if changes are needed.
139+
// If r == utf8.RuneError && size == 1, it is an invalid UTF-8 start byte.
140+
// In UTF8Replace mode we want to replace it with RuneError (like strings.ToLower/ToUpper do).
141+
// So we force needChange.
142+
needChange := (r != u) || (r == utf8.RuneError && size == 1 && mode == UTF8Replace)
139143
if !needChange {
140144
for _, rc := range s[size:] {
145+
if rc == utf8.RuneError {
146+
if mode == UTF8Strict {
147+
return "", fmt.Errorf("%w: invalid rune", ErrRune)
148+
}
149+
}
141150
if unicode.ToLower(rc) != rc {
142151
needChange = true
143152
break
@@ -146,16 +155,34 @@ func upperCaseFirstLower(s string) string {
146155
}
147156

148157
if !needChange {
149-
return s
158+
return s, nil
150159
}
151160

152161
var b strings.Builder
153162
b.Grow(len(s))
154-
b.WriteRune(u)
155-
for _, rc := range s[size:] {
163+
if r == utf8.RuneError && size == 1 && mode == UTF8Ignore {
164+
b.WriteByte(s[0])
165+
} else {
166+
b.WriteRune(u)
167+
}
168+
169+
for i, rc := range s[size:] {
170+
if rc == utf8.RuneError {
171+
if mode == UTF8Strict {
172+
return "", fmt.Errorf("%w: invalid rune", ErrRune)
173+
}
174+
if mode == UTF8Ignore {
175+
// s[size:] is the substring starting after first rune.
176+
// i is the index within that substring.
177+
// We need to write the original byte.
178+
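// s[size+i] is the byte. NOTE(review): rc == utf8.RuneError also matches a validly encoded U+FFFD (3 bytes); this branch would then emit only its first byte, and UTF8Strict would reject it — confirm whether that is intended.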
179+
b.WriteByte(s[size+i])
180+
continue
181+
}
182+
}
156183
b.WriteRune(unicode.ToLower(rc))
157184
}
158-
return b.String()
185+
return b.String(), nil
159186
}
160187

161188
func (w ExactCaseWord) String() string { return string(w) }
@@ -181,6 +208,18 @@ const (
181208
CMScreaming
182209
)
183210

211+
// UTF8Mode defines how to handle invalid UTF-8 sequences.
212+
type UTF8Mode int
213+
214+
const (
215+
// UTF8Replace replaces invalid UTF-8 bytes with utf8.RuneError (U+FFFD).
216+
UTF8Replace UTF8Mode = iota
217+
// UTF8Strict returns an error on invalid UTF-8 sequences.
218+
UTF8Strict
219+
// UTF8Ignore leaves invalid UTF-8 bytes untouched, copying them to the output as-is (best effort).
220+
UTF8Ignore
221+
)
222+
184223
type caseConfig struct {
185224
caseMode CaseMode
186225
delimiter string
@@ -192,6 +231,7 @@ type caseConfig struct {
192231
mixCaseSupport bool
193232
firstUpper bool
194233
firstLower bool
234+
utf8Mode UTF8Mode
195235
}
196236

197237
// OptionDelimiter sets the delimiter between words.
@@ -224,6 +264,16 @@ func OptionUpperIndicator(d string) Option {
224264
return func(cfg *caseConfig) { cfg.upperIndicator = d }
225265
}
226266

267+
// OptionStrict sets strict mode (UTF8Strict), which returns an error if invalid UTF-8 sequences are encountered.
268+
func OptionStrict() Option {
269+
return func(cfg *caseConfig) { cfg.utf8Mode = UTF8Strict }
270+
}
271+
272+
// OptionLoose sets loose mode (UTF8Ignore), which preserves invalid UTF-8 bytes as-is instead of replacing them.
273+
func OptionLoose() Option {
274+
return func(cfg *caseConfig) { cfg.utf8Mode = UTF8Ignore }
275+
}
276+
227277
// ToFormattedCase generates formatted case strings with the given options
228278
// Deprecated: Use WordsToFormattedCase. This function suppresses errors for backward compatibility.
229279
func ToFormattedCase(words []Word, opts ...Option) string {
@@ -279,7 +329,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
279329
} else if cfg.allLower || cfg.whispering {
280330
w = strings.ToLower(w)
281331
} else if cfg.caseMode == CMAllTitle {
282-
w = upperCaseFirstLower(w)
332+
var err error
333+
w, err = upperCaseFirstLower(w, cfg.utf8Mode)
334+
if err != nil {
335+
return "", err
336+
}
283337
} else {
284338
w = strings.ToLower(w)
285339
}
@@ -294,7 +348,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
294348
w = strings.ToLower(w)
295349
}
296350
case FirstUpperCaseWord:
297-
w = word.String()
351+
var err error
352+
w, err = upperCaseFirstLower(string(word), cfg.utf8Mode)
353+
if err != nil {
354+
return "", err
355+
}
298356
if cfg.mixCaseSupport {
299357
w = splitMixCase(w, cfg.delimiter)
300358
}
@@ -310,7 +368,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
310368
} else if cfg.whispering {
311369
w = strings.ToLower(w)
312370
} else if cfg.caseMode == CMAllTitle {
313-
w = upperCaseFirstLower(w)
371+
var err error
372+
w, err = upperCaseFirstLower(w, cfg.utf8Mode)
373+
if err != nil {
374+
return "", err
375+
}
314376
}
315377
case UpperCaseWord:
316378
w = word.String()
@@ -319,7 +381,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
319381
} else if cfg.allLower || cfg.whispering {
320382
w = strings.ToLower(w)
321383
} else if cfg.caseMode == CMAllTitle {
322-
w = upperCaseFirstLower(w)
384+
var err error
385+
w, err = upperCaseFirstLower(w, cfg.utf8Mode)
386+
if err != nil {
387+
return "", err
388+
}
323389
} else {
324390
w = strings.ToLower(w)
325391
}
@@ -389,8 +455,6 @@ func separateOptionsAny(opts []any) ([]any, []any) {
389455
case ParserOption, Partitioner, PartitionerConfig:
390456
parseOpts = append(parseOpts, v)
391457
default:
392-
// Assume unknown types might be relevant for formatter if it changes,
393-
// or just ignore.
394458
}
395459
}
396460
return parseOpts, fmtOpts

types_internal_test.go

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package strings2
22

33
import (
4+
"errors"
45
"testing"
56
)
67

@@ -74,27 +75,122 @@ func TestUpperCaseFirstLower_Correctness(t *testing.T) {
7475

7576
for _, tt := range tests {
7677
t.Run(tt.name, func(t *testing.T) {
77-
got := upperCaseFirstLower(tt.input)
78+
got, err := upperCaseFirstLower(tt.input, UTF8Replace)
79+
if err != nil {
80+
t.Errorf("upperCaseFirstLower(%q, UTF8Replace) returned unexpected error: %v", tt.input, err)
81+
}
7882
if got != tt.expected {
7983
t.Errorf("upperCaseFirstLower(%q) = %q, want %q", tt.input, got, tt.expected)
8084
}
8185
})
8286
}
8387
}
8488

89+
func TestUpperCaseFirstLower_Strict(t *testing.T) {
90+
tests := []struct {
91+
name string
92+
input string
93+
expectErr bool
94+
}{
95+
{
96+
name: "Valid ASCII",
97+
input: "test",
98+
expectErr: false,
99+
},
100+
{
101+
name: "Valid Unicode",
102+
input: "äpfel",
103+
expectErr: false,
104+
},
105+
{
106+
name: "Invalid UTF-8 Start",
107+
input: "\xfftest",
108+
expectErr: true,
109+
},
110+
{
111+
name: "Invalid UTF-8 Middle",
112+
input: "te\xffst",
113+
expectErr: true,
114+
},
115+
}
116+
117+
for _, tt := range tests {
118+
t.Run(tt.name, func(t *testing.T) {
119+
_, err := upperCaseFirstLower(tt.input, UTF8Strict)
120+
if tt.expectErr {
121+
if err == nil {
122+
t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected error, got nil", tt.input)
123+
}
124+
if !errors.Is(err, ErrRune) {
125+
t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected ErrRune, got %v", tt.input, err)
126+
}
127+
} else {
128+
if err != nil {
129+
t.Errorf("upperCaseFirstLower(%q, UTF8Strict) unexpected error: %v", tt.input, err)
130+
}
131+
}
132+
})
133+
}
134+
}
135+
136+
func TestUpperCaseFirstLower_Loose(t *testing.T) {
137+
tests := []struct {
138+
name string
139+
input string
140+
expected string
141+
}{
142+
{
143+
name: "Invalid UTF-8 Start",
144+
input: "\xfftest",
145+
expected: "\xfftest", // Preserves invalid byte
146+
},
147+
{
148+
name: "Invalid UTF-8 Middle",
149+
input: "te\xffst",
150+
expected: "Te\xffst", // Preserves invalid byte, title cases valid parts
151+
},
152+
{
153+
name: "Mixed Invalid",
154+
input: "\xffT\xff",
155+
expected: "\xfft\xff", // Leading invalid byte kept, 'T' lowercased to 't', trailing invalid byte kept.
156+
// upperCaseFirstLower Logic:
157+
// 1. Decode first rune. If invalid: write byte.
158+
// 2. Loop rest. If invalid: write byte. Else toLower.
159+
// Input: \xff T \xff
160+
// 1. First: \xff. Invalid. Write \xff.
161+
// 2. Rest: "T\xff".
162+
// - 'T': ToLower -> 't'.
163+
// - \xff: Invalid. Write \xff.
164+
// Result: "\xfft\xff".
165+
},
166+
}
167+
168+
for _, tt := range tests {
169+
t.Run(tt.name, func(t *testing.T) {
170+
got, err := upperCaseFirstLower(tt.input, UTF8Ignore)
171+
if err != nil {
172+
t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) returned unexpected error: %v", tt.input, err)
173+
}
174+
if got != tt.expected {
175+
t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) = %q (bytes: %x), want %q (bytes: %x)", tt.input, got, []byte(got), tt.expected, []byte(tt.expected))
176+
}
177+
})
178+
}
179+
}
180+
85181
func TestUpperCaseFirstLower_Allocations(t *testing.T) {
86182
// Test that no allocation occurs when the string is already correctly cased
87183
input := "Test"
88184
if testing.AllocsPerRun(10, func() {
89-
upperCaseFirstLower(input)
185+
_, _ = upperCaseFirstLower(input, UTF8Replace)
90186
}) > 0 {
91187
t.Errorf("upperCaseFirstLower(%q) allocated memory when no change was needed", input)
92188
}
93189

94190
// Test that allocation occurs when change IS needed
95191
input2 := "test"
96192
if testing.AllocsPerRun(10, func() {
97-
upperCaseFirstLower(input2)
193+
_, _ = upperCaseFirstLower(input2, UTF8Replace)
98194
}) == 0 {
99195
t.Errorf("upperCaseFirstLower(%q) did not allocate memory when change was needed", input2)
100196
}

0 commit comments

Comments
 (0)