Skip to content

Commit 47c0289

Browse files
Add OptionStrict for strict UTF-8 handling
Implemented `OptionStrict`, which makes `WordsToFormattedCase` return an error when invalid UTF-8 sequences are encountered. Updated `upperCaseFirstLower` to accept a `strict` boolean and to return an error alongside its result. Updated `WordsToFormattedCase` to call `upperCaseFirstLower` with the strict setting from `caseConfig`. Added tests for `OptionStrict`.

Co-authored-by: arran4 <111667+arran4@users.noreply.github.com>
1 parent c6bf706 commit 47c0289

3 files changed

Lines changed: 157 additions & 12 deletions

File tree

types.go

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ type SeparatorWord string
3636

3737
// String implementations
3838
func (w SingleCaseWord) String() string { return strings.ToLower(string(w)) }
39-
func (w FirstUpperCaseWord) String() string { return upperCaseFirstLower(string(w)) }
39+
// String returns w with its first character upper-cased and the rest lower-cased.
// It calls upperCaseFirstLower with strict=false; every error return visible in
// that helper is guarded by the strict flag, so the error result is discarded here.
func (w FirstUpperCaseWord) String() string {
40+
res, _ := upperCaseFirstLower(string(w), false)
41+
return res
42+
}
4043
func (w AcronymWord) String() string { return string(w) }
4144
func (w UpperCaseWord) String() string { return strings.ToUpper(string(w)) }
4245
func (w SeparatorWord) String() string { return string(w) }
@@ -119,11 +122,14 @@ func MustLowerCaseFirst(s string) string {
119122
}
120123

121124
// upperCaseFirstLower capitalizes the first character and lowercases the rest.
122-
func upperCaseFirstLower(s string) string {
125+
func upperCaseFirstLower(s string, strict bool) (string, error) {
123126
if s == "" {
124-
return ""
127+
return "", nil
125128
}
126129
r, size := utf8.DecodeRuneInString(s)
130+
if strict && r == utf8.RuneError {
131+
return "", fmt.Errorf("%w: invalid rune", ErrRune)
132+
}
127133
u := unicode.ToUpper(r)
128134

129135
// Check if changes are needed.
@@ -133,6 +139,9 @@ func upperCaseFirstLower(s string) string {
133139
needChange := (r != u) || (r == utf8.RuneError && size == 1)
134140
if !needChange {
135141
for _, rc := range s[size:] {
142+
if strict && rc == utf8.RuneError {
143+
return "", fmt.Errorf("%w: invalid rune", ErrRune)
144+
}
136145
if unicode.ToLower(rc) != rc {
137146
needChange = true
138147
break
@@ -141,16 +150,19 @@ func upperCaseFirstLower(s string) string {
141150
}
142151

143152
if !needChange {
144-
return s
153+
return s, nil
145154
}
146155

147156
var b strings.Builder
148157
b.Grow(len(s))
149158
b.WriteRune(u)
150159
for _, rc := range s[size:] {
160+
if strict && rc == utf8.RuneError {
161+
return "", fmt.Errorf("%w: invalid rune", ErrRune)
162+
}
151163
b.WriteRune(unicode.ToLower(rc))
152164
}
153-
return b.String()
165+
return b.String(), nil
154166
}
155167

156168
func (w ExactCaseWord) String() string { return string(w) }
@@ -187,6 +199,7 @@ type caseConfig struct {
187199
mixCaseSupport bool
188200
firstUpper bool
189201
firstLower bool
202+
strict bool
190203
}
191204

192205
// OptionDelimiter sets the delimiter between words.
@@ -219,6 +232,11 @@ func OptionUpperIndicator(d string) Option {
219232
return func(cfg *caseConfig) { cfg.upperIndicator = d }
220233
}
221234

235+
// OptionStrict enables strict mode by setting caseConfig.strict. In strict mode
// WordsToFormattedCase returns an error wrapping ErrRune, instead of a formatted
// string, when a word it passes through upperCaseFirstLower contains an invalid
// UTF-8 sequence.
// NOTE(review): only the upperCaseFirstLower call sites are visible in this diff;
// confirm whether other conversion paths also honour the flag.
236+
func OptionStrict() Option {
237+
return func(cfg *caseConfig) { cfg.strict = true }
238+
}
239+
222240
// ToFormattedCase generates formatted case strings with the given options
223241
// Deprecated: Use WordsToFormattedCase. This function suppresses errors for backward compatibility.
224242
func ToFormattedCase(words []Word, opts ...Option) string {
@@ -266,7 +284,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
266284
} else if cfg.allLower || cfg.whispering {
267285
w = strings.ToLower(w)
268286
} else if cfg.caseMode == CMAllTitle {
269-
w = upperCaseFirstLower(w)
287+
var err error
288+
w, err = upperCaseFirstLower(w, cfg.strict)
289+
if err != nil {
290+
return "", err
291+
}
270292
} else {
271293
w = strings.ToLower(w)
272294
}
@@ -281,7 +303,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
281303
w = strings.ToLower(w)
282304
}
283305
case FirstUpperCaseWord:
284-
w = word.String()
306+
var err error
307+
w, err = upperCaseFirstLower(string(word), cfg.strict)
308+
if err != nil {
309+
return "", err
310+
}
285311
if cfg.mixCaseSupport {
286312
w = splitMixCase(w, cfg.delimiter)
287313
}
@@ -297,7 +323,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
297323
} else if cfg.whispering {
298324
w = strings.ToLower(w)
299325
} else if cfg.caseMode == CMAllTitle {
300-
w = upperCaseFirstLower(w)
326+
var err error
327+
w, err = upperCaseFirstLower(w, cfg.strict)
328+
if err != nil {
329+
return "", err
330+
}
301331
}
302332
case UpperCaseWord:
303333
w = word.String()
@@ -306,7 +336,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
306336
} else if cfg.allLower || cfg.whispering {
307337
w = strings.ToLower(w)
308338
} else if cfg.caseMode == CMAllTitle {
309-
w = upperCaseFirstLower(w)
339+
var err error
340+
w, err = upperCaseFirstLower(w, cfg.strict)
341+
if err != nil {
342+
return "", err
343+
}
310344
} else {
311345
w = strings.ToLower(w)
312346
}

types_internal_test.go

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package strings2
22

33
import (
4+
"errors"
45
"testing"
56
)
67

@@ -74,27 +75,77 @@ func TestUpperCaseFirstLower_Correctness(t *testing.T) {
7475

7576
for _, tt := range tests {
7677
t.Run(tt.name, func(t *testing.T) {
77-
got := upperCaseFirstLower(tt.input)
78+
got, err := upperCaseFirstLower(tt.input, false)
79+
if err != nil {
80+
t.Errorf("upperCaseFirstLower(%q, false) returned unexpected error: %v", tt.input, err)
81+
}
7882
if got != tt.expected {
7983
t.Errorf("upperCaseFirstLower(%q) = %q, want %q", tt.input, got, tt.expected)
8084
}
8185
})
8286
}
8387
}
8488

89+
// TestUpperCaseFirstLower_Strict checks upperCaseFirstLower with strict=true:
// valid inputs must not produce an error, and inputs containing the byte 0xff
// (never valid in UTF-8) must produce an error matching ErrRune. Only the error
// result is inspected; the returned string is ignored.
func TestUpperCaseFirstLower_Strict(t *testing.T) {
90+
tests := []struct {
91+
name string
92+
input string
93+
expectErr bool
94+
}{
95+
{
96+
name: "Valid ASCII",
97+
input: "test",
98+
expectErr: false,
99+
},
100+
{
101+
name: "Valid Unicode",
102+
input: "äpfel",
103+
expectErr: false,
104+
},
105+
{
106+
// Invalid byte is the first rune, so the check right after DecodeRuneInString applies.
107+
name: "Invalid UTF-8 Start",
108+
input: "\xfftest",
109+
expectErr: true,
110+
},
111+
{
112+
// Invalid byte follows valid runes, so it is only seen while ranging over the remainder.
113+
name: "Invalid UTF-8 Middle",
114+
input: "te\xffst",
115+
expectErr: true,
116+
},
117+
}
118+
119+
for _, tt := range tests {
120+
t.Run(tt.name, func(t *testing.T) {
121+
_, err := upperCaseFirstLower(tt.input, true)
122+
if tt.expectErr {
123+
if err == nil {
124+
t.Errorf("upperCaseFirstLower(%q, true) expected error, got nil", tt.input)
125+
}
126+
// errors.Is(nil, ErrRune) is false, so a nil error is reported by both checks.
127+
if !errors.Is(err, ErrRune) {
128+
t.Errorf("upperCaseFirstLower(%q, true) expected ErrRune, got %v", tt.input, err)
129+
}
130+
} else {
131+
if err != nil {
132+
t.Errorf("upperCaseFirstLower(%q, true) unexpected error: %v", tt.input, err)
133+
}
134+
}
135+
})
136+
}
137+
}
135+
85136
func TestUpperCaseFirstLower_Allocations(t *testing.T) {
86137
// Tests that no allocation occurs if the string is already correct
87138
input := "Test"
88139
if testing.AllocsPerRun(10, func() {
89-
upperCaseFirstLower(input)
140+
_, _ = upperCaseFirstLower(input, false)
90141
}) > 0 {
91142
t.Errorf("upperCaseFirstLower(%q) allocated memory when no change was needed", input)
92143
}
93144

94145
// Test that allocation occurs when change IS needed
95146
input2 := "test"
96147
if testing.AllocsPerRun(10, func() {
97-
upperCaseFirstLower(input2)
148+
_, _ = upperCaseFirstLower(input2, false)
98149
}) == 0 {
99150
t.Errorf("upperCaseFirstLower(%q) did not allocate memory when change was needed", input2)
100151
}

types_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,3 +454,63 @@ func TestToFormattedCase_MultibyteFirstLower(t *testing.T) {
454454
t.Errorf("ToFormattedCase with OptionFirstLower for %q = %q, want %q", "Äpfel", got, want)
455455
}
456456
}
457+
458+
// TestOptionStrict exercises OptionStrict through WordsToFormattedCase. Each
// invalid-UTF-8 word ("\xfftest") is run with and without OptionStrict: the
// strict variant must fail with an error matching ErrRune, and the non-strict
// variant must return no error. The formatted output itself is not checked.
func TestOptionStrict(t *testing.T) {
459+
tests := []struct {
460+
name string
461+
words []Word
462+
options []Option
463+
expectErr bool
464+
}{
465+
{
466+
name: "FirstUpperCaseWord Invalid UTF-8 Strict",
467+
words: []Word{
468+
FirstUpperCaseWord("\xfftest"),
469+
},
470+
options: []Option{OptionStrict()},
471+
expectErr: true,
472+
},
473+
{
474+
name: "FirstUpperCaseWord Invalid UTF-8 Non-Strict",
475+
words: []Word{
476+
FirstUpperCaseWord("\xfftest"),
477+
},
478+
options: []Option{},
479+
expectErr: false,
480+
},
481+
{
482+
// CMAllTitle is set so the SingleCaseWord takes a branch that calls upperCaseFirstLower.
483+
name: "SingleCaseWord CMAllTitle Invalid UTF-8 Strict",
484+
words: []Word{
485+
SingleCaseWord("\xfftest"),
486+
},
487+
options: []Option{OptionCaseMode(CMAllTitle), OptionStrict()},
488+
expectErr: true,
489+
},
490+
{
491+
name: "SingleCaseWord CMAllTitle Invalid UTF-8 Non-Strict",
492+
words: []Word{
493+
SingleCaseWord("\xfftest"),
494+
},
495+
options: []Option{OptionCaseMode(CMAllTitle)},
496+
expectErr: false,
497+
},
498+
}
499+
500+
for _, tt := range tests {
501+
t.Run(tt.name, func(t *testing.T) {
502+
// convertOptions is defined elsewhere; presumably it adapts []Option to the
503+
// ...any parameter of WordsToFormattedCase — TODO confirm.
504+
_, err := WordsToFormattedCase(tt.words, convertOptions(tt.options)...)
505+
if tt.expectErr {
506+
if err == nil {
507+
t.Error("expected error, got nil")
508+
}
509+
// errors.Is(nil, ErrRune) is false, so a nil error is reported by both checks.
510+
if !errors.Is(err, ErrRune) {
511+
t.Errorf("expected ErrRune, got %v", err)
512+
}
513+
} else {
514+
if err != nil {
515+
t.Errorf("unexpected error: %v", err)
516+
}
517+
}
518+
})
519+
}
520+
}

0 commit comments

Comments
 (0)