@@ -36,7 +36,10 @@ type SeparatorWord string
3636
3737// String implementations
3838func (w SingleCaseWord ) String () string { return strings .ToLower (string (w )) }
39- func (w FirstUpperCaseWord ) String () string { return upperCaseFirstLower (string (w )) }
39+ func (w FirstUpperCaseWord ) String () string {
40+ res , _ := upperCaseFirstLower (string (w ), UTF8Replace )
41+ return res
42+ }
4043func (w AcronymWord ) String () string { return string (w ) }
4144func (w UpperCaseWord ) String () string { return strings .ToUpper (string (w )) }
4245func (w SeparatorWord ) String () string { return string (w ) }
@@ -119,25 +122,31 @@ func MustLowerCaseFirst(s string) string {
119122}
120123
121124// upperCaseFirstLower capitalizes the first character and lowercases the rest.
122- func upperCaseFirstLower (s string ) string {
125+ func upperCaseFirstLower (s string , mode UTF8Mode ) ( string , error ) {
123126 if s == "" {
124- return ""
127+ return "" , nil
125128 }
126129 r , size := utf8 .DecodeRuneInString (s )
127130 if r == utf8 .RuneError && size == 1 {
128- // Invalid UTF-8 start byte.
129- // We want to replace it with RuneError (like strings.ToLower/ToUpper do).
130- // So we force needChange.
131- } else if r == utf8 .RuneError {
132- // Valid RuneError (U+FFFD)
131+ if mode == UTF8Strict {
132+ return "" , fmt .Errorf ("%w: invalid rune" , ErrRune )
133+ }
133134 }
134135
135136 u := unicode .ToUpper (r )
136137
137- // Check if changes are needed
138- needChange := (r != u ) || (r == utf8 .RuneError && size == 1 )
138+ // Check if changes are needed.
139+ // If r == utf8.RuneError && size == 1, it is an invalid UTF-8 start byte.
140+ // We want to replace it with RuneError (like strings.ToLower/ToUpper do).
141+ // So we force needChange.
142+ needChange := (r != u ) || (r == utf8 .RuneError && size == 1 && mode == UTF8Replace )
139143 if ! needChange {
140144 for _ , rc := range s [size :] {
145+ if rc == utf8 .RuneError {
146+ if mode == UTF8Strict {
147+ return "" , fmt .Errorf ("%w: invalid rune" , ErrRune )
148+ }
149+ }
141150 if unicode .ToLower (rc ) != rc {
142151 needChange = true
143152 break
@@ -146,16 +155,34 @@ func upperCaseFirstLower(s string) string {
146155 }
147156
148157 if ! needChange {
149- return s
158+ return s , nil
150159 }
151160
152161 var b strings.Builder
153162 b .Grow (len (s ))
154- b .WriteRune (u )
155- for _ , rc := range s [size :] {
163+ if r == utf8 .RuneError && size == 1 && mode == UTF8Ignore {
164+ b .WriteByte (s [0 ])
165+ } else {
166+ b .WriteRune (u )
167+ }
168+
169+ for i , rc := range s [size :] {
170+ if rc == utf8 .RuneError {
171+ if mode == UTF8Strict {
172+ return "" , fmt .Errorf ("%w: invalid rune" , ErrRune )
173+ }
174+ if mode == UTF8Ignore {
175+ // s[size:] is the substring starting after first rune.
176+ // i is the index within that substring.
177+ // We need to write the original byte.
178+ // s[size+i] is the byte.
179+ b .WriteByte (s [size + i ])
180+ continue
181+ }
182+ }
156183 b .WriteRune (unicode .ToLower (rc ))
157184 }
158- return b .String ()
185+ return b .String (), nil
159186}
160187
161188func (w ExactCaseWord ) String () string { return string (w ) }
@@ -181,6 +208,18 @@ const (
181208 CMScreaming
182209)
183210
// UTF8Mode selects the policy applied when a string contains bytes that are
// not valid UTF-8.
type UTF8Mode int

// Supported UTF8Mode values. UTF8Replace is the zero value, so it is the
// policy in effect when no mode has been set explicitly.
const (
	// UTF8Replace substitutes utf8.RuneError (U+FFFD) for each invalid byte.
	UTF8Replace UTF8Mode = iota

	// UTF8Strict reports an error when invalid UTF-8 is encountered.
	UTF8Strict

	// UTF8Ignore leaves invalid bytes in place, copying them through
	// unchanged on a best-effort basis.
	UTF8Ignore
)
222+
184223type caseConfig struct {
185224 caseMode CaseMode
186225 delimiter string
@@ -192,6 +231,7 @@ type caseConfig struct {
192231 mixCaseSupport bool
193232 firstUpper bool
194233 firstLower bool
234+ utf8Mode UTF8Mode
195235}
196236
197237// OptionDelimiter sets the delimiter between words.
@@ -224,6 +264,16 @@ func OptionUpperIndicator(d string) Option {
224264 return func (cfg * caseConfig ) { cfg .upperIndicator = d }
225265}
226266
267+ // OptionStrict sets strict mode, which returns an error if invalid UTF-8 sequences are encountered.
268+ func OptionStrict () Option {
269+ return func (cfg * caseConfig ) { cfg .utf8Mode = UTF8Strict }
270+ }
271+
272+ // OptionLoose sets loose mode, which preserves invalid UTF-8 bytes as-is instead of replacing them.
273+ func OptionLoose () Option {
274+ return func (cfg * caseConfig ) { cfg .utf8Mode = UTF8Ignore }
275+ }
276+
// ToFormattedCase generates formatted case strings with the given options.
//
// Deprecated: Use WordsToFormattedCase. This function suppresses errors for backward compatibility.
229279func ToFormattedCase (words []Word , opts ... Option ) string {
@@ -279,7 +329,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
279329 } else if cfg .allLower || cfg .whispering {
280330 w = strings .ToLower (w )
281331 } else if cfg .caseMode == CMAllTitle {
282- w = upperCaseFirstLower (w )
332+ var err error
333+ w , err = upperCaseFirstLower (w , cfg .utf8Mode )
334+ if err != nil {
335+ return "" , err
336+ }
283337 } else {
284338 w = strings .ToLower (w )
285339 }
@@ -294,7 +348,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
294348 w = strings .ToLower (w )
295349 }
296350 case FirstUpperCaseWord :
297- w = word .String ()
351+ var err error
352+ w , err = upperCaseFirstLower (string (word ), cfg .utf8Mode )
353+ if err != nil {
354+ return "" , err
355+ }
298356 if cfg .mixCaseSupport {
299357 w = splitMixCase (w , cfg .delimiter )
300358 }
@@ -310,7 +368,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
310368 } else if cfg .whispering {
311369 w = strings .ToLower (w )
312370 } else if cfg .caseMode == CMAllTitle {
313- w = upperCaseFirstLower (w )
371+ var err error
372+ w , err = upperCaseFirstLower (w , cfg .utf8Mode )
373+ if err != nil {
374+ return "" , err
375+ }
314376 }
315377 case UpperCaseWord :
316378 w = word .String ()
@@ -319,7 +381,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
319381 } else if cfg .allLower || cfg .whispering {
320382 w = strings .ToLower (w )
321383 } else if cfg .caseMode == CMAllTitle {
322- w = upperCaseFirstLower (w )
384+ var err error
385+ w , err = upperCaseFirstLower (w , cfg .utf8Mode )
386+ if err != nil {
387+ return "" , err
388+ }
323389 } else {
324390 w = strings .ToLower (w )
325391 }
@@ -389,8 +455,6 @@ func separateOptionsAny(opts []any) ([]any, []any) {
389455 case ParserOption , Partitioner , PartitionerConfig :
390456 parseOpts = append (parseOpts , v )
391457 default :
392- // Assume unknown types might be relevant for formatter if it changes,
393- // or just ignore.
394458 }
395459 }
396460 return parseOpts , fmtOpts
0 commit comments