Merge pull request #22 from arran4/fix-lint-types-sa9003-15754644472053227385

arran4 · web-flow · commit 47be303ce551 · 2026-02-07T22:16:38.000+11:00
Fix SA9003 empty branch in types.go
diff --git a/types.go b/types.go
@@ -36,7 +36,10 @@ type SeparatorWord string
 
 // String implementations
 func (w SingleCaseWord) String() string     { return strings.ToLower(string(w)) }
-func (w FirstUpperCaseWord) String() string { return upperCaseFirstLower(string(w)) }
+func (w FirstUpperCaseWord) String() string {
+	res, _ := upperCaseFirstLower(string(w), UTF8Replace)
+	return res
+}
 func (w AcronymWord) String() string        { return string(w) }
 func (w UpperCaseWord) String() string      { return strings.ToUpper(string(w)) }
 func (w SeparatorWord) String() string      { return string(w) }
@@ -119,25 +122,31 @@ func MustLowerCaseFirst(s string) string {
 }
 
 // upperCaseFirstLower capitalizes the first character and lowercases the rest.
-func upperCaseFirstLower(s string) string {
+func upperCaseFirstLower(s string, mode UTF8Mode) (string, error) {
 	if s == "" {
-		return ""
+		return "", nil
 	}
 	r, size := utf8.DecodeRuneInString(s)
 	if r == utf8.RuneError && size == 1 {
-		// Invalid UTF-8 start byte.
-		// We want to replace it with RuneError (like strings.ToLower/ToUpper do).
-		// So we force needChange.
-	} else if r == utf8.RuneError {
-		// Valid RuneError (U+FFFD)
+		if mode == UTF8Strict {
+			return "", fmt.Errorf("%w: invalid rune", ErrRune)
+		}
 	}
 
 	u := unicode.ToUpper(r)
 
-	// Check if changes are needed
-	needChange := (r != u) || (r == utf8.RuneError && size == 1)
+	// Check if changes are needed.
+	// r == utf8.RuneError && size == 1 means the first byte is not valid UTF-8.
+	// In UTF8Replace mode that byte is rewritten as U+FFFD (like strings.ToLower/ToUpper do),
+	// so needChange is forced; UTF8Strict has already returned and UTF8Ignore keeps the byte.
+	needChange := (r != u) || (r == utf8.RuneError && size == 1 && mode == UTF8Replace)
 	if !needChange {
 		for _, rc := range s[size:] {
+			if rc == utf8.RuneError {
+				if mode == UTF8Strict {
+					return "", fmt.Errorf("%w: invalid rune", ErrRune)
+				}
+			}
 			if unicode.ToLower(rc) != rc {
 				needChange = true
 				break
@@ -146,16 +155,34 @@ func upperCaseFirstLower(s string) string {
 	}
 
 	if !needChange {
-		return s
+		return s, nil
 	}
 
 	var b strings.Builder
 	b.Grow(len(s))
-	b.WriteRune(u)
-	for _, rc := range s[size:] {
+	if r == utf8.RuneError && size == 1 && mode == UTF8Ignore {
+		b.WriteByte(s[0])
+	} else {
+		b.WriteRune(u)
+	}
+
+	for i, rc := range s[size:] {
+		if rc == utf8.RuneError {
+			if mode == UTF8Strict {
+				return "", fmt.Errorf("%w: invalid rune", ErrRune)
+			}
+			if mode == UTF8Ignore {
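+				// i indexes into s[size:], the substring after the first rune,
+				// so the byte that failed to decode is s[size+i] in the original string.
+				// Copy that single original byte through unchanged.
+				// NOTE(review): a valid U+FFFD in the input also yields rc == utf8.RuneError but is three bytes wide — confirm.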
+				b.WriteByte(s[size+i])
+				continue
+			}
+		}
 		b.WriteRune(unicode.ToLower(rc))
 	}
-	return b.String()
+	return b.String(), nil
 }
 
 func (w ExactCaseWord) String() string { return string(w) }
@@ -181,6 +208,18 @@ const (
 	CMScreaming
 )
 
+// UTF8Mode defines how to handle invalid UTF-8 sequences.
+type UTF8Mode int
+
+const (
+	// UTF8Replace replaces invalid UTF-8 bytes with utf8.RuneError (U+FFFD).
+	UTF8Replace UTF8Mode = iota
+	// UTF8Strict returns an error on invalid UTF-8 sequences.
+	UTF8Strict
+	// UTF8Ignore ignores invalid UTF-8 sequences and preserves the original bytes (best effort).
+	UTF8Ignore
+)
+
 type caseConfig struct {
 	caseMode       CaseMode
 	delimiter      string
@@ -192,6 +231,7 @@ type caseConfig struct {
 	mixCaseSupport bool
 	firstUpper     bool
 	firstLower     bool
+	utf8Mode       UTF8Mode
 }
 
 // OptionDelimiter sets the delimiter between words.
@@ -224,6 +264,16 @@ func OptionUpperIndicator(d string) Option {
 	return func(cfg *caseConfig) { cfg.upperIndicator = d }
 }
 
+// OptionStrict sets strict mode, which returns an error if invalid UTF-8 sequences are encountered.
+func OptionStrict() Option {
+	return func(cfg *caseConfig) { cfg.utf8Mode = UTF8Strict }
+}
+
+// OptionLoose sets loose mode, which preserves invalid UTF-8 bytes as-is instead of replacing them.
+func OptionLoose() Option {
+	return func(cfg *caseConfig) { cfg.utf8Mode = UTF8Ignore }
+}
+
 // ToFormattedCase generates formatted case strings with the given options
 // Deprecated: Use WordsToFormattedCase. This function suppresses errors for backward compatibility.
 func ToFormattedCase(words []Word, opts ...Option) string {
@@ -279,7 +329,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
 			} else if cfg.allLower || cfg.whispering {
 				w = strings.ToLower(w)
 			} else if cfg.caseMode == CMAllTitle {
-				w = upperCaseFirstLower(w)
+				var err error
+				w, err = upperCaseFirstLower(w, cfg.utf8Mode)
+				if err != nil {
+					return "", err
+				}
 			} else {
 				w = strings.ToLower(w)
 			}
@@ -294,7 +348,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
 				w = strings.ToLower(w)
 			}
 		case FirstUpperCaseWord:
-			w = word.String()
+			var err error
+			w, err = upperCaseFirstLower(string(word), cfg.utf8Mode)
+			if err != nil {
+				return "", err
+			}
 			if cfg.mixCaseSupport {
 				w = splitMixCase(w, cfg.delimiter)
 			}
@@ -310,7 +368,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
 			} else if cfg.whispering {
 				w = strings.ToLower(w)
 			} else if cfg.caseMode == CMAllTitle {
-				w = upperCaseFirstLower(w)
+				var err error
+				w, err = upperCaseFirstLower(w, cfg.utf8Mode)
+				if err != nil {
+					return "", err
+				}
 			}
 		case UpperCaseWord:
 			w = word.String()
@@ -319,7 +381,11 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
 			} else if cfg.allLower || cfg.whispering {
 				w = strings.ToLower(w)
 			} else if cfg.caseMode == CMAllTitle {
-				w = upperCaseFirstLower(w)
+				var err error
+				w, err = upperCaseFirstLower(w, cfg.utf8Mode)
+				if err != nil {
+					return "", err
+				}
 			} else {
 				w = strings.ToLower(w)
 			}
@@ -389,8 +455,6 @@ func separateOptionsAny(opts []any) ([]any, []any) {
 		case ParserOption, Partitioner, PartitionerConfig:
 			parseOpts = append(parseOpts, v)
 		default:
-			// Assume unknown types might be relevant for formatter if it changes,
-			// or just ignore.
 		}
 	}
 	return parseOpts, fmtOpts
diff --git a/types_internal_test.go b/types_internal_test.go
@@ -1,6 +1,7 @@
 package strings2
 
 import (
+	"errors"
 	"testing"
 )
 
@@ -74,27 +75,122 @@ func TestUpperCaseFirstLower_Correctness(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got := upperCaseFirstLower(tt.input)
+			got, err := upperCaseFirstLower(tt.input, UTF8Replace)
+			if err != nil {
+				t.Errorf("upperCaseFirstLower(%q, UTF8Replace) returned unexpected error: %v", tt.input, err)
+			}
 			if got != tt.expected {
 				t.Errorf("upperCaseFirstLower(%q) = %q, want %q", tt.input, got, tt.expected)
 			}
 		})
 	}
 }
 
+func TestUpperCaseFirstLower_Strict(t *testing.T) {
+	tests := []struct {
+		name      string
+		input     string
+		expectErr bool
+	}{
+		{
+			name:      "Valid ASCII",
+			input:     "test",
+			expectErr: false,
+		},
+		{
+			name:      "Valid Unicode",
+			input:     "äpfel",
+			expectErr: false,
+		},
+		{
+			name:      "Invalid UTF-8 Start",
+			input:     "\xfftest",
+			expectErr: true,
+		},
+		{
+			name:      "Invalid UTF-8 Middle",
+			input:     "te\xffst",
+			expectErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := upperCaseFirstLower(tt.input, UTF8Strict)
+			if tt.expectErr {
+				if err == nil {
+					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected error, got nil", tt.input)
+				}
+				if !errors.Is(err, ErrRune) {
+					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected ErrRune, got %v", tt.input, err)
+				}
+			} else {
+				if err != nil {
+					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) unexpected error: %v", tt.input, err)
+				}
+			}
+		})
+	}
+}
+
+func TestUpperCaseFirstLower_Loose(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "Invalid UTF-8 Start",
+			input:    "\xfftest",
+			expected: "\xfftest", // Preserves invalid byte
+		},
+		{
+			name:     "Invalid UTF-8 Middle",
+			input:    "te\xffst",
+			expected: "Te\xffst", // Preserves invalid byte, title cases valid parts
+		},
+		{
+			name:     "Mixed Invalid",
+			input:    "\xffT\xff",
+			expected: "\xfft\xff", // Leading invalid byte kept, 'T' lowercased, trailing invalid byte kept.
+			// upperCaseFirstLower Logic:
+			// 1. Decode first rune. If invalid: write byte.
+			// 2. Loop rest. If invalid: write byte. Else toLower.
+			// Input: \xff T \xff
+			// 1. First: \xff. Invalid. Write \xff.
+			// 2. Rest: "T\xff".
+			//    - 'T': ToLower -> 't'.
+			//    - \xff: Invalid. Write \xff.
+			// Result: "\xfft\xff".
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := upperCaseFirstLower(tt.input, UTF8Ignore)
+			if err != nil {
+				t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) returned unexpected error: %v", tt.input, err)
+			}
+			if got != tt.expected {
+				t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) = %q (bytes: %x), want %q (bytes: %x)", tt.input, got, []byte(got), tt.expected, []byte(tt.expected))
+			}
+		})
+	}
+}
+
 func TestUpperCaseFirstLower_Allocations(t *testing.T) {
 	// Tests that no allocation occurs if the string is already correct
 	input := "Test"
 	if testing.AllocsPerRun(10, func() {
-		upperCaseFirstLower(input)
+		_, _ = upperCaseFirstLower(input, UTF8Replace)
 	}) > 0 {
 		t.Errorf("upperCaseFirstLower(%q) allocated memory when no change was needed", input)
 	}
 
 	// Test that allocation occurs when change IS needed
 	input2 := "test"
 	if testing.AllocsPerRun(10, func() {
-		upperCaseFirstLower(input2)
+		_, _ = upperCaseFirstLower(input2, UTF8Replace)
 	}) == 0 {
 		t.Errorf("upperCaseFirstLower(%q) did not allocate memory when change was needed", input2)
 	}
diff --git a/types_test.go b/types_test.go