@@ -75,8 +75,8 @@ public static Encoding GetEncodingForReading(string filePath, string? encodingNa
7575 }
7676
7777 /// <summary>
78- /// Get encoding by name only (no file detection)
79- /// Returns null if encoding name is invalid
78+ /// Canonical encoding alias resolver. Returns null if the name is unrecognized.
79+ /// All encoding name → Encoding mappings live here (single source of truth).
8080 /// </summary>
8181 private static Encoding ? GetEncodingByName ( string encodingName )
8282 {
@@ -87,14 +87,54 @@ public static Encoding GetEncodingForReading(string filePath, string? encodingNa
8787 // UTF encodings
8888 "utf-8" or "utf8" or "utf8nobom" or "utf-8nobom" or "utf-8-nobom" or "utf8-nobom" => new UTF8Encoding ( false ) ,
8989 "utf-8-bom" or "utf8-bom" or "utf8bom" or "utf8-sig" or "utf-8-sig" => new UTF8Encoding ( true ) ,
90- "utf-16" or "utf16" or "utf-16le" or "utf16le" or "unicode" => Encoding . Unicode ,
91- "utf-16be" or "utf16be" => Encoding . BigEndianUnicode ,
92- "utf-32" or "utf32" or "utf-32le" or "utf32le" => Encoding . UTF32 ,
90+ "utf-16" or "utf16" or "utf-16le" or "utf16le" or "unicode" or "utf16bom" or "utf-16bom" or "utf16lebom" or "utf-16lebom" => Encoding . Unicode ,
91+ "utf-16be" or "utf16be" or "utf16bebom" or "utf-16bebom" => Encoding . BigEndianUnicode ,
92+ "utf-32" or "utf32" or "utf-32le" or "utf32le" or "utf32bom" or "utf-32bom" or "utf32lebom" or "utf-32lebom" => Encoding . UTF32 ,
93+ "utf-32be" or "utf32be" or "utf32bebom" or "utf-32bebom" => new UTF32Encoding ( true , true ) ,
9394
9495 // Japanese encodings
9596 "shift_jis" or "shift-jis" or "shiftjis" or "sjis" or "cp932" => Encoding . GetEncoding ( "shift_jis" ) ,
9697 "euc-jp" or "euc_jp" or "eucjp" => Encoding . GetEncoding ( "euc-jp" ) ,
97- "iso-2022-jp" or "iso2022jp" or "jis" => Encoding . GetEncoding ( "iso-2022-jp" ) ,
98+ "iso-2022-jp" or "iso2022jp" or "iso2022-jp" or "jis" => Encoding . GetEncoding ( "iso-2022-jp" ) ,
99+
100+ // Chinese encodings
101+ "big-5" or "big5hkscs" or "cp950" => Encoding . GetEncoding ( "big5" ) ,
102+ "gb2312" or "gbk" or "gb18030" or "cp936" => Encoding . GetEncoding ( "gb2312" ) ,
103+
104+ // Korean encodings
105+ "euckr" or "cp949" => Encoding . GetEncoding ( "euc-kr" ) ,
106+
107+ // Windows codepages (numeric)
108+ "874" => Encoding . GetEncoding ( "windows-874" ) , // Thai
109+ "1250" => Encoding . GetEncoding ( "windows-1250" ) , // Central European
110+ "1251" => Encoding . GetEncoding ( "windows-1251" ) , // Cyrillic
111+ "1252" => Encoding . GetEncoding ( "windows-1252" ) , // Western European
112+ "1253" => Encoding . GetEncoding ( "windows-1253" ) , // Greek
113+ "1254" => Encoding . GetEncoding ( "windows-1254" ) , // Turkish
114+ "1255" => Encoding . GetEncoding ( "windows-1255" ) , // Hebrew
115+ "1256" => Encoding . GetEncoding ( "windows-1256" ) , // Arabic
116+ "1257" => Encoding . GetEncoding ( "windows-1257" ) , // Baltic
117+ "1258" => Encoding . GetEncoding ( "windows-1258" ) , // Vietnamese
118+
119+ // Windows codepages (cp prefix)
120+ "cp874" => Encoding . GetEncoding ( "windows-874" ) ,
121+ "cp1250" => Encoding . GetEncoding ( "windows-1250" ) ,
122+ "cp1251" => Encoding . GetEncoding ( "windows-1251" ) ,
123+ "cp1254" => Encoding . GetEncoding ( "windows-1254" ) ,
124+
125+ // ISO-8859 variants
126+ "latin-1" or "iso88591" or "iso_8859_1" => Encoding . GetEncoding ( "iso-8859-1" ) , // Latin-1 (Western)
127+ "latin-2" or "iso88592" or "iso_8859_2" => Encoding . GetEncoding ( "iso-8859-2" ) , // Latin-2 (Central European)
128+ "iso88595" or "iso_8859_5" => Encoding . GetEncoding ( "iso-8859-5" ) , // Cyrillic
129+ "iso88596" => Encoding . GetEncoding ( "iso-8859-6" ) , // Arabic
130+ "iso88599" => Encoding . GetEncoding ( "iso-8859-9" ) , // Turkish
131+ "latin-9" or "iso885915" => Encoding . GetEncoding ( "iso-8859-15" ) , // Latin-9
132+
133+ // Cyrillic encodings
134+ "koi8u" => Encoding . GetEncoding ( "koi8-u" ) , // Ukrainian
135+
136+ // Thai encodings
137+ "tis620" => Encoding . GetEncoding ( "tis-620" ) ,
98138
99139 // ASCII
100140 "ascii" => Encoding . ASCII ,
@@ -109,79 +149,17 @@ public static Encoding GetEncodingForReading(string filePath, string? encodingNa
109149 }
110150
111151 /// <summary>
112- /// Get encoding from name or detect from file
152+ /// Get encoding from name or detect from file.
153+ /// Delegates to GetEncodingByName for alias resolution.
113154 /// </summary>
114155 public static Encoding GetEncoding ( string filePath , string ? encodingName )
115156 {
116157 if ( ! string . IsNullOrEmpty ( encodingName ) )
117158 {
118- try
119- {
120- // Normalize common encoding aliases
121- return encodingName . ToLowerInvariant ( ) switch
122- {
123- // UTF encodings
124- "utf-8" or "utf8" or "utf8nobom" or "utf-8nobom" or "utf-8-nobom" or "utf8-nobom" => new UTF8Encoding ( false ) ,
125- "utf-8-bom" or "utf8-bom" or "utf8bom" or "utf8-sig" or "utf-8-sig" => new UTF8Encoding ( true ) ,
126- "utf-16" or "utf16" or "utf-16le" or "utf16le" or "unicode" or "utf16bom" or "utf-16bom" or "utf16lebom" or "utf-16lebom" => Encoding . Unicode ,
127- "utf-16be" or "utf16be" or "utf16bebom" or "utf-16bebom" => Encoding . BigEndianUnicode ,
128- "utf-32" or "utf32" or "utf-32le" or "utf32le" or "utf32bom" or "utf-32bom" or "utf32lebom" or "utf-32lebom" => Encoding . UTF32 ,
129- "utf-32be" or "utf32be" or "utf32bebom" or "utf-32bebom" => new UTF32Encoding ( true , true ) ,
130-
131- // Japanese encodings
132- "shift_jis" or "shift-jis" or "shiftjis" or "sjis" or "cp932" => Encoding . GetEncoding ( "shift_jis" ) ,
133- "euc-jp" or "euc_jp" or "eucjp" => Encoding . GetEncoding ( "euc-jp" ) ,
134- "iso-2022-jp" or "iso2022jp" or "iso2022-jp" or "jis" => Encoding . GetEncoding ( "iso-2022-jp" ) ,
135-
136- // Chinese encodings
137- "big-5" or "big5hkscs" or "cp950" => Encoding . GetEncoding ( "big5" ) ,
138- "gb2312" or "gbk" or "gb18030" or "cp936" => Encoding . GetEncoding ( "gb2312" ) ,
139-
140- // Korean encodings
141- "euckr" or "cp949" => Encoding . GetEncoding ( "euc-kr" ) ,
142-
143- // Windows codepages (numeric)
144- "874" => Encoding . GetEncoding ( "windows-874" ) , // Thai
145- "1250" => Encoding . GetEncoding ( "windows-1250" ) , // Central European
146- "1251" => Encoding . GetEncoding ( "windows-1251" ) , // Cyrillic
147- "1252" => Encoding . GetEncoding ( "windows-1252" ) , // Western European
148- "1253" => Encoding . GetEncoding ( "windows-1253" ) , // Greek
149- "1254" => Encoding . GetEncoding ( "windows-1254" ) , // Turkish
150- "1255" => Encoding . GetEncoding ( "windows-1255" ) , // Hebrew
151- "1256" => Encoding . GetEncoding ( "windows-1256" ) , // Arabic
152- "1257" => Encoding . GetEncoding ( "windows-1257" ) , // Baltic
153- "1258" => Encoding . GetEncoding ( "windows-1258" ) , // Vietnamese
154-
155- // Windows codepages (cp prefix)
156- "cp874" => Encoding . GetEncoding ( "windows-874" ) ,
157- "cp1250" => Encoding . GetEncoding ( "windows-1250" ) ,
158- "cp1251" => Encoding . GetEncoding ( "windows-1251" ) ,
159- "cp1254" => Encoding . GetEncoding ( "windows-1254" ) ,
160-
161- // ISO-8859 variants
162- "latin-1" or "iso88591" or "iso_8859_1" => Encoding . GetEncoding ( "iso-8859-1" ) , // Latin-1 (Western)
163- "latin-2" or "iso88592" or "iso_8859_2" => Encoding . GetEncoding ( "iso-8859-2" ) , // Latin-2 (Central European)
164- "iso88595" or "iso_8859_5" => Encoding . GetEncoding ( "iso-8859-5" ) , // Cyrillic
165- "iso88596" => Encoding . GetEncoding ( "iso-8859-6" ) , // Arabic
166- "iso88599" => Encoding . GetEncoding ( "iso-8859-9" ) , // Turkish
167- "latin-9" or "iso885915" => Encoding . GetEncoding ( "iso-8859-15" ) , // Latin-9
168-
169- // Cyrillic encodings
170- "koi8u" => Encoding . GetEncoding ( "koi8-u" ) , // Ukrainian
171-
172- // Thai encodings
173- "tis620" => Encoding . GetEncoding ( "tis-620" ) ,
174-
175- // ASCII
176- "ascii" => Encoding . ASCII ,
177-
178- _ => Encoding . GetEncoding ( encodingName )
179- } ;
180- }
181- catch
182- {
183- // Invalid encoding name specified, fall back to detection
184- }
159+ var resolved = GetEncodingByName ( encodingName ) ;
160+ if ( resolved != null )
161+ return resolved ;
162+ // Invalid encoding name, fall back to detection
185163 }
186164
187165 return DetectEncoding ( filePath ) ;
0 commit comments