|
| 1 | +using System.Text; |
| 2 | + |
| 3 | +namespace LibVideo.Helpers |
| 4 | +{ |
| 5 | + /// <summary> |
| 6 | + /// Lightweight pinyin initial extractor using GB2312 encoding ranges. |
| 7 | + /// Zero external dependencies. Converts Chinese characters to their |
| 8 | + /// pinyin first letter (e.g. "战狼" → "ZL"). |
| 9 | + /// </summary> |
| 10 | + public static class PinyinHelper |
| 11 | + { |
| 12 | + private static readonly Encoding _gb2312; |
| 13 | + |
| 14 | + static PinyinHelper() |
| 15 | + { |
| 16 | + try |
| 17 | + { |
| 18 | + _gb2312 = Encoding.GetEncoding("GB2312"); |
| 19 | + } |
| 20 | + catch |
| 21 | + { |
| 22 | + _gb2312 = null; |
| 23 | + } |
| 24 | + } |
| 25 | + |
| 26 | + /// <summary> |
| 27 | + /// Returns a string where each Chinese character is replaced by its |
| 28 | + /// pinyin initial letter. Non-Chinese characters are kept as-is. |
| 29 | + /// Example: "战狼2" → "ZL2" |
| 30 | + /// </summary> |
| 31 | + public static string GetInitials(string text) |
| 32 | + { |
| 33 | + if (string.IsNullOrEmpty(text) || _gb2312 == null) return text ?? ""; |
| 34 | + |
| 35 | + var sb = new StringBuilder(text.Length); |
| 36 | + foreach (char ch in text) |
| 37 | + { |
| 38 | + if (ch >= 0x4E00 && ch <= 0x9FFF) |
| 39 | + { |
| 40 | + sb.Append(GetChineseInitial(ch)); |
| 41 | + } |
| 42 | + else |
| 43 | + { |
| 44 | + sb.Append(ch); |
| 45 | + } |
| 46 | + } |
| 47 | + |
| 48 | + return sb.ToString(); |
| 49 | + } |
| 50 | + |
| 51 | + private static char GetChineseInitial(char ch) |
| 52 | + { |
| 53 | + byte[] bytes; |
| 54 | + try |
| 55 | + { |
| 56 | + bytes = _gb2312.GetBytes(ch.ToString()); |
| 57 | + } |
| 58 | + catch |
| 59 | + { |
| 60 | + return ch; |
| 61 | + } |
| 62 | + |
| 63 | + if (bytes.Length != 2) return ch; |
| 64 | + |
| 65 | + int code = bytes[0] * 256 + bytes[1]; |
| 66 | + |
| 67 | + if (code < 0xB0A1 || code > 0xD7F9) return ch; |
| 68 | + |
| 69 | + if (code <= 0xB0C4) return 'A'; |
| 70 | + if (code <= 0xB2C0) return 'B'; |
| 71 | + if (code <= 0xB4ED) return 'C'; |
| 72 | + if (code <= 0xB6E9) return 'D'; |
| 73 | + if (code <= 0xB7A1) return 'E'; |
| 74 | + if (code <= 0xB8C0) return 'F'; |
| 75 | + if (code <= 0xB9FD) return 'G'; |
| 76 | + if (code <= 0xBBF6) return 'H'; |
| 77 | + if (code <= 0xBFA5) return 'J'; |
| 78 | + if (code <= 0xC0AB) return 'K'; |
| 79 | + if (code <= 0xC2E7) return 'L'; |
| 80 | + if (code <= 0xC4C2) return 'M'; |
| 81 | + if (code <= 0xC5B5) return 'N'; |
| 82 | + if (code <= 0xC5BD) return 'O'; |
| 83 | + if (code <= 0xC6D9) return 'P'; |
| 84 | + if (code <= 0xC8BA) return 'Q'; |
| 85 | + if (code <= 0xC8F5) return 'R'; |
| 86 | + if (code <= 0xCBF9) return 'S'; |
| 87 | + if (code <= 0xCDD9) return 'T'; |
| 88 | + if (code <= 0xCEF3) return 'W'; |
| 89 | + if (code <= 0xD1B8) return 'X'; |
| 90 | + if (code <= 0xD4D0) return 'Y'; |
| 91 | + if (code <= 0xD7F9) return 'Z'; |
| 92 | + |
| 93 | + return ch; |
| 94 | + } |
| 95 | + } |
| 96 | +} |
0 commit comments