|
2 | 2 |
|
3 | 3 | import java.io.File; |
4 | 4 | import java.io.UnsupportedEncodingException; |
5 | | -import java.lang.reflect.Field; |
6 | 5 | import java.nio.charset.StandardCharsets; |
7 | 6 | import java.util.Arrays; |
8 | 7 | import java.util.LinkedHashSet; |
@@ -141,35 +140,13 @@ public final class StringUtilities { |
141 | 140 |
|
142 | 141 | public static final String EMPTY = ""; |
143 | 142 |
|
144 | | - // Reflective access to String's internal byte[] value and byte coder fields (JDK 9+). |
145 | | - // Enables direct byte-level hashing, bypassing charAt()'s per-character coder check. |
146 | | - // null on JDK 8 (value is char[], not byte[]) or JDK 16+ without --add-opens. |
147 | | - private static final Field STRING_VALUE_FIELD; |
148 | | - private static final Field STRING_CODER_FIELD; |
| 143 | + // Reusable per-thread char buffer for hashCodeIgnoreCase. |
| 144 | + // getChars() copies the chars in bulk (typically vectorized by the JIT), then the hash loop |
| 145 | + // uses direct array access — no charAt() method call or coder check per char. |
| 146 | + private static final int CHAR_BUF_SIZE = 256; |
| 147 | + private static final ThreadLocal<char[]> TL_CHAR_BUF = ThreadLocal.withInitial(() -> new char[CHAR_BUF_SIZE]); |
| 148 | + |
149 | 149 |
|
150 | | - static { |
151 | | - Field sv = null, sc = null; |
152 | | - try { |
153 | | - // JDK 9+ stores String internally as byte[] with a coder flag. |
154 | | - // On JDK 8, 'value' is char[] and 'coder' doesn't exist — we detect and skip. |
155 | | - sv = String.class.getDeclaredField("value"); |
156 | | - sc = String.class.getDeclaredField("coder"); |
157 | | - if (sv.getType() != byte[].class) { |
158 | | - // JDK 8: value is char[], not byte[] — VarHandle optimization not applicable |
159 | | - sv = null; sc = null; |
160 | | - } else { |
161 | | - sv.setAccessible(true); |
162 | | - sc.setAccessible(true); |
163 | | - // Verify access works (fails on JDK 16+ without --add-opens) |
164 | | - sc.getByte(""); |
165 | | - } |
166 | | - } catch (Throwable ignored) { |
167 | | - sv = null; sc = null; |
168 | | - } |
169 | | - STRING_VALUE_FIELD = sv; |
170 | | - STRING_CODER_FIELD = sc; |
171 | | - } |
172 | | - |
173 | 150 | // Security configuration - all disabled by default for backward compatibility |
174 | 151 | // Read dynamically to allow runtime configuration changes for testing |
175 | 152 | private static boolean isSecurityEnabled() { |
@@ -306,47 +283,9 @@ public static boolean equalsIgnoreCase(String s1, String s2) { |
306 | 283 | if (s1 == null || s2 == null) { |
307 | 284 | return false; |
308 | 285 | } |
309 | | - // Fast path: direct byte comparison for LATIN1 strings (JDK 9+ with compact strings) |
310 | | - if (STRING_VALUE_FIELD != null) { |
311 | | - try { |
312 | | - byte c1 = STRING_CODER_FIELD.getByte(s1); |
313 | | - byte c2 = STRING_CODER_FIELD.getByte(s2); |
314 | | - if (c1 == 0 && c2 == 0) { // Both LATIN1 |
315 | | - byte[] v1 = (byte[]) STRING_VALUE_FIELD.get(s1); |
316 | | - byte[] v2 = (byte[]) STRING_VALUE_FIELD.get(s2); |
317 | | - return equalsIgnoreCaseLatin1(v1, v2); |
318 | | - } |
319 | | - } catch (IllegalAccessException ignored) { |
320 | | - // fall through |
321 | | - } |
322 | | - } |
323 | 286 | return s1.equalsIgnoreCase(s2); |
324 | 287 | } |
325 | 288 |
|
326 | | - /** |
327 | | - * Compare two LATIN1 byte arrays case-insensitively. |
328 | | - * Each byte represents a single character (0-255). |
329 | | - */ |
330 | | - private static boolean equalsIgnoreCaseLatin1(byte[] v1, byte[] v2) { |
331 | | - if (v1.length != v2.length) return false; |
332 | | - for (int i = 0; i < v1.length; i++) { |
333 | | - int b1 = v1[i] & 0xFF; |
334 | | - int b2 = v2[i] & 0xFF; |
335 | | - if (b1 == b2) continue; |
336 | | - // Fast ASCII case fold |
337 | | - if (b1 <= 'Z' && b1 >= 'A') b1 += 32; |
338 | | - if (b2 <= 'Z' && b2 >= 'A') b2 += 32; |
339 | | - if (b1 == b2) continue; |
340 | | - // Non-ASCII: full Unicode fold |
341 | | - if (b1 >= 128 || b2 >= 128) { |
342 | | - if (Character.toLowerCase(Character.toUpperCase((char) b1)) == |
343 | | - Character.toLowerCase(Character.toUpperCase((char) b2))) continue; |
344 | | - } |
345 | | - return false; |
346 | | - } |
347 | | - return true; |
348 | | - } |
349 | | - |
350 | 289 | /** |
351 | 290 | * Checks if the first string contains the second string, ignoring case considerations. |
352 | 291 | * <p> |
@@ -1034,63 +973,39 @@ public static int hashCodeIgnoreCase(CharSequence cs) { |
1034 | 973 | public static int hashCodeIgnoreCase(String s) { |
1035 | 974 | if (s == null) return 0; |
1036 | 975 |
|
1037 | | - // Fast path: direct byte[] access for LATIN1 strings (JDK 9+ with compact strings). |
1038 | | - // Bypasses charAt()'s per-character coder check and method call overhead. |
1039 | | - if (STRING_VALUE_FIELD != null) { |
1040 | | - try { |
1041 | | - byte coder = STRING_CODER_FIELD.getByte(s); |
1042 | | - if (coder == 0) { // LATIN1 — one byte per char, all values 0-255 |
1043 | | - byte[] value = (byte[]) STRING_VALUE_FIELD.get(s); |
1044 | | - return hashLatin1Bytes(value); |
1045 | | - } |
1046 | | - } catch (IllegalAccessException e) { |
1047 | | - // Unreachable after setAccessible(true) — fall through to charAt path |
1048 | | - } |
1049 | | - } |
1050 | | - |
1051 | | - return hashCodeIgnoreCaseChars(s); |
1052 | | - } |
1053 | | - |
1054 | | - /** |
1055 | | - * Hash LATIN1 String bytes directly — no charAt() overhead, no coder check per char. |
1056 | | - * For ASCII keys (the 99% case in map lookups), each byte IS the character value. |
1057 | | - */ |
1058 | | - private static int hashLatin1Bytes(byte[] value) { |
1059 | | - int h = 0; |
1060 | | - for (int i = 0; i < value.length; i++) { |
1061 | | - int c = value[i] & 0xFF; |
1062 | | - if (c <= 'Z') { |
1063 | | - if (c >= 'A') { |
1064 | | - c += 32; |
1065 | | - } |
1066 | | - } else if (c >= 128) { |
1067 | | - c = Character.toLowerCase(Character.toUpperCase((char) c)); |
1068 | | - } |
1069 | | - h = 31 * h + c; |
1070 | | - } |
1071 | | - return h; |
1072 | | - } |
1073 | | - |
1074 | | - /** |
1075 | | - * Fallback: charAt-based hash for when VarHandle is unavailable or string is UTF16. |
1076 | | - */ |
1077 | | - private static int hashCodeIgnoreCaseChars(String s) { |
1078 | 976 | final int n = s.length(); |
| 977 | + // Bulk-copy chars via getChars (SIMD-optimized), then hash from the array. |
| 978 | + // Avoids charAt()'s per-character method call and JDK 9+ coder check overhead. |
| 979 | + // Uses no reflection, no VarHandle — works on all JDK versions. |
| 980 | + char[] buf = getCharBuf(n); |
| 981 | + s.getChars(0, n, buf, 0); |
| 982 | + |
1079 | 983 | int h = 0; |
1080 | 984 | for (int i = 0; i < n; i++) { |
1081 | | - char c = s.charAt(i); |
| 985 | + char c = buf[i]; |
1082 | 986 | if (c <= 'Z') { // digits, symbols, and uppercase all ≤ 90 |
1083 | 987 | if (c >= 'A') { // uppercase A-Z: fold to lowercase |
1084 | 988 | c += 32; |
1085 | 989 | } |
1086 | 990 | } else if (c >= 128) { // non-ASCII: full Unicode case fold |
1087 | 991 | c = Character.toLowerCase(Character.toUpperCase(c)); |
1088 | 992 | } |
| 993 | + // else: c is 91-127 (lowercase a-z, symbols) — no folding, 2 comparisons |
1089 | 994 | h = 31 * h + c; |
1090 | 995 | } |
1091 | 996 | return h; |
1092 | 997 | } |
1093 | 998 |
|
| 999 | + /** Get this thread's reusable char buffer; if it is too small, replace it with a new minSize-length array (the buffer is never shrunk). */ |
| 1000 | + private static char[] getCharBuf(int minSize) { |
| 1001 | + char[] buf = TL_CHAR_BUF.get(); |
| 1002 | + if (minSize > buf.length) { |
| 1003 | + buf = new char[minSize]; |
| 1004 | + TL_CHAR_BUF.set(buf); |
| 1005 | + } |
| 1006 | + return buf; |
| 1007 | + } |
| 1008 | + |
1094 | 1009 | /** |
1095 | 1010 | * Case-fold a single character for hashing purposes. |
1096 | 1011 | * Used by the CharSequence overload of hashCodeIgnoreCase. |
|
0 commit comments