Skip to content

Commit 856b66a

Browse files
committed
Revert "Apply StringUtilities.getChars() across java-util charAt() hot loops"
This reverts commit 0c00f88.
1 parent 0c00f88 commit 856b66a

5 files changed

Lines changed: 25 additions & 54 deletions

File tree

changelog.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
### Revision History
22

33
#### 4.99.0 (Unreleased)
4-
* **FEATURE**: New `StringUtilities.getChars(String s)` — public API that returns a ThreadLocal `char[]` buffer populated via `String.getChars()` (SIMD-optimized bulk copy). Callers replace `str.charAt(i)` loops with direct `buf[i]` array access, avoiding per-character method call and JDK 9+ coder check overhead. Applied across `StringUtilities` (`trimLength`, `hexStringToByteArray`, `count`, `levenshteinDistance`, `damerauLevenshteinDistance`, `snakeToCamel`, `camelToSnake`, `isNumeric`), `ByteUtilities.decode`, `MathUtilities.smartNumberParse`, and `DateUtilities` (`validateSecure`, `parseNanosecond`, `parseTimeZone`, `removeDateMarkerCharacters`).
4+
* **FEATURE**: New `StringUtilities.getChars(String s)` — public API that returns a ThreadLocal `char[]` buffer populated via `String.getChars()` (SIMD-optimized bulk copy). Callers can replace `str.charAt(i)` loops with direct `buf[i]` array access, avoiding the per-character method call and the JDK 9+ coder check overhead. The buffer is reused and may be longer than the string, so only indices `0` to `s.length() - 1` are valid. The buffer is shared per thread: its contents are valid only until the next `getChars()` call on the same thread.
55
* **PERFORMANCE**: `StringUtilities.hashCodeIgnoreCase(String)` — uses `StringUtilities.getChars()` (SIMD-optimized bulk copy) into a ThreadLocal `char[]` buffer, then hashes from the array directly. Avoids `charAt()`'s per-character method call and JDK 9+ compact-string coder check overhead. No reflection, no VarHandle, no `--add-opens` — works on all JDK versions (8-25+). Benchmark shows CaseInsensitiveMap GET improved **70-75%** (230 → 58-69 ns/op), PUT improved **48-52%** (135 → 65-71 ns/op), and MIXED-CASE GET improved **68%** (302 → 93-97 ns/op) on 100K entries.
66
* **PERFORMANCE**: New `FastReader.readLine(char[] dest, int off, int maxLen)` — dedicated line-reading method optimized for TOON's line-oriented parsing. Combines scanning, copying, and line-ending consumption (`\n`, `\r`, `\r\n`) into a single call. Uses a `c <= '\r'` range guard so printable characters (the vast majority) require only one comparison per character instead of two. Eliminates the per-line overhead of separate `readUntil()` + `read()` + pushback round-trip. JFR shows TOON line-reading samples dropped from 173 to 125 (28% reduction), and `FastReader.read()` calls halved (53 → 25 samples).
77
* **PERFORMANCE**: `FastReader.readUntil()` pushback drain loop now uses a local variable for `pushbackPosition` instead of repeated member field access, avoiding load/store through `this` on each iteration. JFR shows 14.8% reduction in aggregate FastReader CPU share.

src/main/java/com/cedarsoftware/util/ByteUtilities.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,9 @@ private static byte[] decodeInternal(final CharSequence s, boolean enforceSecuri
255255
return null;
256256
}
257257
byte[] bytes = new byte[len >> 1];
258-
char[] buf = (s instanceof String) ? StringUtilities.getChars((String) s) : s.toString().toCharArray();
259258
for (int i = 0, j = 0; i < len; i += 2) {
260-
char c1 = buf[i];
261-
char c2 = buf[i + 1];
259+
char c1 = s.charAt(i);
260+
char c2 = s.charAt(i + 1);
262261
// Check if the characters are within ASCII range
263262
if (c1 >= HEX_LOOKUP.length || c2 >= HEX_LOOKUP.length) {
264263
return null;

src/main/java/com/cedarsoftware/util/DateUtilities.java

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,8 @@ private static void validateMalformedInput(String input) {
336336
// Check for excessive nested grouping
337337
int openParens = 0;
338338
int maxNesting = 0;
339-
int inputLen = input.length();
340-
char[] inputBuf = StringUtilities.getChars(input);
341-
for (int i = 0; i < inputLen; i++) {
342-
char c = inputBuf[i];
339+
for (int i = 0; i < input.length(); i++) {
340+
char c = input.charAt(i);
343341
if (c == '(') {
344342
openParens++;
345343
maxNesting = Math.max(maxNesting, openParens);
@@ -874,9 +872,8 @@ private static long convertFractionToNanos(String fracSec) {
874872
int len = digits.length();
875873

876874
// Validate all characters are digits
877-
char[] digitBuf = StringUtilities.getChars(digits);
878875
for (int i = 0; i < len; i++) {
879-
char c = digitBuf[i];
876+
char c = digits.charAt(i);
880877
if (c < '0' || c > '9') {
881878
throw new IllegalArgumentException("Invalid fractional second: " + fracSec);
882879
}
@@ -917,9 +914,8 @@ private static ZoneId getTimeZone(String tz) {
917914
}
918915

919916
// Additional security validation: prevent control characters and null bytes
920-
char[] tzBuf = StringUtilities.getChars(tz);
921917
for (int i = 0; i < tz.length(); i++) {
922-
char c = tzBuf[i];
918+
char c = tz.charAt(i);
923919
if (c < 32 || c == 127) { // Control characters including null byte
924920
throw new IllegalArgumentException("Invalid timezone string contains control characters");
925921
}
@@ -1020,11 +1016,9 @@ private static void verifyNoGarbageLeft(String remnant) {
10201016
}
10211017

10221018
private static String removeDateMarkerCharacters(String input) {
1023-
int inputLen = input.length();
1024-
char[] buf = StringUtilities.getChars(input);
10251019
StringBuilder builder = null;
1026-
for (int i = 0; i < inputLen; i++) {
1027-
char c = buf[i];
1020+
for (int i = 0; i < input.length(); i++) {
1021+
char c = input.charAt(i);
10281022
if (c == 'T' || c == ',') {
10291023
if (builder == null) {
10301024
builder = new StringBuilder(input.length() - 1);

src/main/java/com/cedarsoftware/util/MathUtilities.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -502,19 +502,18 @@ public static Number parseToMinimalNumericType(CharSequence numStr)
502502
}
503503

504504
final int len = text.length();
505-
char[] buf = StringUtilities.getChars(text);
506505
int start = 0;
507506
if (len > 0) {
508-
char first = buf[0];
507+
char first = text.charAt(0);
509508
if (first == '-' || first == '+') {
510509
start = 1;
511510
}
512511
}
513512

514513
// Trim integer leading zeros (keeping one zero before non-digit, e.g., "000.1" -> "0.1")
515514
while (start + 1 < len
516-
&& buf[start] == '0'
517-
&& Character.isDigit(buf[start + 1]))
515+
&& text.charAt(start) == '0'
516+
&& Character.isDigit(text.charAt(start + 1)))
518517
{
519518
start++;
520519
}
@@ -530,7 +529,7 @@ public static Number parseToMinimalNumericType(CharSequence numStr)
530529
boolean exponentOverflow = false;
531530

532531
for (int i = start; i < len; i++) {
533-
char c = buf[i];
532+
char c = text.charAt(i);
534533
if (c == '.') {
535534
hasDecimalPoint = true;
536535
inExponent = false;

src/main/java/com/cedarsoftware/util/StringUtilities.java

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -480,11 +480,10 @@ public static int trimLength(String s) {
480480
}
481481
int start = 0;
482482
int end = s.length();
483-
char[] buf = getChars(s);
484-
while (start < end && buf[start] <= ' ') {
483+
while (start < end && s.charAt(start) <= ' ') {
485484
start++;
486485
}
487-
while (end > start && buf[end - 1] <= ' ') {
486+
while (end > start && s.charAt(end - 1) <= ' ') {
488487
end--;
489488
}
490489
return end - start;
@@ -528,10 +527,9 @@ public static byte[] decode(String s) {
528527
byte[] bytes = new byte[len / 2];
529528
int pos = 0;
530529

531-
char[] buf = getChars(s);
532530
for (int i = 0; i < len; i += 2) {
533-
int hi = Character.digit(buf[i], 16);
534-
int lo = Character.digit(buf[i + 1], 16);
531+
int hi = Character.digit(s.charAt(i), 16);
532+
int lo = Character.digit(s.charAt(i + 1), 16);
535533
if (hi == -1 || lo == -1) {
536534
return null;
537535
}
@@ -580,9 +578,8 @@ public static int count(String s, char c) {
580578

581579
int answer = 0;
582580
int len = s.length();
583-
char[] buf = getChars(s);
584581
for (int i = 0; i < len; i++) {
585-
if (buf[i] == c) {
582+
if (s.charAt(i) == c) {
586583
answer++;
587584
}
588585
}
@@ -743,8 +740,6 @@ public static int levenshteinDistance(CharSequence s, CharSequence t) {
743740

744741
int sLen = s.length();
745742
int tLen = t.length();
746-
char[] sBuf = (s instanceof String) ? getChars((String) s) : charSequenceToArray(s);
747-
char[] tBuf = (t instanceof String) ? ((String) t).toCharArray() : charSequenceToArray(t);
748743
for (int i = 0; i < sLen; i++) {
749744
// calculate v1 (current row distances) from the previous row v0
750745

@@ -754,7 +749,7 @@ public static int levenshteinDistance(CharSequence s, CharSequence t) {
754749

755750
// use formula to fill in the rest of the row
756751
for (int j = 0; j < tLen; j++) {
757-
int cost = (sBuf[i] == tBuf[j]) ? 0 : 1;
752+
int cost = (s.charAt(i) == t.charAt(j)) ? 0 : 1;
758753
int left = v1[j] + 1;
759754
int up = v0[j + 1] + 1;
760755
int diagonal = v0[j] + cost;
@@ -822,12 +817,10 @@ public static int damerauLevenshteinDistance(CharSequence source, CharSequence t
822817
distanceMatrix[0][targetIndex] = targetIndex;
823818
}
824819

825-
char[] srcBuf = (source instanceof String) ? getChars((String) source) : charSequenceToArray(source);
826-
char[] tgtBuf = (target instanceof String) ? ((String) target).toCharArray() : charSequenceToArray(target);
827820
for (int srcIndex = 1; srcIndex <= srcLen; srcIndex++) {
828821
for (int targetIndex = 1; targetIndex <= targetLen; targetIndex++) {
829822
// If the current characters in both strings are equal
830-
int cost = srcBuf[srcIndex - 1] == tgtBuf[targetIndex - 1] ? 0 : 1;
823+
int cost = source.charAt(srcIndex - 1) == target.charAt(targetIndex - 1) ? 0 : 1;
831824

832825
// Find the current distance by determining the shortest path to a
833826
// match (hence the 'minimum' calculation on distances).
@@ -845,7 +838,7 @@ public static int damerauLevenshteinDistance(CharSequence source, CharSequence t
845838

846839
// transposition check (if the current and previous
847840
// character are switched around (e.g.: t[se]t and t[es]t)...
848-
if (srcBuf[srcIndex - 1] == tgtBuf[targetIndex - 2] && srcBuf[srcIndex - 2] == tgtBuf[targetIndex - 1]) {
841+
if (source.charAt(srcIndex - 1) == target.charAt(targetIndex - 2) && source.charAt(srcIndex - 2) == target.charAt(targetIndex - 1)) {
849842
// What's the minimum cost between the current distance
850843
// and a transposition.
851844
int transpositionCost = distanceMatrix[srcIndex - 2][targetIndex - 2] + cost;
@@ -1023,16 +1016,6 @@ public static char[] getChars(String s) {
10231016
return buf;
10241017
}
10251018

1026-
/** Convert a CharSequence to a char array (for non-String CharSequences). */
1027-
private static char[] charSequenceToArray(CharSequence cs) {
1028-
int n = cs.length();
1029-
char[] arr = new char[n];
1030-
for (int i = 0; i < n; i++) {
1031-
arr[i] = cs.charAt(i);
1032-
}
1033-
return arr;
1034-
}
1035-
10361019
/** Internal: get a reusable char buffer from ThreadLocal, growing if needed. */
10371020
private static char[] getCharBuf(int minSize) {
10381021
char[] buf = TL_CHAR_BUF.get();
@@ -1196,11 +1179,10 @@ public static String snakeToCamel(String snake) {
11961179
return null;
11971180
}
11981181
int len = snake.length();
1199-
char[] buf = getChars(snake);
12001182
StringBuilder result = new StringBuilder(len);
12011183
boolean upper = false;
12021184
for (int i = 0; i < len; i++) {
1203-
char c = buf[i];
1185+
char c = snake.charAt(i);
12041186
if (c == '_') {
12051187
upper = true;
12061188
continue;
@@ -1221,11 +1203,9 @@ public static String camelToSnake(String camel) {
12211203
if (camel == null) {
12221204
return null;
12231205
}
1224-
int len = camel.length();
1225-
char[] buf = getChars(camel);
12261206
StringBuilder result = new StringBuilder();
1227-
for (int i = 0; i < len; i++) {
1228-
char c = buf[i];
1207+
for (int i = 0; i < camel.length(); i++) {
1208+
char c = camel.charAt(i);
12291209
if (Character.isUpperCase(c) && i > 0) {
12301210
result.append('_');
12311211
}
@@ -1244,9 +1224,8 @@ public static boolean isNumeric(String s) {
12441224
if (s == null || s.isEmpty()) {
12451225
return false;
12461226
}
1247-
char[] buf = getChars(s);
12481227
for (int i = 0; i < s.length(); i++) {
1249-
if (!Character.isDigit(buf[i])) {
1228+
if (!Character.isDigit(s.charAt(i))) {
12501229
return false;
12511230
}
12521231
}

0 commit comments

Comments
 (0)