Skip to content

Commit 0c00f88

Browse files
Claude4.0o and claude committed
Apply StringUtilities.getChars() across java-util charAt() hot loops
Replaced charAt(i) with direct buf[i] array access in 14 methods across 4 classes, using StringUtilities.getChars() for SIMD bulk copy:
- StringUtilities: trimLength, decode (hex string to byte array), count, snakeToCamel, camelToSnake, isNumeric, levenshteinDistance, damerauLevenshteinDistance
- ByteUtilities: decode (hex string to byte array)
- MathUtilities: parseToMinimalNumericType
- DateUtilities: validateMalformedInput, convertFractionToNanos, getTimeZone, removeDateMarkerCharacters

All 19,551 tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 960da8b commit 0c00f88

5 files changed

Lines changed: 54 additions & 25 deletions

File tree

changelog.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
### Revision History
22

33
#### 4.99.0 (Unreleased)
4-
* **FEATURE**: New `StringUtilities.getChars(String s)` — public API that returns a ThreadLocal `char[]` buffer populated via `String.getChars()` (SIMD-optimized bulk copy). Callers can replace `str.charAt(i)` loops with direct `buf[i]` array access, avoiding per-character method call and JDK 9+ coder check overhead. Use `s.length()` for the valid range. Buffer is shared per-thread — valid until the next `getChars()` call on the same thread.
4+
* **FEATURE**: New `StringUtilities.getChars(String s)` — public API that returns a ThreadLocal `char[]` buffer populated via `String.getChars()` (SIMD-optimized bulk copy). Callers replace `str.charAt(i)` loops with direct `buf[i]` array access, avoiding per-character method call and JDK 9+ coder check overhead. Use `s.length()` for the valid range. Buffer is shared per-thread — valid until the next `getChars()` call on the same thread. Applied across `StringUtilities` (`trimLength`, `decode`, `count`, `levenshteinDistance`, `damerauLevenshteinDistance`, `snakeToCamel`, `camelToSnake`, `isNumeric`), `ByteUtilities.decode`, `MathUtilities.parseToMinimalNumericType`, and `DateUtilities` (`validateMalformedInput`, `convertFractionToNanos`, `getTimeZone`, `removeDateMarkerCharacters`).
55
* **PERFORMANCE**: `StringUtilities.hashCodeIgnoreCase(String)` — uses `StringUtilities.getChars()` (SIMD-optimized bulk copy) into a ThreadLocal `char[]` buffer, then hashes from the array directly. Avoids `charAt()`'s per-character method call and JDK 9+ compact-string coder check overhead. No reflection, no VarHandle, no `--add-opens` — works on all JDK versions (8-25+). Benchmark shows CaseInsensitiveMap GET improved **70-75%** (230 → 58-69 ns/op), PUT improved **48-52%** (135 → 65-71 ns/op), and MIXED-CASE GET improved **68%** (302 → 93-97 ns/op) on 100K entries.
66
* **PERFORMANCE**: New `FastReader.readLine(char[] dest, int off, int maxLen)` — dedicated line-reading method optimized for TOON's line-oriented parsing. Combines scanning, copying, and line-ending consumption (`\n`, `\r`, `\r\n`) into a single call. Uses a `c <= '\r'` range guard so printable characters (the vast majority) require only one comparison per character instead of two. Eliminates the per-line overhead of separate `readUntil()` + `read()` + pushback round-trip. JFR shows TOON line-reading samples dropped from 173 to 125 (28% reduction), and `FastReader.read()` calls halved (53 → 25 samples).
77
* **PERFORMANCE**: `FastReader.readUntil()` pushback drain loop now uses a local variable for `pushbackPosition` instead of repeated member field access, avoiding load/store through `this` on each iteration. JFR shows 14.8% reduction in aggregate FastReader CPU share.

src/main/java/com/cedarsoftware/util/ByteUtilities.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,10 @@ private static byte[] decodeInternal(final CharSequence s, boolean enforceSecuri
255255
return null;
256256
}
257257
byte[] bytes = new byte[len >> 1];
258+
char[] buf = (s instanceof String) ? StringUtilities.getChars((String) s) : s.toString().toCharArray();
258259
for (int i = 0, j = 0; i < len; i += 2) {
259-
char c1 = s.charAt(i);
260-
char c2 = s.charAt(i + 1);
260+
char c1 = buf[i];
261+
char c2 = buf[i + 1];
261262
// Check if the characters are within ASCII range
262263
if (c1 >= HEX_LOOKUP.length || c2 >= HEX_LOOKUP.length) {
263264
return null;

src/main/java/com/cedarsoftware/util/DateUtilities.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,10 @@ private static void validateMalformedInput(String input) {
336336
// Check for excessive nested grouping
337337
int openParens = 0;
338338
int maxNesting = 0;
339-
for (int i = 0; i < input.length(); i++) {
340-
char c = input.charAt(i);
339+
int inputLen = input.length();
340+
char[] inputBuf = StringUtilities.getChars(input);
341+
for (int i = 0; i < inputLen; i++) {
342+
char c = inputBuf[i];
341343
if (c == '(') {
342344
openParens++;
343345
maxNesting = Math.max(maxNesting, openParens);
@@ -872,8 +874,9 @@ private static long convertFractionToNanos(String fracSec) {
872874
int len = digits.length();
873875

874876
// Validate all characters are digits
877+
char[] digitBuf = StringUtilities.getChars(digits);
875878
for (int i = 0; i < len; i++) {
876-
char c = digits.charAt(i);
879+
char c = digitBuf[i];
877880
if (c < '0' || c > '9') {
878881
throw new IllegalArgumentException("Invalid fractional second: " + fracSec);
879882
}
@@ -914,8 +917,9 @@ private static ZoneId getTimeZone(String tz) {
914917
}
915918

916919
// Additional security validation: prevent control characters and null bytes
920+
char[] tzBuf = StringUtilities.getChars(tz);
917921
for (int i = 0; i < tz.length(); i++) {
918-
char c = tz.charAt(i);
922+
char c = tzBuf[i];
919923
if (c < 32 || c == 127) { // Control characters including null byte
920924
throw new IllegalArgumentException("Invalid timezone string contains control characters");
921925
}
@@ -1016,9 +1020,11 @@ private static void verifyNoGarbageLeft(String remnant) {
10161020
}
10171021

10181022
private static String removeDateMarkerCharacters(String input) {
1023+
int inputLen = input.length();
1024+
char[] buf = StringUtilities.getChars(input);
10191025
StringBuilder builder = null;
1020-
for (int i = 0; i < input.length(); i++) {
1021-
char c = input.charAt(i);
1026+
for (int i = 0; i < inputLen; i++) {
1027+
char c = buf[i];
10221028
if (c == 'T' || c == ',') {
10231029
if (builder == null) {
10241030
builder = new StringBuilder(input.length() - 1);

src/main/java/com/cedarsoftware/util/MathUtilities.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -502,18 +502,19 @@ public static Number parseToMinimalNumericType(CharSequence numStr)
502502
}
503503

504504
final int len = text.length();
505+
char[] buf = StringUtilities.getChars(text);
505506
int start = 0;
506507
if (len > 0) {
507-
char first = text.charAt(0);
508+
char first = buf[0];
508509
if (first == '-' || first == '+') {
509510
start = 1;
510511
}
511512
}
512513

513514
// Trim integer leading zeros (keeping one zero before non-digit, e.g., "000.1" -> "0.1")
514515
while (start + 1 < len
515-
&& text.charAt(start) == '0'
516-
&& Character.isDigit(text.charAt(start + 1)))
516+
&& buf[start] == '0'
517+
&& Character.isDigit(buf[start + 1]))
517518
{
518519
start++;
519520
}
@@ -529,7 +530,7 @@ public static Number parseToMinimalNumericType(CharSequence numStr)
529530
boolean exponentOverflow = false;
530531

531532
for (int i = start; i < len; i++) {
532-
char c = text.charAt(i);
533+
char c = buf[i];
533534
if (c == '.') {
534535
hasDecimalPoint = true;
535536
inExponent = false;

src/main/java/com/cedarsoftware/util/StringUtilities.java

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -480,10 +480,11 @@ public static int trimLength(String s) {
480480
}
481481
int start = 0;
482482
int end = s.length();
483-
while (start < end && s.charAt(start) <= ' ') {
483+
char[] buf = getChars(s);
484+
while (start < end && buf[start] <= ' ') {
484485
start++;
485486
}
486-
while (end > start && s.charAt(end - 1) <= ' ') {
487+
while (end > start && buf[end - 1] <= ' ') {
487488
end--;
488489
}
489490
return end - start;
@@ -527,9 +528,10 @@ public static byte[] decode(String s) {
527528
byte[] bytes = new byte[len / 2];
528529
int pos = 0;
529530

531+
char[] buf = getChars(s);
530532
for (int i = 0; i < len; i += 2) {
531-
int hi = Character.digit(s.charAt(i), 16);
532-
int lo = Character.digit(s.charAt(i + 1), 16);
533+
int hi = Character.digit(buf[i], 16);
534+
int lo = Character.digit(buf[i + 1], 16);
533535
if (hi == -1 || lo == -1) {
534536
return null;
535537
}
@@ -578,8 +580,9 @@ public static int count(String s, char c) {
578580

579581
int answer = 0;
580582
int len = s.length();
583+
char[] buf = getChars(s);
581584
for (int i = 0; i < len; i++) {
582-
if (s.charAt(i) == c) {
585+
if (buf[i] == c) {
583586
answer++;
584587
}
585588
}
@@ -740,6 +743,8 @@ public static int levenshteinDistance(CharSequence s, CharSequence t) {
740743

741744
int sLen = s.length();
742745
int tLen = t.length();
746+
char[] sBuf = (s instanceof String) ? getChars((String) s) : charSequenceToArray(s);
747+
char[] tBuf = (t instanceof String) ? ((String) t).toCharArray() : charSequenceToArray(t);
743748
for (int i = 0; i < sLen; i++) {
744749
// calculate v1 (current row distances) from the previous row v0
745750

@@ -749,7 +754,7 @@ public static int levenshteinDistance(CharSequence s, CharSequence t) {
749754

750755
// use formula to fill in the rest of the row
751756
for (int j = 0; j < tLen; j++) {
752-
int cost = (s.charAt(i) == t.charAt(j)) ? 0 : 1;
757+
int cost = (sBuf[i] == tBuf[j]) ? 0 : 1;
753758
int left = v1[j] + 1;
754759
int up = v0[j + 1] + 1;
755760
int diagonal = v0[j] + cost;
@@ -817,10 +822,12 @@ public static int damerauLevenshteinDistance(CharSequence source, CharSequence t
817822
distanceMatrix[0][targetIndex] = targetIndex;
818823
}
819824

825+
char[] srcBuf = (source instanceof String) ? getChars((String) source) : charSequenceToArray(source);
826+
char[] tgtBuf = (target instanceof String) ? ((String) target).toCharArray() : charSequenceToArray(target);
820827
for (int srcIndex = 1; srcIndex <= srcLen; srcIndex++) {
821828
for (int targetIndex = 1; targetIndex <= targetLen; targetIndex++) {
822829
// If the current characters in both strings are equal
823-
int cost = source.charAt(srcIndex - 1) == target.charAt(targetIndex - 1) ? 0 : 1;
830+
int cost = srcBuf[srcIndex - 1] == tgtBuf[targetIndex - 1] ? 0 : 1;
824831

825832
// Find the current distance by determining the shortest path to a
826833
// match (hence the 'minimum' calculation on distances).
@@ -838,7 +845,7 @@ public static int damerauLevenshteinDistance(CharSequence source, CharSequence t
838845

839846
// transposition check (if the current and previous
840847
// character are switched around (e.g.: t[se]t and t[es]t)...
841-
if (source.charAt(srcIndex - 1) == target.charAt(targetIndex - 2) && source.charAt(srcIndex - 2) == target.charAt(targetIndex - 1)) {
848+
if (srcBuf[srcIndex - 1] == tgtBuf[targetIndex - 2] && srcBuf[srcIndex - 2] == tgtBuf[targetIndex - 1]) {
842849
// What's the minimum cost between the current distance
843850
// and a transposition.
844851
int transpositionCost = distanceMatrix[srcIndex - 2][targetIndex - 2] + cost;
@@ -1016,6 +1023,16 @@ public static char[] getChars(String s) {
10161023
return buf;
10171024
}
10181025

1026+
/** Convert a CharSequence to a char array (for non-String CharSequences). */
1027+
private static char[] charSequenceToArray(CharSequence cs) {
1028+
int n = cs.length();
1029+
char[] arr = new char[n];
1030+
for (int i = 0; i < n; i++) {
1031+
arr[i] = cs.charAt(i);
1032+
}
1033+
return arr;
1034+
}
1035+
10191036
/** Internal: get a reusable char buffer from ThreadLocal, growing if needed. */
10201037
private static char[] getCharBuf(int minSize) {
10211038
char[] buf = TL_CHAR_BUF.get();
@@ -1179,10 +1196,11 @@ public static String snakeToCamel(String snake) {
11791196
return null;
11801197
}
11811198
int len = snake.length();
1199+
char[] buf = getChars(snake);
11821200
StringBuilder result = new StringBuilder(len);
11831201
boolean upper = false;
11841202
for (int i = 0; i < len; i++) {
1185-
char c = snake.charAt(i);
1203+
char c = buf[i];
11861204
if (c == '_') {
11871205
upper = true;
11881206
continue;
@@ -1203,9 +1221,11 @@ public static String camelToSnake(String camel) {
12031221
if (camel == null) {
12041222
return null;
12051223
}
1224+
int len = camel.length();
1225+
char[] buf = getChars(camel);
12061226
StringBuilder result = new StringBuilder();
1207-
for (int i = 0; i < camel.length(); i++) {
1208-
char c = camel.charAt(i);
1227+
for (int i = 0; i < len; i++) {
1228+
char c = buf[i];
12091229
if (Character.isUpperCase(c) && i > 0) {
12101230
result.append('_');
12111231
}
@@ -1224,8 +1244,9 @@ public static boolean isNumeric(String s) {
12241244
if (s == null || s.isEmpty()) {
12251245
return false;
12261246
}
1247+
char[] buf = getChars(s);
12271248
for (int i = 0; i < s.length(); i++) {
1228-
if (!Character.isDigit(s.charAt(i))) {
1249+
if (!Character.isDigit(buf[i])) {
12291250
return false;
12301251
}
12311252
}

0 commit comments

Comments
 (0)