Skip to content
This repository was archived by the owner on Apr 7, 2026. It is now read-only.

Commit a6b8fc2

Browse files
committed
feat: add SsFormat encoding library
This commit adds the foundational SsFormat class, which provides sortable string format (ssformat) encoding utilities. This encoding is used by Spanner for key ordering and routing.

Key features:
- Composite tag encoding for interleaved tables
- Signed/unsigned integer encoding (increasing/decreasing)
- String and bytes encoding with proper escaping
- Double encoding with proper sign handling
- Timestamp and UUID encoding
- Null value markers with configurable ordering
- TargetRange class for key range representation

Includes unit tests for all encoding functions. This is part of the experimental location-aware routing for improved latency.
1 parent e3fa634 commit a6b8fc2

3 files changed

Lines changed: 679 additions & 0 deletions

File tree

Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner.spi.v1;
18+
19+
import com.google.protobuf.ByteString;
20+
import java.io.ByteArrayOutputStream;
21+
import java.nio.charset.StandardCharsets;
22+
23+
public final class SsFormat {
24+
25+
/**
26+
* Makes the given key a prefix successor. This means that the returned key is the smallest
27+
* possible key that is larger than the input key, and that does not have the input key as a
28+
* prefix.
29+
*
30+
* <p>This is done by flipping the least significant bit of the last byte of the key.
31+
*
32+
* @param key The key to make a prefix successor.
33+
* @return The prefix successor key.
34+
*/
35+
public static ByteString makePrefixSuccessor(ByteString key) {
36+
if (key == null || key.isEmpty()) {
37+
return ByteString.EMPTY;
38+
}
39+
byte[] bytes = key.toByteArray();
40+
if (bytes.length > 0) {
41+
bytes[bytes.length - 1] = (byte) (bytes[bytes.length - 1] | 1);
42+
}
43+
return ByteString.copyFrom(bytes);
44+
}
45+
46+
// Static-only utility class: the private constructor prevents instantiation.
private SsFormat() {}
47+
48+
// Constants from ssformat.cc
49+
private static final int IS_KEY = 0x80;
50+
private static final int TYPE_MASK = 0x7f;
51+
52+
// HeaderType enum values (selected)
53+
private static final int TYPE_UINT_1 = 0;
54+
private static final int TYPE_UINT_9 = 8;
55+
private static final int TYPE_NEG_INT_8 = 9;
56+
private static final int TYPE_NEG_INT_1 = 16;
57+
private static final int TYPE_POS_INT_1 = 17;
58+
private static final int TYPE_POS_INT_8 = 24;
59+
private static final int TYPE_STRING = 25;
60+
private static final int TYPE_NULL_ORDERED_FIRST = 27;
61+
private static final int TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_FIRST = 28;
62+
private static final int TYPE_DECREASING_UINT_9 = 32;
63+
private static final int TYPE_DECREASING_UINT_1 = 40;
64+
private static final int TYPE_DECREASING_NEG_INT_8 = 41;
65+
private static final int TYPE_DECREASING_NEG_INT_1 = 48;
66+
private static final int TYPE_DECREASING_POS_INT_1 = 49;
67+
private static final int TYPE_DECREASING_POS_INT_8 = 56;
68+
private static final int TYPE_DECREASING_STRING = 57;
69+
private static final int TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_LAST = 59;
70+
private static final int TYPE_NULL_ORDERED_LAST = 60;
71+
private static final int TYPE_NEG_DOUBLE_8 = 66;
72+
private static final int TYPE_NEG_DOUBLE_1 = 73;
73+
private static final int TYPE_POS_DOUBLE_1 = 74;
74+
private static final int TYPE_POS_DOUBLE_8 = 81;
75+
private static final int TYPE_DECREASING_NEG_DOUBLE_8 = 82;
76+
private static final int TYPE_DECREASING_NEG_DOUBLE_1 = 89;
77+
private static final int TYPE_DECREASING_POS_DOUBLE_1 = 90;
78+
private static final int TYPE_DECREASING_POS_DOUBLE_8 = 97;
79+
80+
// EscapeChar enum values
81+
private static final byte ASCENDING_ZERO_ESCAPE = (byte) 0xf0;
82+
private static final byte ASCENDING_FF_ESCAPE = (byte) 0x10;
83+
private static final byte SEP = (byte) 0x78; // 'x'
84+
85+
// For AppendCompositeTag
86+
private static final int K_OBJECT_EXISTENCE_TAG = 0x7e;
87+
private static final int K_MAX_FIELD_TAG = 0xffff;
88+
89+
public static void appendCompositeTag(ByteArrayOutputStream out, int tag) {
90+
if (tag == K_OBJECT_EXISTENCE_TAG || tag <= 0 || tag > K_MAX_FIELD_TAG) {
91+
throw new IllegalArgumentException("Invalid tag value: " + tag);
92+
}
93+
94+
if (tag < 16) {
95+
// Short tag: 000 TTTT S (S is LSB of tag, but here tag is original, so S=0)
96+
// Encodes as (tag << 1)
97+
out.write((byte) (tag << 1));
98+
} else {
99+
// Long tag
100+
int shiftedTag = tag << 1; // LSB is 0 for prefix successor
101+
if (shiftedTag < (1 << (5 + 8))) { // Original tag < 4096
102+
// Header: num_extra_bytes=1 (01xxxxx), P=payload bits from tag
103+
// (1 << 5) is 00100000
104+
// (shiftedTag >> 8) are the 5 MSBs of the payload part of the tag
105+
out.write((byte) ((1 << 5) | (shiftedTag >> 8)));
106+
out.write((byte) (shiftedTag & 0xFF));
107+
} else { // Original tag >= 4096 and <= K_MAX_FIELD_TAG (65535)
108+
// Header: num_extra_bytes=2 (10xxxxx)
109+
// (2 << 5) is 01000000
110+
out.write((byte) ((2 << 5) | (shiftedTag >> 16)));
111+
out.write((byte) ((shiftedTag >> 8) & 0xFF));
112+
out.write((byte) (shiftedTag & 0xFF));
113+
}
114+
}
115+
}
116+
117+
public static void appendNullOrderedFirst(ByteArrayOutputStream out) {
118+
out.write((byte) (IS_KEY | TYPE_NULL_ORDERED_FIRST));
119+
out.write((byte) 0);
120+
}
121+
122+
public static void appendNullOrderedLast(ByteArrayOutputStream out) {
123+
out.write((byte) (IS_KEY | TYPE_NULL_ORDERED_LAST));
124+
out.write((byte) 0);
125+
}
126+
127+
public static void appendNotNullMarkerNullOrderedFirst(ByteArrayOutputStream out) {
128+
out.write((byte) (IS_KEY | TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_FIRST));
129+
}
130+
131+
public static void appendNotNullMarkerNullOrderedLast(ByteArrayOutputStream out) {
132+
out.write((byte) (IS_KEY | TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_LAST));
133+
}
134+
135+
public static void appendUnsignedIntIncreasing(ByteArrayOutputStream out, long val) {
136+
if (val < 0) {
137+
throw new IllegalArgumentException("Unsigned int cannot be negative: " + val);
138+
}
139+
byte[] buf = new byte[9]; // Max 9 bytes for value payload
140+
int len = 0;
141+
142+
long tempVal = val;
143+
buf[8 - len] = (byte) ((tempVal & 0x7F) << 1); // LSB is prefix-successor bit (0)
144+
tempVal >>= 7;
145+
len++;
146+
147+
while (tempVal > 0) {
148+
buf[8 - len] = (byte) (tempVal & 0xFF);
149+
tempVal >>= 8;
150+
len++;
151+
}
152+
153+
out.write((byte) (IS_KEY | (TYPE_UINT_1 + len - 1)));
154+
for (int i = 0; i < len; i++) {
155+
out.write((byte) (buf[8 - len + 1 + i] & 0xFF));
156+
}
157+
}
158+
159+
public static void appendUnsignedIntDecreasing(ByteArrayOutputStream out, long val) {
160+
if (val < 0) {
161+
throw new IllegalArgumentException("Unsigned int cannot be negative: " + val);
162+
}
163+
byte[] buf = new byte[9];
164+
int len = 0;
165+
long tempVal = val;
166+
167+
// InvertByte(val & 0x7f) << 1
168+
buf[8 - len] = (byte) ((~(tempVal & 0x7F) & 0x7F) << 1);
169+
tempVal >>= 7;
170+
len++;
171+
172+
while (tempVal > 0) {
173+
buf[8 - len] = (byte) (~(tempVal & 0xFF));
174+
tempVal >>= 8;
175+
len++;
176+
}
177+
// If val was 0, loop doesn't run for len > 1. If len is still 1, all bits of tempVal (0) are
178+
// covered.
179+
// If val was large, but remaining tempVal became 0, this is correct.
180+
// If tempVal was 0 initially, buf[8] has (~0 & 0x7f) << 1. len = 1.
181+
// If tempVal was >0 but became 0 after some shifts, buf[8-len] has inverted last byte.
182+
183+
out.write((byte) (IS_KEY | (TYPE_DECREASING_UINT_1 - len + 1)));
184+
for (int i = 0; i < len; i++) {
185+
out.write((byte) (buf[8 - len + 1 + i] & 0xFF));
186+
}
187+
}
188+
189+
private static void appendIntInternal(
190+
ByteArrayOutputStream out, long val, boolean decreasing, boolean isDouble) {
191+
if (decreasing) {
192+
val = ~val;
193+
}
194+
195+
byte[] buf = new byte[8]; // Max 8 bytes for payload
196+
int len = 0;
197+
long tempVal = val;
198+
199+
if (tempVal >= 0) {
200+
buf[7 - len] = (byte) ((tempVal & 0x7F) << 1);
201+
tempVal >>= 7;
202+
len++;
203+
while (tempVal > 0) {
204+
buf[7 - len] = (byte) (tempVal & 0xFF);
205+
tempVal >>= 8;
206+
len++;
207+
}
208+
} else { // tempVal < 0
209+
// For negative numbers, extend sign bit after shifting
210+
buf[7 - len] = (byte) ((tempVal & 0x7F) << 1);
211+
// Simulate sign extension for right shift of negative number
212+
// (x >> 7) | 0xFE00000000000000ULL; (if x has 64 bits)
213+
// In Java, right shift `>>` on negative longs performs sign extension.
214+
tempVal >>= 7;
215+
len++;
216+
while (tempVal != -1L) { // Loop until all remaining bits are 1s (sign extension)
217+
buf[7 - len] = (byte) (tempVal & 0xFF);
218+
tempVal >>= 8;
219+
len++;
220+
if (len > 8) throw new AssertionError("Signed int encoding overflow");
221+
}
222+
}
223+
224+
int type;
225+
if (val >= 0) { // Original val before potential bit-negation for decreasing
226+
if (!decreasing) {
227+
type = isDouble ? (TYPE_POS_DOUBLE_1 + len - 1) : (TYPE_POS_INT_1 + len - 1);
228+
} else {
229+
type =
230+
isDouble
231+
? (TYPE_DECREASING_POS_DOUBLE_1 + len - 1)
232+
: (TYPE_DECREASING_POS_INT_1 + len - 1);
233+
}
234+
} else {
235+
if (!decreasing) {
236+
type = isDouble ? (TYPE_NEG_DOUBLE_1 - len + 1) : (TYPE_NEG_INT_1 - len + 1);
237+
} else {
238+
type =
239+
isDouble
240+
? (TYPE_DECREASING_NEG_DOUBLE_1 - len + 1)
241+
: (TYPE_DECREASING_NEG_INT_1 - len + 1);
242+
}
243+
}
244+
out.write((byte) (IS_KEY | type));
245+
for (int i = 0; i < len; i++) {
246+
out.write((byte) (buf[7 - len + 1 + i] & 0xFF));
247+
}
248+
}
249+
250+
public static void appendIntIncreasing(ByteArrayOutputStream out, long value) {
251+
appendIntInternal(out, value, false, false);
252+
}
253+
254+
public static void appendIntDecreasing(ByteArrayOutputStream out, long value) {
255+
appendIntInternal(out, value, true, false);
256+
}
257+
258+
public static void appendDoubleIncreasing(ByteArrayOutputStream out, double value) {
259+
long enc = Double.doubleToRawLongBits(value);
260+
if (enc < 0) {
261+
enc =
262+
Long.MIN_VALUE
263+
- enc; // kint64min - enc (equivalent to ~enc for negative values due to 2's
264+
// complement)
265+
}
266+
appendIntInternal(out, enc, false, true);
267+
}
268+
269+
public static void appendDoubleDecreasing(ByteArrayOutputStream out, double value) {
270+
long enc = Double.doubleToRawLongBits(value);
271+
if (enc < 0) {
272+
enc = Long.MIN_VALUE - enc;
273+
}
274+
appendIntInternal(out, enc, true, true);
275+
}
276+
277+
private static void appendByteSequence(
278+
ByteArrayOutputStream out, byte[] bytes, boolean decreasing) {
279+
out.write((byte) (IS_KEY | (decreasing ? TYPE_DECREASING_STRING : TYPE_STRING)));
280+
281+
for (byte b : bytes) {
282+
byte currentByte = decreasing ? (byte) ~b : b;
283+
int unsignedByte = currentByte & 0xFF;
284+
if (unsignedByte == 0x00) {
285+
out.write((byte) 0x00);
286+
out.write(
287+
decreasing
288+
? ASCENDING_ZERO_ESCAPE
289+
: ASCENDING_ZERO_ESCAPE); // After inversion, 0xFF becomes 0x00. Escape for 0x00
290+
// (inverted) is F0.
291+
// If increasing, 0x00 -> 0x00 F0.
292+
} else if (unsignedByte == 0xFF) {
293+
out.write((byte) 0xFF);
294+
out.write(
295+
decreasing
296+
? ASCENDING_FF_ESCAPE
297+
: ASCENDING_FF_ESCAPE); // After inversion, 0x00 becomes 0xFF. Escape for 0xFF
298+
// (inverted) is 0x10.
299+
// If increasing, 0xFF -> 0xFF 0x10.
300+
} else {
301+
out.write((byte) unsignedByte);
302+
}
303+
}
304+
// Terminator
305+
out.write((byte) (decreasing ? 0xFF : 0x00));
306+
out.write(SEP);
307+
}
308+
309+
public static void appendStringIncreasing(ByteArrayOutputStream out, String value) {
310+
appendByteSequence(out, value.getBytes(StandardCharsets.UTF_8), false);
311+
}
312+
313+
public static void appendStringDecreasing(ByteArrayOutputStream out, String value) {
314+
appendByteSequence(out, value.getBytes(StandardCharsets.UTF_8), true);
315+
}
316+
317+
public static void appendBytesIncreasing(ByteArrayOutputStream out, byte[] value) {
318+
appendByteSequence(out, value, false);
319+
}
320+
321+
public static void appendBytesDecreasing(ByteArrayOutputStream out, byte[] value) {
322+
appendByteSequence(out, value, true);
323+
}
324+
325+
/**
326+
* Encodes a timestamp as 12 bytes: 8 bytes for seconds since epoch (with offset to handle
327+
* negative), 4 bytes for nanoseconds.
328+
*/
329+
public static byte[] encodeTimestamp(long seconds, int nanos) {
330+
// Add offset to make negative seconds sort correctly
331+
long kSecondsOffset = 1L << 63;
332+
long hi = seconds + kSecondsOffset;
333+
int lo = nanos;
334+
335+
byte[] buf = new byte[12];
336+
// Big-endian encoding
337+
buf[0] = (byte) (hi >> 56);
338+
buf[1] = (byte) (hi >> 48);
339+
buf[2] = (byte) (hi >> 40);
340+
buf[3] = (byte) (hi >> 32);
341+
buf[4] = (byte) (hi >> 24);
342+
buf[5] = (byte) (hi >> 16);
343+
buf[6] = (byte) (hi >> 8);
344+
buf[7] = (byte) hi;
345+
buf[8] = (byte) (lo >> 24);
346+
buf[9] = (byte) (lo >> 16);
347+
buf[10] = (byte) (lo >> 8);
348+
buf[11] = (byte) lo;
349+
return buf;
350+
}
351+
352+
/** Encodes a UUID (128-bit) as 16 bytes in big-endian order. */
353+
public static byte[] encodeUuid(long high, long low) {
354+
byte[] buf = new byte[16];
355+
// Big-endian encoding
356+
buf[0] = (byte) (high >> 56);
357+
buf[1] = (byte) (high >> 48);
358+
buf[2] = (byte) (high >> 40);
359+
buf[3] = (byte) (high >> 32);
360+
buf[4] = (byte) (high >> 24);
361+
buf[5] = (byte) (high >> 16);
362+
buf[6] = (byte) (high >> 8);
363+
buf[7] = (byte) high;
364+
buf[8] = (byte) (low >> 56);
365+
buf[9] = (byte) (low >> 48);
366+
buf[10] = (byte) (low >> 40);
367+
buf[11] = (byte) (low >> 32);
368+
buf[12] = (byte) (low >> 24);
369+
buf[13] = (byte) (low >> 16);
370+
buf[14] = (byte) (low >> 8);
371+
buf[15] = (byte) low;
372+
return buf;
373+
}
374+
}

0 commit comments

Comments
 (0)