Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ public void generate(
writer.addImport("java.util.Collections");
writer.addImport("java.util.List");
writer.addImport("com.hedera.pbj.runtime.hashing.XXH3_64");
writer.addImport("com.hedera.pbj.runtime.hashing.XXH3_64.HashingWritableSequentialData");
writer.addImport("com.hedera.pbj.runtime.hashing.XXH3FieldHash");
writer.addImport("com.hedera.pbj.runtime.hashing.SixtyFourBitHashable");
writer.addImport("static com.hedera.pbj.runtime.hashing.XXH3FieldHash.*");

// Iterate over all the items in the protobuf schema
for (final var item : msgDef.messageBody().messageElement()) {
Expand Down Expand Up @@ -296,11 +298,11 @@ private void generateClass(
final boolean isComparable,
final ContextualLookupHelper lookupHelper)
throws IOException {
final String implementsComparable;
final String implementsCode;
if (isComparable) {
implementsComparable = "implements Comparable<$javaRecordName> ";
implementsCode = "implements SixtyFourBitHashable, Comparable<$javaRecordName> ";
} else {
implementsComparable = "";
implementsCode = "implements SixtyFourBitHashable ";
}

final String staticModifier = Generator.isInner(msgDef) ? " static" : "";
Expand All @@ -324,7 +326,7 @@ private void generateClass(
.replace("$javaDocComment", javaDocComment)
.replace("$deprecated", deprecated)
.replace("$staticModifier", staticModifier)
.replace("$implementsComparable", implementsComparable)
.replace("$implementsComparable", implementsCode)
.replace("$javaRecordName", javaRecordName)
.replace("$bodyContent", bodyContent));
// spotless:on
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
package com.hedera.pbj.runtime;

import com.hedera.pbj.runtime.hashing.SixtyFourBitHashable;
import com.hedera.pbj.runtime.hashing.XXH3_64;
import com.hedera.pbj.runtime.io.buffer.Bytes;
import edu.umd.cs.findbugs.annotations.NonNull;
import java.util.Objects;
Expand All @@ -21,7 +23,7 @@
* @param bytes a list of the raw bytes of each occurrence of the field (e.g. for repeated fields)
*/
public record UnknownField(int field, @NonNull ProtoConstants wireType, @NonNull Bytes bytes)
implements Comparable<UnknownField> {
implements Comparable<UnknownField>, SixtyFourBitHashable {
/**
* A {@code Comparable<UnknownField>} implementation that sorts UnknownField objects by their `field` numbers
* in the increasing order. This comparator is used for maintaining a stable and deterministic order for any
Expand Down Expand Up @@ -52,27 +54,28 @@ public boolean equals(final Object o) {
* An `Object.hashCode()` implementation that computes a hash code using all the members of the UnknownField record:
* the `field`, the `wireType`, and the `bytes`.
* The implementation should remain stable over time because this is a public API.
* <p>
* This hash code has to match how the field would be hashed if it was a normal field in the schema
* </p>
*/
@Override
public int hashCode() {
int hashCode = 1;

hashCode = 31 * hashCode + Integer.hashCode(field);
hashCode = 31 * hashCode + Integer.hashCode(wireType.ordinal());
hashCode = 31 * hashCode + bytes.hashCode();

// Shifts: 30, 27, 16, 20, 5, 18, 10, 24, 30
hashCode += hashCode << 30;
hashCode ^= hashCode >>> 27;
hashCode += hashCode << 16;
hashCode ^= hashCode >>> 20;
hashCode += hashCode << 5;
hashCode ^= hashCode >>> 18;
hashCode += hashCode << 10;
hashCode ^= hashCode >>> 24;
hashCode += hashCode << 30;
return (int)hashCode64();
}

return hashCode;
/**
* A `SixtyFourBitHashable.hashCode64()` implementation that returns the 64-bit hash code of the `bytes` member.
* The `field` number and the `wireType` are currently not mixed into the result.
* The implementation should remain stable over time because this is a public API.
* <p>
* This hash code has to match how the field would be hashed if it was a normal field in the schema
* </p>
*
* @return a 64-bit hash code for this UnknownField object
*/
@Override
public long hashCode64() {
return bytes.hashCode64();
Comment on lines +76 to +78
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this. But you want to also consider feeding the wire type and the field number into the hasher. Otherwise the hash code may not be as dispersed as we'd like.

}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.hedera.pbj.runtime.hashing;

/**
* Interface for objects that can be hashed to a 64-bit long value.
*/
public interface SixtyFourBitHashable {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: I have no strong opinion on this name and the current one does look logical, but HashCode64 might be another option too as it's basically equal to the very method name introduced here.

/**
* Hash this object to a 64-bit long value.
*
* @return the 64-bit hash value
*/
long hashCode64();
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
package com.hedera.pbj.runtime.hashing;

import com.hedera.pbj.runtime.Utf8Tools;
import com.hedera.pbj.runtime.io.WritableSequentialData;
import com.hedera.pbj.runtime.io.buffer.BufferedData;
import com.hedera.pbj.runtime.io.buffer.Bytes;
Expand All @@ -24,7 +23,7 @@
* @see <a href="https://xxhash.com">xxhash</a>
*/
@SuppressWarnings({"DuplicatedCode", "NumericOverflow"})
public class XXH3_64 {
public final class XXH3_64 {
/**
* The default seed value used for hashing. ZERO is chosen as this is the default for xxhash and used in tools like the
* <code>xxhsum</code> command line tool.
Expand Down Expand Up @@ -490,11 +489,216 @@ public long hashBytesToLong(final byte[] input, final int off, final int length)
return finalizeHash(length, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
}

/**
* Hashes a CharSequence as raw UTF16 to a 64-bit {@code long} value.
*
* <p>Every {@code char} is consumed as one 16-bit unit. When several chars are packed into a word, the char
* at the lower index occupies the lower-order bits. The length that is mixed into the hash is the length in
* bytes, i.e. {@code 2 * charSequence.length()}.
*
* <p>The input length selects one of four code paths: up to 8 chars, 9 to 64 chars, 65 to 120 chars, and
* more than 120 chars.
*
* @param charSequence the character sequence to hash
* @return the hash value
*/
public long hashCharsToLong(final CharSequence charSequence) {
int len = charSequence.length();
// ---- Path 1: at most 8 chars ----
if (len <= 8) {
if (len > 4) {
// 5..8 chars: two 4-char words read from the start and from the end (they may overlap).
long lo = getLong(charSequence, 0) ^ bitflip34;
long hi = getLong(charSequence, len - 4) ^ bitflip56;
// (len << 1) is the input length expressed in bytes.
long acc = (len << 1) + Long.reverseBytes(lo) + hi + mix(lo, hi);
return avalanche3(acc);
}
if (len >= 2) {
// 2..4 chars: two 2-char words read from the start and from the end (they may overlap).
long input1 = getInt(charSequence, 0);
long input2 = getInt(charSequence, len - 2);
// input2 is masked to its low 32 bits so that sign extension does not disturb the upper half.
long keyed = (input2 & 0xFFFFFFFFL) ^ (input1 << 32) ^ bitflip12;
return rrmxmx(keyed, len << 1);
}
if (len != 0) {
// Exactly one char.
long c = charSequence.charAt(0);
// NOTE(review): 512L looks like the byte length (2) shifted left by 8, mirroring the XXH3
// short-input layout - confirm against the byte-based implementation.
long combined = (c << 16) | (c >>> 8) | 512L;
return avalanche64(combined ^ bitflip00);
}
// Empty sequence.
return hash0;
}
// ---- Path 2: 9 to 64 chars ----
if (len <= 64) {
// Same value as (2 * len) * INIT_ACC_1, i.e. the byte length times INIT_ACC_1.
long acc = len * (INIT_ACC_1 << 1);

// Each mix16B call consumes 8 chars. Windows are taken in pairs, one counted from the front and one
// counted from the back, and longer inputs add more pairs.
if (len > 16) {
if (len > 32) {
if (len > 48) {
acc += mix16B(charSequence, 24, secret12, secret13);
acc += mix16B(charSequence, len - 32, secret14, secret15);
}
acc += mix16B(charSequence, 16, secret08, secret09);
acc += mix16B(charSequence, len - 24, secret10, secret11);
}
acc += mix16B(charSequence, 8, secret04, secret05);
acc += mix16B(charSequence, len - 16, secret06, secret07);
}
acc += mix16B(charSequence, 0, secret00, secret01);
acc += mix16B(charSequence, len - 8, secret02, secret03);

return avalanche3(acc);
}
// ---- Path 3: 65 to 120 chars ----
if (len <= 120) {
long acc = len * (INIT_ACC_1 << 1);
// The first 64 chars are always mixed as eight consecutive 8-char windows.
acc += mix16B(charSequence, 0, secret00, secret01);
acc += mix16B(charSequence, 8, secret02, secret03);
acc += mix16B(charSequence, 16, secret04, secret05);
acc += mix16B(charSequence, 24, secret06, secret07);
acc += mix16B(charSequence, 32, secret08, secret09);
acc += mix16B(charSequence, 40, secret10, secret11);
acc += mix16B(charSequence, 48, secret12, secret13);
acc += mix16B(charSequence, 56, secret14, secret15);

acc = avalanche3(acc);

// One further window for every additional complete group of 8 chars; these use the secShift values.
if (len >= 72) {
acc += mix16B(charSequence, 64, secShift00, secShift01);
if (len >= 80) {
acc += mix16B(charSequence, 72, secShift02, secShift03);
if (len >= 88) {
acc += mix16B(charSequence, 80, secShift04, secShift05);
if (len >= 96) {
acc += mix16B(charSequence, 88, secShift06, secShift07);
if (len >= 104) {
acc += mix16B(charSequence, 96, secShift08, secShift09);
if (len >= 112) {
acc += mix16B(charSequence, 104, secShift10, secShift11);
if (len >= 120) acc += mix16B(charSequence, 112, secShift12, secShift13);
}
}
}
}
}
}
// The last 8 chars are always mixed in and may overlap a window processed above.
acc += mix16B(charSequence, len - 8, secShift14, secShift15);
return avalanche3(acc);
}

// ---- Path 4: more than 120 chars, processed in stripes with eight accumulators ----
long acc0 = INIT_ACC_0;
long acc1 = INIT_ACC_1;
long acc2 = INIT_ACC_2;
long acc3 = INIT_ACC_3;
long acc4 = INIT_ACC_4;
long acc5 = INIT_ACC_5;
long acc6 = INIT_ACC_6;
long acc7 = INIT_ACC_7;

// NOTE(review): BLOCK_LEN_EXP is declared elsewhere; subtracting 1 presumably converts a byte-based
// exponent into a char-based one - confirm. Using (len - 1) keeps the final chars out of the block loop
// so that the last stripe below always has data to read.
final int nbBlocks = (len - 1) >>> (BLOCK_LEN_EXP - 1);
for (int n = 0; n < nbBlocks; n++) {
final int offBlock = n << (BLOCK_LEN_EXP - 1);
// A block is 16 stripes; a stripe is 32 chars read as eight 4-char words.
for (int s = 0; s < 16; s += 1) {
int offStripe = offBlock + (s << 5);

long b0 = getLong(charSequence, offStripe);
long b1 = getLong(charSequence, offStripe + 4);
long b2 = getLong(charSequence, offStripe + 4 * 2);
long b3 = getLong(charSequence, offStripe + 4 * 3);
long b4 = getLong(charSequence, offStripe + 4 * 4);
long b5 = getLong(charSequence, offStripe + 4 * 5);
long b6 = getLong(charSequence, offStripe + 4 * 6);
long b7 = getLong(charSequence, offStripe + 4 * 7);

// Each accumulator adds the raw word of its pair partner plus contrib() of its own word; the secret
// index advances by one per stripe.
acc0 += b1 + contrib(b0, secret[s]);
acc1 += b0 + contrib(b1, secret[s + 1]);
acc2 += b3 + contrib(b2, secret[s + 2]);
acc3 += b2 + contrib(b3, secret[s + 3]);
acc4 += b5 + contrib(b4, secret[s + 4]);
acc5 += b4 + contrib(b5, secret[s + 5]);
acc6 += b7 + contrib(b6, secret[s + 6]);
acc7 += b6 + contrib(b7, secret[s + 7]);
}

// After every complete block each accumulator is passed through mixAcc with its own secret.
acc0 = mixAcc(acc0, secret16);
acc1 = mixAcc(acc1, secret17);
acc2 = mixAcc(acc2, secret18);
acc3 = mixAcc(acc3, secret19);
acc4 = mixAcc(acc4, secret20);
acc5 = mixAcc(acc5, secret21);
acc6 = mixAcc(acc6, secret22);
acc7 = mixAcc(acc7, secret23);
}

// Complete stripes of the trailing partial block (no mixAcc step follows these).
final int nbStripes = ((len - 1) - (nbBlocks << (BLOCK_LEN_EXP - 1))) >>> 5;
final int offBlock = nbBlocks << (BLOCK_LEN_EXP - 1);
for (int s = 0; s < nbStripes; s++) {
int offStripe = offBlock + (s << 5);

long b0 = getLong(charSequence, offStripe);
long b1 = getLong(charSequence, offStripe + 4);
long b2 = getLong(charSequence, offStripe + 4 * 2);
long b3 = getLong(charSequence, offStripe + 4 * 3);
long b4 = getLong(charSequence, offStripe + 4 * 4);
long b5 = getLong(charSequence, offStripe + 4 * 5);
long b6 = getLong(charSequence, offStripe + 4 * 6);
long b7 = getLong(charSequence, offStripe + 4 * 7);

acc0 += b1 + contrib(b0, secret[s]);
acc1 += b0 + contrib(b1, secret[s + 1]);
acc2 += b3 + contrib(b2, secret[s + 2]);
acc3 += b2 + contrib(b3, secret[s + 3]);
acc4 += b5 + contrib(b4, secret[s + 4]);
acc5 += b4 + contrib(b5, secret[s + 5]);
acc6 += b7 + contrib(b6, secret[s + 6]);
acc7 += b6 + contrib(b7, secret[s + 7]);
}

// Final stripe: always the last 32 chars of the input, keyed with the secShift16..23 values. It may
// overlap chars that were already consumed above.
{
int offStripe = len - 32;

long b0 = getLong(charSequence, offStripe);
long b1 = getLong(charSequence, offStripe + 4);
long b2 = getLong(charSequence, offStripe + 4 * 2);
long b3 = getLong(charSequence, offStripe + 4 * 3);
long b4 = getLong(charSequence, offStripe + 4 * 4);
long b5 = getLong(charSequence, offStripe + 4 * 5);
long b6 = getLong(charSequence, offStripe + 4 * 6);
long b7 = getLong(charSequence, offStripe + 4 * 7);

acc0 += b1 + contrib(b0, secShift16);
acc1 += b0 + contrib(b1, secShift17);
acc2 += b3 + contrib(b2, secShift18);
acc3 += b2 + contrib(b3, secShift19);
acc4 += b5 + contrib(b4, secShift20);
acc5 += b4 + contrib(b5, secShift21);
acc6 += b7 + contrib(b6, secShift22);
acc7 += b6 + contrib(b7, secShift23);
}

// The length handed to the finalizer is the byte length; the cast to long happens before the shift.
return finalizeHash((long) len << 1, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
}

// =============================================================================================================
// Private methods
// =============================================================================================================

public static long rrmxmx(long h64, final long length) {

/**
 * Assembles a {@code long} from four consecutive UTF-16 code units of a {@link CharSequence}.
 *
 * <p>The char at {@code off} ends up in bits 0-15, the next one in bits 16-31, then bits 32-47, and the
 * char at {@code off + 3} in bits 48-63.
 *
 * @param charSequence the sequence to read from
 * @param off index of the first of the four chars
 * @return the four chars packed into one {@code long}
 */
private static long getLong(final CharSequence charSequence, int off) {
    // Widening char -> long is unsigned, so no masking is needed before shifting.
    final long word0 = charSequence.charAt(off);
    final long word1 = charSequence.charAt(off + 1);
    final long word2 = charSequence.charAt(off + 2);
    final long word3 = charSequence.charAt(off + 3);
    return word0 | (word1 << 16) | (word2 << 32) | (word3 << 48);
}

/**
 * Assembles an {@code int} from two consecutive UTF-16 code units of a {@link CharSequence}.
 *
 * <p>The char at {@code off} supplies the low 16 bits and the char at {@code off + 1} the high 16 bits.
 *
 * @param charSequence the sequence to read from
 * @param off index of the first of the two chars
 * @return the two chars packed into one {@code int}
 */
private static int getInt(CharSequence charSequence, int off) {
    // Widening char -> int is unsigned, so the low half never bleeds into the high half.
    final int lowHalf = charSequence.charAt(off);
    final int highHalf = charSequence.charAt(off + 1);
    return (highHalf << 16) | lowHalf;
}

static long rrmxmx(long h64, final long length) {
h64 ^= Long.rotateLeft(h64, 49) ^ Long.rotateLeft(h64, 24);
h64 *= 0x9FB21C651E98DF25L;
h64 ^= (h64 >>> 35) + length;
Expand All @@ -508,15 +712,22 @@ private static long mix16B(final byte[] input, final int offIn, final long sec0,
return mix2Accs(lo, hi, sec0, sec1);
}

public static long avalanche64(long h64) {

/**
 * Mixes an 8-char window of a {@link CharSequence} with two secret values.
 *
 * <p>The window is read as two 4-char words, the first starting at {@code offIn} and the second at
 * {@code offIn + 4}, and both are handed to {@code mix2Accs} together with the secrets.
 *
 * @param input the sequence to read from
 * @param offIn index of the first char of the window
 * @param sec0 secret paired with the first word
 * @param sec1 secret paired with the second word
 * @return the value produced by {@code mix2Accs}
 */
private static long mix16B(final CharSequence input, final int offIn, final long sec0, final long sec1) {
    // Arguments are evaluated left to right, so the lower word is still read first.
    return mix2Accs(getLong(input, offIn), getLong(input, offIn + 4), sec0, sec1);
}

/**
 * Applies a final bit-mixing step to a 64-bit value: xor-shift by 33, multiply by {@code INIT_ACC_2},
 * xor-shift by 29, multiply by {@code INIT_ACC_3}, xor-shift by 32.
 *
 * @param h64 the value to mix
 * @return the mixed value
 */
static long avalanche64(long h64) {
    long mixed = h64 ^ (h64 >>> 33);
    mixed = mixed * INIT_ACC_2;
    mixed = mixed ^ (mixed >>> 29);
    mixed = mixed * INIT_ACC_3;
    return mixed ^ (mixed >>> 32);
}

public static long avalanche3(long h64) {
static long avalanche3(long h64) {
h64 ^= h64 >>> 37;
h64 *= 0x165667919E3779F9L;
return h64 ^ (h64 >>> 32);
Expand All @@ -535,7 +746,7 @@ private static long mixAcc(long acc, long sec) {
return (acc ^ (acc >>> 47) ^ sec) * INIT_ACC_7;
}

public static long mix(long a, long b) {
static long mix(long a, long b) {
long x = a * b;
long y = Math.unsignedMultiplyHigh(a, b);
return x ^ y;
Expand All @@ -553,6 +764,10 @@ private long finalizeHash(
return avalanche3(result64);
}

/**
* A writable sequential data implementation that hashes data using the XXH3_64 algorithm.
* It buffers writes in bulk and processes them to compute the hash incrementally.
*/
public class HashingWritableSequentialData implements WritableSequentialData {
/** The size of the buffer used for writing data in bulk. */
private static final int BULK_SIZE = 256;
Expand Down
Loading
Loading