Skip to content

Commit dca81e2

Browse files
authored
Merge pull request #86 from tarterp/add_chrysalis_hybrid_murmur
added chrysalis hybrid murmur
2 parents 69abce3 + f460c49 commit dca81e2

1 file changed

Lines changed: 122 additions & 0 deletions

File tree

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Human-readable summary of the algorithm implemented by hash() below.
# The text starts and ends with a newline.
DESCRIPTION = (
    "\n"
    "A hybrid hashing algorithm that processes short strings with a standard\n"
    "iterative loop and switches to a parallelized four-accumulator strategy for\n"
    "strings of 16 bytes or more. The intermediate result is finalized using a\n"
    "modified MurmurHash3 avalanche mixer to produce the output.\n"
)

# Kind of value hash() produces: an unsigned 32-bit integer.
# NOTE(review): presumably read by the framework that loads this module --
# confirm the accepted values against that loader.
TYPE = "unsigned_int"

# Reference hash value for self-checking.
# NOTE(review): the input that yields this value is not shown in this file;
# presumably the loader's standard test string -- confirm before relying on it.
TEST_1 = 3873075608
9+
10+
def rol4(value, count):
    """Rotate the low 32 bits of ``value`` left by ``count`` positions.

    ``count`` is reduced modulo 32 and ``value`` is truncated to 32 bits
    before rotating, so the result is always in the range 0..0xFFFFFFFF.
    """
    shift = count % 32
    word = value & 0xFFFFFFFF
    # Bits pushed out of the top re-enter at the bottom; when shift is 0 the
    # right shift by 32 yields 0 and the word comes back unchanged.
    wrapped = word >> (32 - shift)
    return ((word << shift) | wrapped) & 0xFFFFFFFF
15+
16+
def ror4(value, count):
    """Rotate the low 32 bits of ``value`` right by ``count`` positions.

    ``count`` is reduced modulo 32 and ``value`` is truncated to 32 bits
    before rotating, so the result is always in the range 0..0xFFFFFFFF.
    """
    shift = count % 32
    word = value & 0xFFFFFFFF
    # Bits dropped off the bottom re-enter at the top; the final mask trims
    # whatever the left shift pushed past bit 31.
    wrapped = word << (32 - shift)
    return ((word >> shift) | wrapped) & 0xFFFFFFFF
21+
22+
def hash(data):
    """Compute the 32-bit hybrid hash of ``data``.

    ``data`` may be a ``str`` (each character contributes its ``ord()``
    value) or any iterable of integers such as ``bytes``.

    Inputs with fewer than 16 items are folded one item at a time, starting
    from a fixed seed.  Inputs with 16 items or more are consumed four items
    at a time by four independent accumulators, and the merged accumulators
    become the seed.  On that long path the 1-3 items left over after the
    last full group of four are NOT mixed into the result.  In both cases the
    running state is finished with a multiply / xor-shift avalanche.

    Returns the hash as an int in the range 0..0xFFFFFFFF.

    NOTE(review): the name shadows the builtin ``hash``; presumably this is
    the entry point the surrounding framework looks up -- do not rename
    without checking the loader.
    """
    mask = 0xFFFFFFFF

    # Normalise the input to a list of integers.
    items = [ord(ch) for ch in data] if isinstance(data, str) else list(data)

    remaining = len(items)
    pos = 0

    if remaining < 0x10:
        # Short input: fixed seed, every item is handled by the fold below.
        seed = 0xF50426A0
    else:
        # Long input: four lanes, lane k consumes item k of each 4-item group.
        lanes = [0x2D10317, 0x64998966, 0xDEADBEEF, 0x4076453E]
        while remaining >= 4:
            for lane, item in enumerate(items[pos:pos + 4]):
                scaled_state = (0x9E3779B1 * lanes[lane]) & mask
                scaled_item = (0x3B5C4B9 * item) & mask
                lanes[lane] = rol4((scaled_state - scaled_item) & mask, 13)
            pos += 4
            # The remaining-length counter is consumed together with the
            # position.  The original author's note says this mirrors the
            # reference C implementation, and it is what makes the fold below
            # a no-op for long inputs.
            remaining -= 4

        merged = (
            rol4(lanes[0], 1)
            + rol4(lanes[1], 7)
            + rol4(lanes[2], 12)
            + ror4(lanes[3], 14)
        ) & mask
        seed = (0x842A6D03 - ((0x61C8864F * merged) & mask)) & mask

    # The starting state includes the current position: 0 for short inputs,
    # the number of items already consumed for long inputs.
    state = (pos + seed) & mask

    # Per-item fold.  After the long path pos >= 16 while remaining <= 3, so
    # this slice is empty and the loop only does work for short inputs.
    for item in items[pos:remaining]:
        stirred = rol4((state + 0x165667B1 * item) & mask, 11)
        state = (0x9E3779B1 * stirred) & mask

    # Final avalanche: multiply, xor-shift, multiply, xor-shift.
    state = (0x85EBCA77 * (state ^ (state >> 15))) & mask
    state ^= state >> 13
    state = (0xC2B2AE3D * state) & mask
    return state ^ (state >> 16)

0 commit comments

Comments
 (0)