Skip to content

Commit 5d2b41d

Browse files
committed
saving.
1 parent 4d2f7ee commit 5d2b41d

2 files changed

Lines changed: 187 additions & 0 deletions

File tree

README.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,62 @@ The following additional types are implemented, but less tested:
2727

2828
## Usage
2929

30+
31+
To use the XOR and Binary Fuse filters, first prepare an array of keys, then construct the filter:
32+
33+
```java
34+
import org.fastfilter.xor.Xor8;
35+
import org.fastfilter.xor.Xor16;
36+
import org.fastfilter.xor.XorBinaryFuse8;
37+
import org.fastfilter.xor.XorBinaryFuse16;
38+
39+
// Example keys
40+
long[] keys = {1, 2, 3, 4, 5};
41+
42+
// Construct XOR filters
43+
Xor8 xor8 = Xor8.construct(keys);
44+
Xor16 xor16 = Xor16.construct(keys);
45+
XorBinaryFuse8 xorBinaryFuse8 = XorBinaryFuse8.construct(keys);
46+
XorBinaryFuse16 xorBinaryFuse16 = XorBinaryFuse16.construct(keys);
47+
48+
// Check membership
49+
boolean mightContain = xor8.mayContain(1L); // true
50+
boolean mightContain2 = xor8.mayContain(6L); // false (with high probability)
51+
```
52+
53+
All filters implement the `Filter` interface and support the `mayContain(long key)` method to check if a key might be in the set. Note that false positives are possible, but false negatives are not.
54+
55+
56+
### Serialization and Deserialization
57+
58+
Filters can be serialized to and deserialized from a `ByteBuffer` for persistence or transmission:
59+
60+
```java
61+
import java.nio.ByteBuffer;
62+
63+
// Assuming you have a constructed filter, e.g., Xor8 xor8 = Xor8.construct(keys);
64+
65+
// Get the serialized size
66+
int size = xor8.getSerializedSize();
67+
68+
// Allocate a ByteBuffer
69+
ByteBuffer buffer = ByteBuffer.allocate(size);
70+
71+
// Serialize the filter
72+
xor8.serialize(buffer);
73+
74+
// Prepare buffer for reading (flip)
75+
buffer.flip();
76+
77+
// Deserialize the filter
78+
Xor8 deserializedXor8 = Xor8.deserialize(buffer);
79+
80+
// The deserialized filter behaves identically to the original
81+
boolean result = deserializedXor8.mayContain(1L); // true
82+
```
83+
84+
This allows saving filters to files, databases, or sending them over networks.
85+
3086
### Maven
3187

3288
When using Maven: the latest version, 1.0.4, is not yet available on Maven Central; see [issue #48](https://github.com/FastFilter/fastfilter_java/issues/48). However, it is available at https://jitpack.io/:
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package org.fastfilter.xor;
2+
3+
import static org.junit.Assert.assertTrue;
4+
5+
import java.util.HashSet;
6+
import java.util.Random;
7+
import java.util.Set;
8+
9+
import org.fastfilter.Filter;
10+
import org.junit.Test;
11+
12+
public class StringFilters {
13+
14+
private static final int NUM_STRINGS = 100_000;
15+
private static final int NUM_TEST_STRINGS = 1_000;
16+
private static final Random random = new Random(42);
17+
18+
private static final long[] keys = generateKeys();
19+
private static final long[] testKeys = generateTestKeys();
20+
21+
private static long[] generateKeys() {
22+
String[] strings = new String[NUM_STRINGS];
23+
for (int i = 0; i < NUM_STRINGS; i++) {
24+
strings[i] = generateRandomString();
25+
}
26+
long[] k = new long[NUM_STRINGS];
27+
for (int i = 0; i < NUM_STRINGS; i++) {
28+
k[i] = hashString(strings[i]);
29+
}
30+
checkUniqueness(k, "keys");
31+
return k;
32+
}
33+
34+
private static long[] generateTestKeys() {
35+
String[] strings = new String[NUM_TEST_STRINGS];
36+
for (int i = 0; i < NUM_TEST_STRINGS; i++) {
37+
strings[i] = generateRandomString();
38+
}
39+
long[] k = new long[NUM_TEST_STRINGS];
40+
for (int i = 0; i < NUM_TEST_STRINGS; i++) {
41+
k[i] = hashString(strings[i]);
42+
}
43+
checkUniqueness(k, "test keys");
44+
return k;
45+
}
46+
47+
private static void checkUniqueness(long[] array, String name) {
48+
Set<Long> set = new HashSet<>();
49+
int collisions = 0;
50+
for (long l : array) {
51+
if (!set.add(l)) {
52+
collisions++;
53+
}
54+
}
55+
if (collisions > 0) {
56+
System.out.println("Warning: " + collisions + " hash collisions in " + name);
57+
} else {
58+
System.out.println("No hash collisions in " + name);
59+
}
60+
}
61+
62+
private static String generateRandomString() {
63+
int length = 5 + random.nextInt(16); // 5 to 20 chars
64+
StringBuilder sb = new StringBuilder(length);
65+
for (int i = 0; i < length; i++) {
66+
sb.append((char) ('a' + random.nextInt(26)));
67+
}
68+
return sb.toString();
69+
}
70+
71+
private static long hashString(String s) {
72+
long h = 0;
73+
for (char c : s.toCharArray()) {
74+
h = h * 31 + c;
75+
}
76+
return h;
77+
}
78+
79+
@Test
80+
public void testXor8() {
81+
testFilter(Xor8.class);
82+
}
83+
84+
@Test
85+
public void testXor16() {
86+
testFilter(Xor16.class);
87+
}
88+
89+
@Test
90+
public void testXorBinaryFuse8() {
91+
testFilter(XorBinaryFuse8.class);
92+
}
93+
94+
@Test
95+
public void testXorBinaryFuse16() {
96+
testFilter(XorBinaryFuse16.class);
97+
}
98+
99+
@Test
100+
public void testXorBinaryFuse32() {
101+
testFilter(XorBinaryFuse32.class);
102+
}
103+
104+
private void testFilter(Class<?> filterClass) {
105+
// Construct filter
106+
Filter filter;
107+
try {
108+
filter = (Filter) filterClass.getMethod("construct", long[].class).invoke(null, (Object) keys);
109+
} catch (Exception e) {
110+
throw new RuntimeException(e);
111+
}
112+
113+
// Check all keys are in the filter
114+
for (int i = 0; i < NUM_STRINGS; i++) {
115+
assertTrue("Key " + i + " should be in filter", filter.mayContain(keys[i]));
116+
}
117+
118+
// Check false positives on test keys
119+
int falsePositives = 0;
120+
for (int i = 0; i < NUM_TEST_STRINGS; i++) {
121+
if (filter.mayContain(testKeys[i])) {
122+
falsePositives++;
123+
}
124+
}
125+
126+
// Expect low false positive rate (less than 1% for most filters)
127+
double fpp = (double) falsePositives / NUM_TEST_STRINGS;
128+
System.out.println(filterClass.getSimpleName() + " false positive rate: " + fpp);
129+
assertTrue("False positive rate should be low: " + fpp, fpp < 0.01); // Allow up to 1%
130+
}
131+
}

0 commit comments

Comments
 (0)