Skip to content

Commit 79218be

Browse files
committed
adding serialization/deserialization
1 parent df21d49 commit 79218be

4 files changed

Lines changed: 218 additions & 0 deletions

File tree

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,21 @@ about 0.0015%. The type is `binary_fuse16_t` and you may use it with
5454
functions such as `binary_fuse16_allocate`, `binary_fuse16_populate`,
5555
`binary_fuse16_contain` and `binary_fuse16_free`.
5656
57+
You may serialize the data as follows:
58+
59+
```C
60+
size_t buffer_size = binary_fuse16_serialization_bytes(&filter);
61+
char *buffer = (char*)malloc(buffer_size);
62+
binary_fuse16_serialize(&filter, buffer);
63+
binary_fuse16_free(&filter);
64+
binary_fuse16_deserialize(&filter, buffer);
65+
free(buffer);
66+
```
67+
68+
The serialization does not handle endianness: it is expected that you will serialize
69+
and deserialize on little-endian systems. (Big-endian systems are vanishingly rare.)
70+
71+
5772
## C++ wrapper
5873

5974
If you want a C++ version, you can roll your own:

include/binaryfusefilter.h

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,103 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size,
734734
return true;
735735
}
736736

737+
static inline size_t binary_fuse16_serialization_bytes(binary_fuse16_t *filter) {
738+
return sizeof(filter->Seed) + sizeof(filter->SegmentLength) +
739+
sizeof(filter->SegmentLengthMask) + sizeof(filter->SegmentCount) +
740+
sizeof(filter->SegmentCountLength) + sizeof(filter->ArrayLength) +
741+
sizeof(uint16_t) * filter->ArrayLength;
742+
}
743+
744+
static inline size_t binary_fuse8_serialization_bytes(const binary_fuse8_t *filter) {
745+
return sizeof(filter->Seed) + sizeof(filter->SegmentLength) +
746+
sizeof(filter->SegmentCount) +
747+
sizeof(filter->SegmentCountLength) + sizeof(filter->ArrayLength) +
748+
sizeof(uint8_t) * filter->ArrayLength;
749+
}
750+
751+
// serialize a filter to a buffer, the buffer should have a capacity of at least
752+
// binary_fuse16_serialization_bytes(filter) bytes.
753+
// Native endianess only.
754+
static inline void binary_fuse16_serialize(const binary_fuse16_t *filter, char *buffer) {
755+
memcpy(buffer, &filter->Seed, sizeof(filter->Seed));
756+
buffer += sizeof(filter->Seed);
757+
memcpy(buffer, &filter->SegmentLength, sizeof(filter->SegmentLength));
758+
buffer += sizeof(filter->SegmentLength);
759+
memcpy(buffer, &filter->SegmentCount, sizeof(filter->SegmentCount));
760+
buffer += sizeof(filter->SegmentCount);
761+
memcpy(buffer, &filter->SegmentCountLength, sizeof(filter->SegmentCountLength));
762+
buffer += sizeof(filter->SegmentCountLength);
763+
memcpy(buffer, &filter->ArrayLength, sizeof(filter->ArrayLength));
764+
buffer += sizeof(filter->ArrayLength);
765+
memcpy(buffer, filter->Fingerprints, sizeof(filter->ArrayLength) * sizeof(uint16_t));
766+
}
737767

768+
// serialize a filter to a buffer, the buffer should have a capacity of at least
769+
// binary_fuse8_serialization_bytes(filter) bytes.
770+
// Native endianess only.
771+
static inline void binary_fuse8_serialize(const binary_fuse8_t *filter, char *buffer) {
772+
memcpy(buffer, &filter->Seed, sizeof(filter->Seed));
773+
buffer += sizeof(filter->Seed);
774+
memcpy(buffer, &filter->SegmentLength, sizeof(filter->SegmentLength));
775+
buffer += sizeof(filter->SegmentLength);
776+
memcpy(buffer, &filter->SegmentCount, sizeof(filter->SegmentCount));
777+
buffer += sizeof(filter->SegmentCount);
778+
memcpy(buffer, &filter->SegmentCountLength, sizeof(filter->SegmentCountLength));
779+
buffer += sizeof(filter->SegmentCountLength);
780+
memcpy(buffer, &filter->ArrayLength, sizeof(filter->ArrayLength));
781+
buffer += sizeof(filter->ArrayLength);
782+
memcpy(buffer, filter->Fingerprints, sizeof(filter->ArrayLength) * sizeof(uint8_t));
783+
}
738784

785+
// deserialize a filter from a buffer, returns true on success, false on failure.
786+
// The output will be reallocated, so the caller should call binary_fuse16_free(filter) before
787+
// if the filter was already allocated. The caller needs to call binary_fuse16_free(filter) after.
788+
// The number of bytes read is binary_fuse16_serialization_bytes(output).
789+
// Native endianess only.
790+
static inline bool binary_fuse16_deserialize(binary_fuse16_t * filter, const char *buffer) {
791+
memcpy(&filter->Seed, buffer, sizeof(filter->Seed));
792+
buffer += sizeof(filter->Seed);
793+
memcpy(&filter->SegmentLength, buffer, sizeof(filter->SegmentLength));
794+
buffer += sizeof(filter->SegmentLength);
795+
filter->SegmentLengthMask = filter->SegmentLength - 1;
796+
memcpy(&filter->SegmentCount, buffer, sizeof(filter->SegmentCount));
797+
buffer += sizeof(filter->SegmentCount);
798+
memcpy(&filter->SegmentCountLength, buffer, sizeof(filter->SegmentCountLength));
799+
buffer += sizeof(filter->SegmentCountLength);
800+
memcpy(&filter->ArrayLength, buffer, sizeof(filter->ArrayLength));
801+
buffer += sizeof(filter->ArrayLength);
802+
filter->Fingerprints = (uint16_t*)malloc(filter->ArrayLength * sizeof(uint16_t));
803+
if(filter->Fingerprints == NULL) {
804+
return false;
805+
}
806+
memcpy(filter->Fingerprints, buffer, sizeof(filter->ArrayLength) * sizeof(uint16_t));
807+
return true;
808+
}
809+
810+
811+
// deserialize a filter from a buffer, returns true on success, false on failure.
812+
// The output will be reallocated, so the caller should call binary_fuse8_free(filter) before
813+
// if the filter was already allocated. The caller needs to call binary_fuse8_free(filter) after.
814+
// The number of bytes read is binary_fuse8_serialization_bytes(output).
815+
// Native endianess only.
816+
static inline bool binary_fuse8_deserialize(binary_fuse8_t * filter, const char *buffer) {
817+
memcpy(&filter->Seed, buffer, sizeof(filter->Seed));
818+
buffer += sizeof(filter->Seed);
819+
memcpy(&filter->SegmentLength, buffer, sizeof(filter->SegmentLength));
820+
buffer += sizeof(filter->SegmentLength);
821+
filter->SegmentLengthMask = filter->SegmentLength - 1;
822+
memcpy(&filter->SegmentCount, buffer, sizeof(filter->SegmentCount));
823+
buffer += sizeof(filter->SegmentCount);
824+
memcpy(&filter->SegmentCountLength, buffer, sizeof(filter->SegmentCountLength));
825+
buffer += sizeof(filter->SegmentCountLength);
826+
memcpy(&filter->ArrayLength, buffer, sizeof(filter->ArrayLength));
827+
buffer += sizeof(filter->ArrayLength);
828+
filter->Fingerprints = (uint8_t*)malloc(filter->ArrayLength * sizeof(uint8_t));
829+
if(filter->Fingerprints == NULL) {
830+
return false;
831+
}
832+
memcpy(filter->Fingerprints, buffer, sizeof(filter->ArrayLength) * sizeof(uint8_t));
833+
return true;
834+
}
739835

740836
#endif

include/xorfilter.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,4 +1280,76 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter
12801280

12811281

12821282

1283+
uint64_t seed;
1284+
uint64_t blockLength;
1285+
static inline size_t xor16_serialization_bytes(xor16_t *filter) {
1286+
return sizeof(filter->seed) + sizeof(filter->blockLength) +
1287+
sizeof(uint16_t) * 3 * filter->blockLength;
1288+
}
1289+
1290+
static inline size_t xor8_serialization_bytes(const xor8_t *filter) {
1291+
return sizeof(filter->seed) + sizeof(filter->blockLength) +
1292+
sizeof(uint8_t) * 3 * filter->blockLength;
1293+
}
1294+
1295+
// serialize a filter to a buffer, the buffer should have a capacity of at least
1296+
// xor16_serialization_bytes(filter) bytes.
1297+
// Native endianess only.
1298+
static inline void xor16_serialize(const xor16_t *filter, char *buffer) {
1299+
memcpy(buffer, &filter->seed, sizeof(filter->seed));
1300+
buffer += sizeof(filter->seed);
1301+
memcpy(buffer, &filter->blockLength, sizeof(filter->blockLength));
1302+
buffer += sizeof(filter->blockLength);
1303+
memcpy(buffer, filter->fingerprints, sizeof(filter->blockLength) * 3 * sizeof(uint16_t));
1304+
}
1305+
1306+
// serialize a filter to a buffer, the buffer should have a capacity of at least
1307+
// xor8_serialization_bytes(filter) bytes.
1308+
// Native endianess only.
1309+
static inline void xor8_serialize(const xor8_t *filter, char *buffer) {
1310+
memcpy(buffer, &filter->seed, sizeof(filter->seed));
1311+
buffer += sizeof(filter->seed);
1312+
memcpy(buffer, &filter->blockLength, sizeof(filter->blockLength));
1313+
buffer += sizeof(filter->blockLength);
1314+
memcpy(buffer, filter->fingerprints, sizeof(filter->blockLength) * 3 * sizeof(uint8_t));
1315+
}
1316+
1317+
// deserialize a filter from a buffer, returns true on success, false on failure.
1318+
// The output will be reallocated, so the caller should call xor16_free(filter) before
1319+
// if the filter was already allocated. The caller needs to call xor16_free(filter) after.
1320+
// The number of bytes read is xor16_serialization_bytes(filter).
1321+
// Native endianess only.
1322+
static inline bool xor16_deserialize(xor16_t * filter, const char *buffer) {
1323+
memcpy(&filter->seed, buffer, sizeof(filter->seed));
1324+
buffer += sizeof(filter->seed);
1325+
memcpy(&filter->blockLength, buffer, sizeof(filter->blockLength));
1326+
buffer += sizeof(filter->blockLength);
1327+
filter->fingerprints = (uint16_t*)malloc(filter->blockLength * 3 * sizeof(uint16_t));
1328+
if(filter->fingerprints == NULL) {
1329+
return false;
1330+
}
1331+
memcpy(filter->fingerprints, buffer, sizeof(filter->blockLength) * 3 * sizeof(uint16_t));
1332+
return true;
1333+
}
1334+
1335+
1336+
// deserialize a filter from a buffer, returns true on success, false on failure.
1337+
// The output will be reallocated, so the caller should call xor8_free(filter) before
1338+
// if the filter was already allocated. The caller needs to call xor8_free(filter) after.
1339+
// The number of bytes read is xor8_serialization_bytes(filter).
1340+
// Native endianess only.
1341+
static inline bool xor8_deserialize(xor8_t * filter, const char *buffer) {
1342+
memcpy(&filter->seed, buffer, sizeof(filter->seed));
1343+
buffer += sizeof(filter->seed);
1344+
memcpy(&filter->blockLength, buffer, sizeof(filter->blockLength));
1345+
buffer += sizeof(filter->blockLength);
1346+
filter->fingerprints = (uint8_t*)malloc(filter->blockLength * 3 * sizeof(uint8_t));
1347+
if(filter->fingerprints == NULL) {
1348+
return false;
1349+
}
1350+
memcpy(filter->fingerprints, buffer, sizeof(filter->blockLength) * 3 * sizeof(uint8_t));
1351+
return true;
1352+
}
1353+
1354+
12831355
#endif

tests/unit.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ bool testxor8(size_t size) {
6363
}
6464
}
6565

66+
size_t buffer_size = xor8_serialization_bytes(&filter);
67+
char *buffer = (char*)malloc(buffer_size);
68+
xor8_serialize(&filter, buffer);
69+
xor8_free(&filter);
70+
xor8_deserialize(&filter, buffer);
71+
free(buffer);
72+
6673
size_t random_matches = 0;
6774
size_t trials = 10000000;
6875
for (size_t i = 0; i < trials; i++) {
@@ -102,6 +109,13 @@ bool testxor16(size_t size) {
102109
}
103110
}
104111

112+
size_t buffer_size = xor16_serialization_bytes(&filter);
113+
char *buffer = (char*)malloc(buffer_size);
114+
xor16_serialize(&filter, buffer);
115+
xor16_free(&filter);
116+
xor16_deserialize(&filter, buffer);
117+
free(buffer);
118+
105119
size_t random_matches = 0;
106120
size_t trials = 10000000;
107121
for (size_t i = 0; i < trials; i++) {
@@ -142,6 +156,13 @@ bool testbufferedxor16(size_t size) {
142156
}
143157
}
144158

159+
size_t buffer_size = xor16_serialization_bytes(&filter);
160+
char *buffer = (char*)malloc(buffer_size);
161+
xor16_serialize(&filter, buffer);
162+
xor16_free(&filter);
163+
xor16_deserialize(&filter, buffer);
164+
free(buffer);
165+
145166
size_t random_matches = 0;
146167
size_t trials = 10000000;
147168
for (size_t i = 0; i < trials; i++) {
@@ -181,6 +202,13 @@ bool testbinaryfuse8(size_t size) {
181202
}
182203
}
183204

205+
size_t buffer_size = binary_fuse8_serialization_bytes(&filter);
206+
char *buffer = (char*)malloc(buffer_size);
207+
binary_fuse8_serialize(&filter, buffer);
208+
binary_fuse8_free(&filter);
209+
binary_fuse8_deserialize(&filter, buffer);
210+
free(buffer);
211+
184212
size_t random_matches = 0;
185213
size_t trials = 10000000;
186214
for (size_t i = 0; i < trials; i++) {
@@ -222,6 +250,13 @@ bool testbinaryfuse16(size_t size) {
222250
}
223251
}
224252

253+
size_t buffer_size = binary_fuse16_serialization_bytes(&filter);
254+
char *buffer = (char*)malloc(buffer_size);
255+
binary_fuse16_serialize(&filter, buffer);
256+
binary_fuse16_free(&filter);
257+
binary_fuse16_deserialize(&filter, buffer);
258+
free(buffer);
259+
225260
size_t random_matches = 0;
226261
size_t trials = 10000000;
227262
for (size_t i = 0; i < trials; i++) {

0 commit comments

Comments
 (0)