Skip to content

Commit e0fa4e1

Browse files
committed
Added union and intersection
1 parent 2f3a62b commit e0fa4e1

2 files changed

Lines changed: 161 additions & 0 deletions

File tree

src/bloom_filter_wrapper.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,15 @@ void bind_bloom_filter(nb::module_ &m, const char* name) {
8787

8888
.def("reset", &bloom_filter_type::reset,
8989
"Resets the Bloom filter to its original empty state")
90+
.def("union_with", &bloom_filter_type::union_with,
91+
nb::arg("other"),
92+
"Performs a union operation with another Bloom filter. Both filters must have the same capacity, number of hashes, and seed.")
93+
.def("intersect", &bloom_filter_type::intersect,
94+
nb::arg("other"),
95+
"Performs an intersection operation with another Bloom filter. Both filters must have the same capacity, number of hashes, and seed.")
96+
.def("is_compatible", &bloom_filter_type::is_compatible,
97+
nb::arg("other"),
98+
"Returns True if the other Bloom filter is compatible for union/intersection operations (same capacity, num_hashes, and seed)")
9099
.def("get_serialized_size_bytes",
91100
[](const bloom_filter_type& sk) { return sk.get_serialized_size_bytes(); },
92101
"Returns the size in bytes of the serialized image of the filter")

tests/bloom_filter_test.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,5 +354,157 @@ def test_mathematical_properties(self):
354354
self.assertGreater(bf.num_hashes, 0)
355355
self.assertIsInstance(bf.seed, int)
356356

357+
def test_union_operation(self):
    """Check that union_with merges the argument's contents into the receiver.

    Two filters built with identical parameters are filled with disjoint
    keys plus one shared key; after the union the receiver must answer
    positively for every key, while the argument filter keeps answering
    as it did before.
    """
    # Same size, hash count and seed on both sides.
    left = bloom_filter.create_by_size(1024, 5, seed=12345)
    right = bloom_filter.create_by_size(1024, 5, seed=12345)

    # Compatibility is asserted in both directions.
    self.assertTrue(left.is_compatible(right))
    self.assertTrue(right.is_compatible(left))

    left_only = ["item1", "item2", "item3", "item4", "item5"]
    right_only = ["item6", "item7", "item8", "item9", "item10"]
    shared = "common_item"

    # Each filter receives its own keys followed by the shared key.
    for key in left_only + [shared]:
        left.update(key)
    for key in right_only + [shared]:
        right.update(key)

    # Snapshot the bit counts before the receiver is modified.
    left_bits_before = left.num_bits_used
    right_bits_before = right.num_bits_used

    left.union_with(right)

    # Every key inserted on either side must now be reported by the receiver.
    for key in left_only + right_only + [shared]:
        self.assertTrue(left.query(key))

    # The union cannot have fewer bits set than either input had.
    bits_after = left.num_bits_used
    self.assertGreaterEqual(bits_after, left_bits_before)
    self.assertGreaterEqual(bits_after, right_bits_before)

    # The argument filter still knows its own keys and none of the other side's.
    for key in right_only + [shared]:
        self.assertTrue(right.query(key))
    for key in left_only:
        self.assertFalse(right.query(key))
403+
404+
def test_intersection_operation(self):
    """Test intersection operation between compatible bloom filters.

    Two filters with identical parameters are filled with disjoint keys
    plus three shared keys. After ``bf1.intersect(bf2)`` the test checks
    that bf1 still reports the shared keys, no longer reports the keys
    unique to either side, has no more bits set than either input, and
    that bf2 was left untouched.
    """
    # Create two compatible bloom filters
    bf1 = bloom_filter.create_by_size(1024, 5, seed=12345)
    bf2 = bloom_filter.create_by_size(1024, 5, seed=12345)

    # Verify they are compatible
    self.assertTrue(bf1.is_compatible(bf2))

    # Add items to first filter
    items1 = ["item1", "item2", "item3", "item4", "item5"]
    for item in items1:
        bf1.update(item)

    # Add different items to second filter
    items2 = ["item6", "item7", "item8", "item9", "item10"]
    for item in items2:
        bf2.update(item)

    # Add common items to both
    common_items = ["common1", "common2", "common3"]
    for item in common_items:
        bf1.update(item)
        bf2.update(item)

    # Record initial state of both operands
    initial_bits1 = bf1.num_bits_used
    initial_bits2 = bf2.num_bits_used

    # Perform intersection operation
    bf1.intersect(bf2)

    # Verify common items remain in bf1
    for item in common_items:
        self.assertTrue(bf1.query(item))

    # Verify items unique to each filter are no longer in bf1.
    # NOTE(review): these negative checks assume no false positives for
    # these keys at this filter size and seed - confirm if parameters change.
    for item in items1:
        self.assertFalse(bf1.query(item))
    for item in items2:
        self.assertFalse(bf1.query(item))

    # An intersection keeps only bits set in both inputs, so the result
    # cannot have more bits set than either operand had.
    self.assertLessEqual(bf1.num_bits_used, initial_bits1)
    self.assertLessEqual(bf1.num_bits_used, initial_bits2)

    # Verify bf2 is unchanged: same bit count and same query answers
    self.assertEqual(bf2.num_bits_used, initial_bits2)
    for item in items2 + common_items:
        self.assertTrue(bf2.query(item))
    for item in items1:
        self.assertFalse(bf2.query(item))
454+
455+
def test_incompatible_filters(self):
456+
"""Test that union and intersection fail with incompatible filters."""
457+
# Create filters with different capacities
458+
bf1 = bloom_filter.create_by_size(1024, 5, seed=12345)
459+
bf2 = bloom_filter.create_by_size(2048, 5, seed=12345)
460+
461+
self.assertFalse(bf1.is_compatible(bf2))
462+
self.assertFalse(bf2.is_compatible(bf1))
463+
464+
# Should raise exception for union
465+
with self.assertRaises(Exception):
466+
bf1.union_with(bf2)
467+
468+
# Should raise exception for intersection
469+
with self.assertRaises(Exception):
470+
bf1.intersect(bf2)
471+
472+
# Create filters with different number of hashes
473+
bf3 = bloom_filter.create_by_size(1024, 3, seed=12345)
474+
self.assertFalse(bf1.is_compatible(bf3))
475+
476+
# Create filters with different seeds
477+
bf4 = bloom_filter.create_by_size(1024, 5, seed=54321)
478+
self.assertFalse(bf1.is_compatible(bf4))
479+
480+
def test_union_intersection_edge_cases(self):
481+
"""Test edge cases for union and intersection operations."""
482+
# Test with empty filters
483+
bf1 = bloom_filter.create_by_size(1024, 5, seed=12345)
484+
bf2 = bloom_filter.create_by_size(1024, 5, seed=12345)
485+
486+
# Union of empty filters should remain empty
487+
bf1.union_with(bf2)
488+
self.assertTrue(bf1.is_empty())
489+
self.assertEqual(bf1.num_bits_used, 0)
490+
491+
# Intersection of empty filters should remain empty
492+
bf1.reset()
493+
bf1.intersect(bf2)
494+
self.assertTrue(bf1.is_empty())
495+
self.assertEqual(bf1.num_bits_used, 0)
496+
497+
# Test union with self
498+
bf1.update("test_item")
499+
initial_bits = bf1.num_bits_used
500+
bf1.union_with(bf1)
501+
self.assertEqual(bf1.num_bits_used, initial_bits)
502+
self.assertTrue(bf1.query("test_item"))
503+
504+
# Test intersection with self
505+
bf1.intersect(bf1)
506+
self.assertEqual(bf1.num_bits_used, initial_bits)
507+
self.assertTrue(bf1.query("test_item"))
508+
357509
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)