Skip to content

Commit 95b556a

Browse files
committed
Added string output
1 parent e0fa4e1 commit 95b556a

2 files changed

Lines changed: 142 additions & 0 deletions

File tree

src/bloom_filter_wrapper.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,17 @@ void bind_bloom_filter(nb::module_ &m, const char* name) {
9393
.def("intersect", &bloom_filter_type::intersect,
9494
nb::arg("other"),
9595
"Performs an intersection operation with another Bloom filter. Both filters must have the same capacity, number of hashes, and seed.")
96+
.def("invert", &bloom_filter_type::invert,
97+
"Inverts all bits in the Bloom filter. This changes the meaning of the filter from 'might have seen' to 'definitely have not seen'.")
98+
.def("to_string", &bloom_filter_type::to_string,
99+
nb::arg("print_filter")=false,
100+
"Returns a string representation of the Bloom filter\n\n"
101+
":param print_filter: If True, includes the actual bit array in the output\n:type print_filter: bool, optional\n"
102+
":return: String representation of the filter\n:rtype: str")
103+
.def("__str__", [](const bloom_filter_type& self) { return self.to_string(false); },
104+
"Returns a string summary of the Bloom filter (without printing the bit array)")
105+
.def("__copy__", [](const bloom_filter_type& self) { return bloom_filter_type(self); },
106+
"Returns a copy of the Bloom filter")
96107
.def("is_compatible", &bloom_filter_type::is_compatible,
97108
nb::arg("other"),
98109
"Returns True if the other Bloom filter is compatible for union/intersection operations (same capacity, num_hashes, and seed)")

tests/bloom_filter_test.py

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -506,5 +506,136 @@ def test_union_intersection_edge_cases(self):
506506
self.assertEqual(bf1.num_bits_used, initial_bits)
507507
self.assertTrue(bf1.query("test_item"))
508508

509+
def test_invert_operation(self):
    """Check bit accounting and query results after inverting a populated filter.

    Inserts the integers 0..n-1, inverts the filter, and then verifies that
    the used-bit count is complemented, that none of the inserted items
    still match, and that many never-inserted items now match.
    """
    num_bits = 8192
    num_hashes = 3

    bf = bloom_filter.create_by_size(num_bits, num_hashes)

    # Populate the filter
    n = 500
    for i in range(n):
        bf.update(i)

    num_bits_set = bf.num_bits_used
    bf.invert()

    # After inversion, bits used should be capacity - original_bits_used
    self.assertEqual(bf.num_bits_used, num_bits - num_bits_set)

    # Every bit an inserted item maps to was set before the inversion and is
    # therefore clear afterwards, so none of the original items can match.
    # This is the same strict absence that test_invert_full_filter asserts,
    # so no allowance for false positives is needed here.
    num_found = sum(1 for i in range(n) if bf.query(i))
    self.assertEqual(num_found, 0)

    # Many items that were never inserted should now be reported as "present"
    num_found = sum(1 for i in range(n, num_bits) if bf.query(i))

    # Should find more items than were originally added
    self.assertGreater(num_found, n)
544+
545+
def test_invert_empty_filter(self):
    """Inverting an empty filter sets every bit; inverting again empties it."""
    capacity = 1024
    flt = bloom_filter.create_by_size(capacity, 5, seed=12345)
    probes = [f"item{k}" for k in range(1, 6)]

    # A freshly created filter reports itself empty with no bits in use
    self.assertTrue(flt.is_empty())
    self.assertEqual(flt.num_bits_used, 0)

    # First inversion: every bit is now in use, so every probe matches
    flt.invert()
    self.assertEqual(flt.num_bits_used, capacity)
    for probe in probes:
        self.assertTrue(flt.query(probe))

    # Second inversion: back to the empty state, so no probe matches
    flt.invert()
    self.assertTrue(flt.is_empty())
    self.assertEqual(flt.num_bits_used, 0)
    for probe in probes:
        self.assertFalse(flt.query(probe))
573+
574+
def test_invert_full_filter(self):
    """Invert a small, heavily populated filter and then restore it."""
    capacity = 64
    # Deliberately tiny filter so that 50 insertions occupy most of the bits
    flt = bloom_filter.create_by_size(capacity, 3, seed=12345)

    labels = [f"item_{idx}" for idx in range(50)]
    for label in labels:
        flt.update(label)

    used_before = flt.num_bits_used

    # One inversion complements the used-bit count and hides every inserted item
    flt.invert()
    self.assertEqual(flt.num_bits_used, capacity - used_before)
    for label in labels:
        self.assertFalse(flt.query(label))

    # A second inversion restores the original count and the original answers
    flt.invert()
    self.assertEqual(flt.num_bits_used, used_before)
    for label in labels:
        self.assertTrue(flt.query(label))
603+
604+
def test_invert_mathematical_properties(self):
    """Check that invert() complements the bit count and is its own inverse."""
    num_bits = 1024
    bf = bloom_filter.create_by_size(num_bits, 5, seed=12345)

    test_items = ["item1", "item2", "item3"]
    for item in test_items:
        bf.update(item)

    # Record initial state
    initial_bits = bf.num_bits_used

    # First inversion: the used-bit count becomes its complement. The sibling
    # test_invert_empty_filter establishes that a filter created with these
    # parameters has exactly num_bits bits.
    bf.invert()
    bits_after_first = bf.num_bits_used
    self.assertEqual(bits_after_first, num_bits - initial_bits)

    # Second inversion: invert(invert(filter)) == filter
    bf.invert()
    bits_after_second = bf.num_bits_used
    self.assertEqual(bits_after_second, initial_bits)

    # Double inversion preserves the original query behavior
    for item in test_items:
        self.assertTrue(bf.query(item))

    # The filter still holds its items, so it must not report empty
    self.assertFalse(bf.is_empty())
639+
509640
if __name__ == '__main__':
510641
unittest.main()

0 commit comments

Comments
 (0)