Skip to content

Commit 0de5033

Browse files
authored
Added new probabilistic functions
1 parent b485d67 commit 0de5033

1 file changed

Lines changed: 118 additions & 4 deletions

File tree

sbf.py

Lines changed: 118 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -565,10 +565,12 @@ def print_filter(self, mode, precision = 5):
565565
print('\n')
566566
print('Area properties:')
567567
print('----------------')
568+
self.expected_area_cells()
568569
for self.j in range(1, self.num_areas + 1):
569570
self.potential_elements = (self.area_members[self.j] * self.num_hashes) - self.area_self_collisions[self.j]
570571
print('Area ' + str(self.j).rjust(len(str(self.num_areas))) + ': ' \
571572
+ str(self.area_members[self.j]) + ' members, ' \
573+
+ str('{:.{prec}f}'.format(round(self.area_expected_cells[self.j], self.precision), prec=self.precision)) + ' expected cells, ' \
572574
+ str(self.area_cells[self.j]) + ' cells out of ' \
573575
+ str(self.potential_elements) + ' potential (' \
574576
+ str(self.area_self_collisions[self.j]) + ' self-collisions)')
@@ -577,12 +579,16 @@ def print_filter(self, mode, precision = 5):
577579
self.compute_area_fpp()
578580
self.compute_apriori_area_fpp()
579581
self.compute_apriori_area_isep()
582+
self.compute_area_isep()
580583
for self.j in range(1, self.num_areas + 1):
581584
print('Area ' + str(self.j).rjust(len(str(self.num_areas))) + \
582-
': emersion ' + str('{:.{prec}f}'.format(round(self.area_emersion(self.j), self.precision), prec=self.precision)) + \
585+
': expected emersion ' + str('{:.{prec}f}'.format(round(self.expected_area_emersion(self.j), self.precision), prec=self.precision)) + \
586+
', emersion ' + str('{:.{prec}f}'.format(round(self.area_emersion(self.j), self.precision), prec=self.precision)) + \
583587
', a-priori fpp ' + str('{:.{prec}f}'.format(round(self.area_apriori_fpp[self.j], self.precision), prec=self.precision)) + \
584588
', fpp ' + str('{:.{prec}f}'.format(round(self.area_fpp[self.j], self.precision), prec=self.precision)) + \
585-
', a-priori isep ' + str('{:.{prec}f}'.format(round(self.area_apriori_isep[self.j], self.precision), prec=self.precision)))
589+
', a-priori isep ' + str('{:.{prec}f}'.format(round(self.area_apriori_isep[self.j], self.precision), prec=self.precision)) + \
590+
', expected ise ' + str('{:.{prec}f}'.format(round((self.area_apriori_isep[self.j] * self.area_members[self.j]), self.precision), prec=self.precision)) + \
591+
', isep ' + str('{:.{prec}f}'.format(round(self.area_isep[self.j], self.precision), prec=self.precision)))
586592

587593
del self.j
588594
del self.mode
@@ -630,21 +636,28 @@ def save_filter(self, filter_path, mode, precision = 5):
630636
self.filter_file.write("sparsity" + ";" + str('{:.{prec}f}'.format(round(self.filter_sparsity(), self.precision), prec=self.precision)) + "\n")
631637
self.filter_file.write("a-priori fpp" + ";" + str('{:.{prec}f}'.format(round(self.filter_apriori_fpp(), self.precision), prec=self.precision)) + "\n")
632638
self.filter_file.write("fpp" + ";" + str('{:.{prec}f}'.format(round(self.filter_fpp(), self.precision), prec=self.precision)) + "\n")
639+
self.filter_file.write("area;members;expected cells;self-collisions;cells;expected emersion;emersion;a-priori fpp;fpp;a-priori isep;expected ise;isep\n")
633640

634641
# area-related parameters:
635-
# area, members, self-collisions, cells, emersion, apriori_fpp, fpp, apriori_isep
642+
# area, members, expected cells, self-collisions, cells, expected emersion, emersion, apriori_fpp, fpp, apriori_isep, expected ise, isep
636643
self.compute_area_fpp()
637644
self.compute_apriori_area_fpp()
638645
self.compute_apriori_area_isep()
646+
self.compute_area_isep()
647+
self.expected_area_cells()
639648
for self.j in range(1, self.num_areas+1):
640649
self.filter_file.write(str(self.j) + ";" + \
641650
str(self.area_members[self.j]) + ";" + \
651+
str('{:.{prec}f}'.format(round(self.area_expected_cells[self.j], self.precision), prec=self.precision)) + ";" + \
642652
str(self.area_self_collisions[self.j]) + ";" + \
643653
str(self.area_cells[self.j]) + ";" + \
654+
str('{:.{prec}f}'.format(round(self.expected_area_emersion(self.j), self.precision), prec=self.precision)) + ";" + \
644655
str('{:.{prec}f}'.format(round(self.area_emersion(self.j), self.precision), prec=self.precision)) + ";" + \
645656
str('{:.{prec}f}'.format(round(self.area_apriori_fpp[self.j], self.precision), prec=self.precision)) + ";" + \
646657
str('{:.{prec}f}'.format(round(self.area_fpp[self.j], self.precision), prec=self.precision)) + ";" + \
647-
str('{:.{prec}f}'.format(round(self.area_apriori_isep[self.j], self.precision), prec=self.precision)) + "\n")
658+
str('{:.{prec}f}'.format(round(self.area_apriori_isep[self.j], self.precision), prec=self.precision)) + ";" + \
659+
str('{:.{prec}f}'.format(round((self.area_apriori_isep[self.j] * self.area_members[self.j]), self.precision), prec=self.precision)) + ";" + \
660+
str('{:.{prec}f}'.format(round(self.area_isep[self.j], self.precision), prec=self.precision)) + "\n")
648661

649662
del self.j
650663

@@ -751,6 +764,46 @@ def compute_apriori_area_fpp(self):
751764
return self.area_apriori_fpp
752765

753766

767+
def expected_area_cells(self):
    """ Computes the expected number of cells for each area

    Computes the expected number of cells for each area. The expected value
    depends only on the filter properties (num_cells, num_hashes, num_areas
    and area_members) and not on an actual instance of the filter.

    For area i, with nfill being the total number of members of all the
    areas with an index greater than i, the expected value is:

        num_cells
        * (1 - (1 - 1/num_cells) ** (num_hashes * area_members[i]))
        * (1 - 1/num_cells) ** (num_hashes * nfill)

    The result is also stored in self.area_expected_cells, which is indexed
    by area (index 0 is unused and left at 0).

    Returns:
        The list of expected number of cells for the areas.
    """

    # Probability that a single hash of a single element misses a given cell
    miss = 1 - (1 / self.num_cells)

    self.area_expected_cells = [0]*(self.num_areas + 1)

    # Areas are visited from the highest index down, so the number of
    # members belonging to higher areas is a running sum rather than a
    # nested re-summation. Temporaries are plain locals: keeping them on
    # self (and deleting them afterwards) failed with AttributeError when a
    # loop never ran, e.g. with a single area.
    higher_members = 0
    for area in range(self.num_areas, 0, -1):
        members = self.area_members[area]

        # At least one hash of this area's members lands on the cell ...
        hit_by_area = 1 - pow(miss, (self.num_hashes * members))
        # ... and no hash of any member of a higher area does.
        spared_by_higher = pow(miss, (self.num_hashes * higher_members))

        self.area_expected_cells[area] = self.num_cells * hit_by_area * spared_by_higher

        higher_members += members

    return self.area_expected_cells
805+
806+
754807
def compute_apriori_area_isep(self):
755808
""" Computes a-priori inter-set error probability for each area.
756809
@@ -790,6 +843,33 @@ def compute_apriori_area_isep(self):
790843
return self.area_apriori_isep
791844

792845

846+
def compute_area_isep(self):
    """ Computes a-posteriori inter-set error probability for each area.

    Computes the a-posteriori inter-set error probability (isep) for each
    area as (1 - emersion) ** num_hashes, where emersion is the value
    returned by area_emersion for that area. This method needs to be called
    manually after the last insert operation in order to generate the
    statistics correctly. Alternatively, it is called by both the
    print_filter and the save_filter (when saving the filter statistics).

    The result is also stored in self.area_isep, which is indexed by area
    (index 0 is unused and left at 0).

    Returns:
        The list of a-posteriori inter-set error probability (isep) for the areas.
    """

    self.area_isep = [0]*(self.num_areas + 1)

    # NOTE(review): area_emersion has a code path that returns -1; for such
    # an area the value stored here is 2 ** num_hashes, which is not a
    # probability. Confirm how that sentinel should be reported.
    #
    # The loop variable and the intermediate value are locals so that no
    # scratch attribute is left on (or deleted from) the instance; the areas
    # are still visited from the highest index down.
    for area in range(self.num_areas, 0, -1):
        submerged = 1 - self.area_emersion(area)
        self.area_isep[area] = pow(submerged, self.num_hashes)

    return self.area_isep
871+
872+
793873
def filter_sparsity(self):
794874
""" Returns the sparsity of the SBF.
795875
@@ -871,3 +951,37 @@ def area_emersion(self, area):
871951
return -1
872952
else:
873953
return (self.area_cells[self.area] / ((self.area_members[self.area] * self.num_hashes) - self.area_self_collisions[self.area]))
954+
955+
956+
def expected_area_emersion(self, area):
    """ Computes the expected emersion value for an area.

    Computes the expected emersion value for the input area. The expected
    value depends only on the filter properties (num_cells, num_hashes,
    num_areas and area_members) and not on an actual instance of the filter.
    The emersion indicates how much the values in the cells that map elements
    of an area store the area label value (as opposed to a higher value). It
    is the ratio between the cells actually set to the area label in the
    filter and the cells that would be theoretically set to the area label if
    no collision from higher areas happened.

    With nfill being the total number of members of all the areas with an
    index greater than the input area, the value returned is:

        (1 - 1/num_cells) ** (num_hashes * nfill)

    Args:
        area: the area for which to calculate the emersion value.

    Returns:
        The expected emersion value (float).
    """

    # Temporaries are locals on purpose: this method is called from inside
    # loops that use instance attributes as loop variables, so it must not
    # overwrite them or leave scratch attributes behind.
    higher_members = sum(self.area_members[other]
                         for other in range(area + 1, self.num_areas + 1))

    # Probability that a single hash of a single element misses a given cell
    miss = 1 - (1 / self.num_cells)

    return pow(miss, (self.num_hashes * higher_members))

0 commit comments

Comments
 (0)