@@ -565,10 +565,12 @@ def print_filter(self, mode, precision = 5):
565565 print ('\n ' )
566566 print ('Area properties:' )
567567 print ('----------------' )
568+ self .expected_area_cells ()
568569 for self .j in range (1 , self .num_areas + 1 ):
569570 self .potential_elements = (self .area_members [self .j ] * self .num_hashes ) - self .area_self_collisions [self .j ]
570571 print ('Area ' + str (self .j ).rjust (len (str (self .num_areas ))) + ': ' \
571572 + str (self .area_members [self .j ]) + ' members, ' \
573+ + str ('{:.{prec}f}' .format (round (self .area_expected_cells [self .j ], self .precision ), prec = self .precision )) + ' expected cells, ' \
572574 + str (self .area_cells [self .j ]) + ' cells out of ' \
573575 + str (self .potential_elements ) + ' potential (' \
574576 + str (self .area_self_collisions [self .j ]) + ' self-collisions)' )
@@ -577,12 +579,16 @@ def print_filter(self, mode, precision = 5):
577579 self .compute_area_fpp ()
578580 self .compute_apriori_area_fpp ()
579581 self .compute_apriori_area_isep ()
582+ self .compute_area_isep ()
580583 for self .j in range (1 , self .num_areas + 1 ):
581584 print ('Area ' + str (self .j ).rjust (len (str (self .num_areas ))) + \
582- ': emersion ' + str ('{:.{prec}f}' .format (round (self .area_emersion (self .j ), self .precision ), prec = self .precision )) + \
585+ ': expected emersion ' + str ('{:.{prec}f}' .format (round (self .expected_area_emersion (self .j ), self .precision ), prec = self .precision )) + \
586+ ', emersion ' + str ('{:.{prec}f}' .format (round (self .area_emersion (self .j ), self .precision ), prec = self .precision )) + \
583587 ', a-priori fpp ' + str ('{:.{prec}f}' .format (round (self .area_apriori_fpp [self .j ], self .precision ), prec = self .precision )) + \
584588 ', fpp ' + str ('{:.{prec}f}' .format (round (self .area_fpp [self .j ], self .precision ), prec = self .precision )) + \
585- ', a-priori isep ' + str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )))
589+ ', a-priori isep ' + str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )) + \
590+ ', expected ise ' + str ('{:.{prec}f}' .format (round ((self .area_apriori_isep [self .j ] * self .area_members [self .j ]), self .precision ), prec = self .precision )) + \
591+ ', isep ' + str ('{:.{prec}f}' .format (round (self .area_isep [self .j ], self .precision ), prec = self .precision )))
586592
587593 del self .j
588594 del self .mode
@@ -630,21 +636,28 @@ def save_filter(self, filter_path, mode, precision = 5):
630636 self .filter_file .write ("sparsity" + ";" + str ('{:.{prec}f}' .format (round (self .filter_sparsity (), self .precision ), prec = self .precision )) + "\n " )
631637 self .filter_file .write ("a-priori fpp" + ";" + str ('{:.{prec}f}' .format (round (self .filter_apriori_fpp (), self .precision ), prec = self .precision )) + "\n " )
632638 self .filter_file .write ("fpp" + ";" + str ('{:.{prec}f}' .format (round (self .filter_fpp (), self .precision ), prec = self .precision )) + "\n " )
639+ self .filter_file .write ("area;members;expected cells;self-collisions;cells;expected emersion;emersion;a-priori fpp;fpp;a-priori isep;expected ise;isep\n " )
633640
634641 # area-related parameters:
635- # area, members, self-collisions, cells, emersion, apriori_fpp, fpp, apriori_isep
642+ # area, members, expected cells, self-collisions, cells, expected emersion, emersion, apriori_fpp, fpp, apriori_isep, expected ise, isep
636643 self .compute_area_fpp ()
637644 self .compute_apriori_area_fpp ()
638645 self .compute_apriori_area_isep ()
646+ self .compute_area_isep ()
647+ self .expected_area_cells ()
639648 for self .j in range (1 , self .num_areas + 1 ):
640649 self .filter_file .write (str (self .j ) + ";" + \
641650 str (self .area_members [self .j ]) + ";" + \
651+ str ('{:.{prec}f}' .format (round (self .area_expected_cells [self .j ], self .precision ), prec = self .precision )) + ";" + \
642652 str (self .area_self_collisions [self .j ]) + ";" + \
643653 str (self .area_cells [self .j ]) + ";" + \
654+ str ('{:.{prec}f}' .format (round (self .expected_area_emersion (self .j ), self .precision ), prec = self .precision )) + ";" + \
644655 str ('{:.{prec}f}' .format (round (self .area_emersion (self .j ), self .precision ), prec = self .precision )) + ";" + \
645656 str ('{:.{prec}f}' .format (round (self .area_apriori_fpp [self .j ], self .precision ), prec = self .precision )) + ";" + \
646657 str ('{:.{prec}f}' .format (round (self .area_fpp [self .j ], self .precision ), prec = self .precision )) + ";" + \
647- str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )) + "\n " )
658+ str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )) + ";" + \
659+ str ('{:.{prec}f}' .format (round ((self .area_apriori_isep [self .j ] * self .area_members [self .j ]), self .precision ), prec = self .precision )) + ";" + \
660+ str ('{:.{prec}f}' .format (round (self .area_isep [self .j ], self .precision ), prec = self .precision )) + "\n " )
648661
649662 del self .j
650663
@@ -751,6 +764,46 @@ def compute_apriori_area_fpp(self):
751764 return self .area_apriori_fpp
752765
753766
def expected_area_cells(self):
    """Compute the expected number of cells for each area.

    The expected value depends only on the filter properties (number of
    cells, number of hash functions, number of members per area) and not
    on an actual instance of the filter.

    With ``q = 1 - 1 / num_cells`` and ``fill`` the total number of members
    of all areas whose label is greater than ``i``, the value for area
    ``i`` is::

        num_cells * (1 - q ** (num_hashes * members[i])) * q ** (num_hashes * fill)

    Only local variables are used for the intermediate values, so the
    method works for any number of areas (including zero or one) and does
    not touch unrelated instance attributes.

    Returns:
        The list of expected number of cells for the areas, indexed by
        area label (index 0 is unused and stays 0). The same list is
        stored in ``self.area_expected_cells``.
    """
    expected = [0] * (self.num_areas + 1)
    self.area_expected_cells = expected

    # Probability-like factor shared by every area: 1 - 1/num_cells.
    keep = 1 - (1 / self.num_cells)

    # Areas are visited from the highest label down, so the number of
    # members belonging to higher areas can be kept as a running total
    # instead of being summed again for every area.
    higher_members = 0
    for area in range(self.num_areas, 0, -1):
        members = self.area_members[area]
        hit = 1 - pow(keep, self.num_hashes * members)
        survive = pow(keep, self.num_hashes * higher_members)
        expected[area] = self.num_cells * hit * survive
        higher_members += members

    return self.area_expected_cells
805+
806+
754807 def compute_apriori_area_isep (self ):
755808 """ Computes a-priori inter-set error probability for each area.
756809
@@ -790,6 +843,33 @@ def compute_apriori_area_isep(self):
790843 return self .area_apriori_isep
791844
792845
def compute_area_isep(self):
    """Compute the a-posteriori inter-set error probability for each area.

    For every area the value is ``(1 - emersion) ** num_hashes``, where
    ``emersion`` is the result of ``self.area_emersion(area)``. The method
    has to be called after the last insert operation for the statistics to
    reflect the final filter; ``print_filter`` and ``save_filter`` call it
    themselves before reporting.

    Intermediate values are kept in local variables, so the method also
    works when there are no areas and leaves no temporary attributes on
    the instance.

    Returns:
        The list of a-posteriori inter-set error probabilities (isep),
        indexed by area label (index 0 is unused and stays 0). The same
        list is stored in ``self.area_isep``.
    """
    isep = [0] * (self.num_areas + 1)
    self.area_isep = isep

    # Areas are processed from the highest label down to 1.
    for area in range(self.num_areas, 0, -1):
        # NOTE(review): area_emersion() has a branch that returns -1; for
        # such an area this expression evaluates to 2 ** num_hashes, which
        # is not a probability. Confirm whether that case needs handling.
        isep[area] = pow(1 - self.area_emersion(area), self.num_hashes)

    return self.area_isep
871+
872+
793873 def filter_sparsity (self ):
794874 """ Returns the sparsity of the SBF.
795875
@@ -871,3 +951,37 @@ def area_emersion(self, area):
871951 return - 1
872952 else :
873953 return (self .area_cells [self .area ] / ((self .area_members [self .area ] * self .num_hashes ) - self .area_self_collisions [self .area ]))
954+
955+
def expected_area_emersion(self, area):
    """Compute the expected emersion value for an area.

    The expected value depends only on the filter properties (number of
    cells, number of hash functions, number of members per area) and not
    on an actual instance of the filter.

    The emersion indicates how much the cells that map elements of an area
    store the area label value (as opposed to a higher value). It is the
    ratio between the cells actually set to the area label in the filter
    and the cells that would theoretically be set to the area label if no
    collision from higher areas happened.

    The returned value is ``(1 - 1 / num_cells) ** (num_hashes * fill)``,
    where ``fill`` is the total number of members of all areas whose label
    is greater than ``area``.

    Only local variables are used, so calling this method from inside a
    loop of another method does not overwrite instance attributes.

    Args:
        area: the area for which to calculate the emersion value.

    Returns:
        The expected emersion value (float).
    """
    # Members of every area with a label higher than the requested one.
    higher_members = sum(
        self.area_members[label]
        for label in range(area + 1, self.num_areas + 1)
    )

    keep = 1 - (1 / self.num_cells)

    return pow(keep, self.num_hashes * higher_members)
0 commit comments