@@ -546,8 +546,9 @@ def print_filter(self, mode, precision = 5):
546546 print ('Filter details:' )
547547 print (' - Number of cells: ' + str (self .num_cells ))
548548 print (' - Size in bytes: ' + str (self .cell_size * self .num_cells ))
549- print (' - Filter sparsity: ' + str (round (self .filter_sparsity (), self .precision )))
550- print (' - Filter fpp: ' + str (round (self .filter_fpp (), self .precision )))
549+ print (' - Filter sparsity: ' + str ('{:.{prec}f}' .format (round (self .filter_sparsity (), self .precision ), prec = self .precision )))
550+ print (' - Filter a-priori fpp: ' + str ('{:.{prec}f}' .format (round (self .filter_apriori_fpp (), self .precision ), prec = self .precision )))
551+ print (' - Filter fpp: ' + str ('{:.{prec}f}' .format (round (self .filter_fpp (), self .precision ), prec = self .precision )))
551552 print (' - Number of mapped elements: ' + str (self .members ))
552553 print (' - Number of hash collisions: ' + str (self .collisions ))
553554
@@ -566,15 +567,22 @@ def print_filter(self, mode, precision = 5):
566567 print ('----------------' )
567568 for self .j in range (1 , self .num_areas + 1 ):
568569 self .potential_elements = (self .area_members [self .j ] * self .num_hashes ) - self .area_self_collisions [self .j ]
569- print ('Area ' + str (self .j ).rjust (3 ) + ': ' + str (self .area_members [self .j ]) + ' members, ' + str (self .area_cells [self .j ]) + ' cells out of ' + str (self .potential_elements ) + ' potential (' + str (self .area_self_collisions [self .j ]) + ' self-collisions)' )
570+ print ('Area ' + str (self .j ).rjust (len (str (self .num_areas ))) + ': ' \
571+ + str (self .area_members [self .j ]) + ' members, ' \
572+ + str (self .area_cells [self .j ]) + ' cells out of ' \
573+ + str (self .potential_elements ) + ' potential (' \
574+ + str (self .area_self_collisions [self .j ]) + ' self-collisions)' )
570575
571- print ('\n Emersion and Fpp :\n ' )
576+ print ('\n Emersion, FPP and ISEP :\n ' )
572577 self .compute_area_fpp ()
578+ self .compute_apriori_area_fpp ()
579+ self .compute_apriori_area_isep ()
573580 for self .j in range (1 , self .num_areas + 1 ):
574- if (self .area_flotation (self .j )):
575- print ('Area ' + str (self .j ) + ': emersion ' + str (round (self .area_emersion (self .j ), self .precision )) + ', flotation safe, fpp ' + str (round (self .area_fpp [self .j ], self .precision )))
576- else :
577- print ('Area ' + str (self .j ) + ': emersion ' + str (round (self .area_emersion (self .j ), self .precision )) + ', flotation unsafe, fpp ' + str (round (self .area_fpp [self .j ], self .precision )))
581+ print ('Area ' + str (self .j ).rjust (len (str (self .num_areas ))) + \
582+ ': emersion ' + str ('{:.{prec}f}' .format (round (self .area_emersion (self .j ), self .precision ), prec = self .precision )) + \
583+ ', a-priori fpp ' + str ('{:.{prec}f}' .format (round (self .area_apriori_fpp [self .j ], self .precision ), prec = self .precision )) + \
584+ ', fpp ' + str ('{:.{prec}f}' .format (round (self .area_fpp [self .j ], self .precision ), prec = self .precision )) + \
585+ ', a-priori isep ' + str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )))
578586
579587 del self .j
580588 del self .mode
@@ -584,13 +592,13 @@ def print_filter(self, mode, precision = 5):
584592 def save_filter (self , filter_path , mode , precision = 5 ):
585593 """ Saves the filter and related statistics onto a CSV file.
586594
587- Saves to disk the filter and related statistics (according to the specified
595+ Saves to disk the filter or its statistics (according to the specified
588596 operation mode) to the specified path.
589597
590598 Args:
591599 filter_path: the path to the file where to store the filter
592600 information.
593- mode: If 0, writes the SBF metadata only (CSV: key,value);
601+ mode: If 0, writes the SBF metadata (CSV: key,value);
594602 if 1, writes the SBF cells (CSV: value).
595603 precision: Sets the precision (number of decimal places) to use
596604 when printing float values.
@@ -619,14 +627,24 @@ def save_filter(self, filter_path, mode, precision = 5):
619627 self .filter_file .write ("byte_size" + ";" + str (self .cell_size * self .num_cells ) + "\n " )
620628 self .filter_file .write ("members" + ";" + str (self .members ) + "\n " )
621629 self .filter_file .write ("collisions" + ";" + str (self .collisions ) + "\n " )
622- self .filter_file .write ("sparsity" + ";" + str (round (self .filter_sparsity (), self .precision )) + "\n " )
623- self .filter_file .write ("fpp" + ";" + str (round (self .filter_fpp (), self .precision )) + "\n " )
630+ self .filter_file .write ("sparsity" + ";" + str ('{:.{prec}f}' .format (round (self .filter_sparsity (), self .precision ), prec = self .precision )) + "\n " )
631+ self .filter_file .write ("a-priori fpp" + ";" + str ('{:.{prec}f}' .format (round (self .filter_apriori_fpp (), self .precision ), prec = self .precision )) + "\n " )
632+ self .filter_file .write ("fpp" + ";" + str ('{:.{prec}f}' .format (round (self .filter_fpp (), self .precision ), prec = self .precision )) + "\n " )
624633
625634 # area-related parameters:
626- # area,members,self-collisions,cells,emersion,flotation, fpp
635+ # area, members, self-collisions, cells, emersion, apriori_fpp, fpp, apriori_isep
627636 self .compute_area_fpp ()
637+ self .compute_apriori_area_fpp ()
638+ self .compute_apriori_area_isep ()
628639 for self .j in range (1 , self .num_areas + 1 ):
629- self .filter_file .write (str (self .j ) + ";" + str (self .area_members [self .j ]) + ";" + str (self .area_self_collisions [self .j ]) + ";" + str (self .area_cells [self .j ]) + ";" + str (round (self .area_emersion (self .j ), self .precision )) + ";" + str (self .area_flotation (self .j )) + ";" + str (round (self .area_fpp [self .j ], self .precision )) + "\n " )
640+ self .filter_file .write (str (self .j ) + ";" + \
641+ str (self .area_members [self .j ]) + ";" + \
642+ str (self .area_self_collisions [self .j ]) + ";" + \
643+ str (self .area_cells [self .j ]) + ";" + \
644+ str ('{:.{prec}f}' .format (round (self .area_emersion (self .j ), self .precision ), prec = self .precision )) + ";" + \
645+ str ('{:.{prec}f}' .format (round (self .area_apriori_fpp [self .j ], self .precision ), prec = self .precision )) + ";" + \
646+ str ('{:.{prec}f}' .format (round (self .area_fpp [self .j ], self .precision ), prec = self .precision )) + ";" + \
647+ str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )) + "\n " )
630648
631649 del self .j
632650
@@ -659,7 +677,7 @@ def compute_area_fpp(self):
659677 the filter statistics).
660678
661679 Returns:
662- The list of false positives probability (fpp) for the areas.
680+ The list of a-posteriori false positives probability (fpp) for the areas.
663681 """
664682
665683 self .area_fpp = [0 ]* (self .num_areas + 1 )
@@ -688,6 +706,90 @@ def compute_area_fpp(self):
688706 return self .area_fpp
689707
690708
def compute_apriori_area_fpp(self):
    """Compute the a-priori false positive probability (fpp) of each area.

    Areas are processed from the highest index down to 1. For area ``i``
    the members of areas ``i..num_areas`` are summed into ``c`` and

        p = (1 - (1 - 1/num_cells) ** (num_hashes * c)) ** num_hashes

    is evaluated. The values already computed for the areas above ``i``
    are then subtracted from ``p`` one by one, and a negative result is
    clamped to 0.

    The result is stored in ``self.area_apriori_fpp``; index 0 is unused
    and stays 0. Only local variables are used for the intermediate
    values, so no scratch attributes are created on (or deleted from) the
    instance and the method also works when there are no areas at all.

    Reads ``self.num_areas``, ``self.area_members``, ``self.num_cells``
    and ``self.num_hashes``.

    Returns:
        The list of a-priori false positive probabilities, indexed by area.
    """
    num_areas = self.num_areas

    self.area_apriori_fpp = [0] * (num_areas + 1)
    apriori_fpp = self.area_apriori_fpp

    for i in range(num_areas, 0, -1):
        # Members of this area and of every area with a higher index.
        c = sum(self.area_members[j] for j in range(i, num_areas + 1))

        p = 1 - (1 / self.num_cells)
        p = 1 - pow(p, self.num_hashes * c)
        p = pow(p, self.num_hashes)

        # Subtract the shares already attributed to the higher areas, in
        # ascending index order (kept sequential so that floating-point
        # rounding is unchanged).
        for j in range(i + 1, num_areas + 1):
            p -= apriori_fpp[j]

        # Rounding can push the difference slightly below zero.
        if p < 0:
            p = 0

        apriori_fpp[i] = p

    return apriori_fpp
752+
753+
def compute_apriori_area_isep(self):
    """Compute the a-priori inter-set error probability (isep) per area.

    Areas are processed from the highest index down to 1. For area ``i``
    the members of the areas with a strictly higher index
    (``i+1..num_areas``) are summed into ``nfill`` and the value

        (1 - (1 - 1/num_cells) ** (num_hashes * nfill)) ** num_hashes

    is stored for that area.

    The result is stored in ``self.area_apriori_isep``; index 0 is unused
    and stays 0. Intermediate values live in local variables only: the
    highest area has no areas above it, so an inner loop index may never
    be bound, and deleting it from the instance afterwards would raise
    ``AttributeError`` (e.g. with a single area).

    Reads ``self.num_areas``, ``self.area_members``, ``self.num_cells``
    and ``self.num_hashes``.

    Returns:
        The list of a-priori inter-set error probabilities, indexed by
        area.
    """
    num_areas = self.num_areas

    self.area_apriori_isep = [0] * (num_areas + 1)
    apriori_isep = self.area_apriori_isep

    for i in range(num_areas, 0, -1):
        # Members of the areas with a higher index than the current one;
        # this is an empty sum (0) for the highest area.
        nfill = sum(self.area_members[j] for j in range(i + 1, num_areas + 1))

        p = 1 - (1 / self.num_cells)
        p = 1 - pow(p, self.num_hashes * nfill)

        apriori_isep[i] = pow(p, self.num_hashes)

    return apriori_isep
791+
792+
691793 def filter_sparsity (self ):
692794 """ Returns the sparsity of the SBF.
693795
@@ -713,10 +815,10 @@ def filter_fpp(self):
713815 filter (i.e. not area-specific).
714816
715817 Returns:
716- The filter false positives probability (fpp).
818+ The filter a-posteriori false positives probability (fpp).
717819 """
718820
719- self .c = 0
821+ self .c = 0
720822
721823 # Counts non-zero cells
722824 for self .i in range (1 , self .num_areas + 1 ):
@@ -727,6 +829,25 @@ def filter_fpp(self):
727829 return pow (self .p , self .num_hashes )
728830
729831
def filter_apriori_fpp(self):
    """Compute the a-priori false positive probability of the whole filter.

    The value is computed over the entire filter (i.e. it is not
    area-specific) as

        (1 - (1 - 1/num_cells) ** (num_hashes * members)) ** num_hashes

    The intermediate value is kept in a local variable, so the call does
    not leave a scratch attribute behind on the instance.

    Reads ``self.num_cells``, ``self.num_hashes`` and ``self.members``.

    Returns:
        The filter a-priori false positive probability (fpp).
    """
    p = 1 - (1 / self.num_cells)
    p = 1 - pow(p, self.num_hashes * self.members)

    return pow(p, self.num_hashes)
849+
850+
730851 def area_emersion (self , area ):
731852 """ Computes the emersion value for an area.
732853
@@ -750,23 +871,3 @@ def area_emersion(self, area):
750871 return - 1
751872 else :
752873 return (self .area_cells [self .area ] / ((self .area_members [self .area ] * self .num_hashes ) - self .area_self_collisions [self .area ]))
753-
754-
755- def area_flotation (self , area ):
756- """ Computes the flotation value for an area.
757-
758- Computes the flotation value for the input area. The flotation is True if
759- it is not possible for an element belonging to the area to be recognized
760- as belonging to a different area, False if collisions may cause this to
761- happen.
762-
763- Returns:
764- The flotation value (boolean).
765- """
766-
767- self .area = area
768-
769- if (self .area_members [self .area ] == 0 ):
770- return True
771- else :
772- return (self .area_members [self .area ] * self .num_hashes ) - self .area_self_collisions [self .area ] - self .area_cells [self .area ] < self .num_hashes
0 commit comments