33 Copyright (C) 2017 Luca Calderoni, Dario Maio,
44 University of Bologna
55 Copyright (C) 2017 Paolo Palmieri,
6- Cranfield University
6+ University College Cork
77
88 This program is free software: you can redistribute it and/or modify
99 it under the terms of the GNU Lesser General Public License as published by
2525import base64
2626import csv
2727from sys import byteorder
28+ from datetime import datetime
2829
2930
3031class sbf :
@@ -150,6 +151,8 @@ def __init__(self, bit_mapping, hash_family, num_hashes, num_areas, hash_salt_pa
150151 # false positive probability for each area
151152 # (initialized in the method compute_area_fpp)
152153 #self.area_fpp = [0]*(self.num_areas + 1)
154+ # list of files from which elements have been inserted
155+ self .insert_file_list = []
153156
154157
155158 def __del__ (self ):
@@ -307,6 +310,8 @@ def insert_from_file(self, dataset_path, dataset_delimiter = ','):
307310 for self .row in self .dataset_reader :
308311 self .insert (self .row [1 ], int (self .row [0 ]))
309312
313+ self .insert_file_list .append (self .dataset_path )
314+
310315 del self .dataset_delimiter
311316 del self .dataset_path
312317 self .dataset_file .close ()
@@ -538,6 +543,12 @@ def print_filter(self, mode, precision = 5):
538543 if (self .mode not in [0 ,1 ]):
539544 raise AttributeError ("Invalid mode." )
540545
546+ self .expected_area_cells ()
547+ self .compute_area_fpp ()
548+ self .compute_apriori_area_fpp ()
549+ self .compute_apriori_area_isep ()
550+ self .compute_area_isep ()
551+
541552 print ('\n Spatial Bloom Filter stats:' )
542553 print ('---------------------------' )
543554 print ('Hash details:' )
@@ -549,6 +560,7 @@ def print_filter(self, mode, precision = 5):
549560 print (' - Filter sparsity: ' + str ('{:.{prec}f}' .format (round (self .filter_sparsity (), self .precision ), prec = self .precision )))
550561 print (' - Filter a-priori fpp: ' + str ('{:.{prec}f}' .format (round (self .filter_apriori_fpp (), self .precision ), prec = self .precision )))
551562 print (' - Filter fpp: ' + str ('{:.{prec}f}' .format (round (self .filter_fpp (), self .precision ), prec = self .precision )))
563+ print (' - Filter a-priori safeness probability: ' + str ('{:.{prec}f}' .format (round (self .safeness , self .precision ), prec = self .precision )))
552564 print (' - Number of mapped elements: ' + str (self .members ))
553565 print (' - Number of hash collisions: ' + str (self .collisions ))
554566
@@ -565,21 +577,16 @@ def print_filter(self, mode, precision = 5):
565577 print ('\n ' )
566578 print ('Area properties:' )
567579 print ('----------------' )
568- self .expected_area_cells ()
569580 for self .j in range (1 , self .num_areas + 1 ):
570581 self .potential_elements = (self .area_members [self .j ] * self .num_hashes ) - self .area_self_collisions [self .j ]
571582 print ('Area ' + str (self .j ).rjust (len (str (self .num_areas ))) + ': ' \
572583 + str (self .area_members [self .j ]) + ' members, ' \
573- + str ('{:.{prec}f}' . format ( round (self .area_expected_cells [self .j ], self . precision ), prec = self . precision )) + ' expected cells, ' \
584+ + str (round (self .area_expected_cells [self .j ])) + ' expected cells, ' \
574585 + str (self .area_cells [self .j ]) + ' cells out of ' \
575586 + str (self .potential_elements ) + ' potential (' \
576587 + str (self .area_self_collisions [self .j ]) + ' self-collisions)' )
577588
578589 print ('\n Emersion, FPP and ISEP:\n ' )
579- self .compute_area_fpp ()
580- self .compute_apriori_area_fpp ()
581- self .compute_apriori_area_isep ()
582- self .compute_area_isep ()
583590 for self .j in range (1 , self .num_areas + 1 ):
584591 print ('Area ' + str (self .j ).rjust (len (str (self .num_areas ))) + \
585592 ': expected emersion ' + str ('{:.{prec}f}' .format (round (self .expected_area_emersion (self .j ), self .precision ), prec = self .precision )) + \
@@ -588,14 +595,15 @@ def print_filter(self, mode, precision = 5):
588595 ', fpp ' + str ('{:.{prec}f}' .format (round (self .area_fpp [self .j ], self .precision ), prec = self .precision )) + \
589596 ', a-priori isep ' + str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )) + \
590597 ', expected ise ' + str ('{:.{prec}f}' .format (round ((self .area_apriori_isep [self .j ] * self .area_members [self .j ]), self .precision ), prec = self .precision )) + \
591- ', isep ' + str ('{:.{prec}f}' .format (round (self .area_isep [self .j ], self .precision ), prec = self .precision )))
598+ ', isep ' + str ('{:.{prec}f}' .format (round (self .area_isep [self .j ], self .precision ), prec = self .precision )) + \
599+ ', a-priori safep ' + str ('{:.{prec}f}' .format (round (self .area_apriori_safep [self .j ], self .precision ), prec = self .precision )))
592600
593601 del self .j
594602 del self .mode
595603 del self .potential_elements
596604
597605
598- def save_filter (self , filter_path , mode , precision = 5 ):
606+ def save_filter (self , mode , filter_path = '' , precision = 5 ):
599607 """ Saves the filter and related statistics onto a CSV file.
600608
601609 Saves to disk the filter or its statistics (according to the specified
@@ -614,16 +622,25 @@ def save_filter(self, filter_path, mode, precision = 5):
614622 OSError: The file cannot be created.
615623 """
616624
617- self .filter_path = filter_path
618625 self .mode = mode
626+ self .filter_path = filter_path
619627 self .precision = precision
620628
629+ if (self .filter_path == '' ):
630+ self .filter_path = 'sbf-stats-' + datetime .now ().strftime ("%Y%m%d-%H%M%S" ) + '.csv'
631+
621632 # Tries to open the specified file for writing
622633 try :
623634 with open (self .filter_path , 'w' ) as self .filter_file :
624635
625636 if (self .mode == 0 ):
626637
638+ self .compute_area_fpp ()
639+ self .compute_apriori_area_fpp ()
640+ self .compute_apriori_area_isep ()
641+ self .compute_area_isep ()
642+ self .expected_area_cells ()
643+
627644 self .filter_file .write ("hash_family" + ";" + self .hash_family + "\n " )
628645 self .filter_file .write ("hash_number" + ";" + str (self .num_hashes ) + "\n " )
629646 self .filter_file .write ("area_number" + ";" + str (self .num_areas ) + "\n " )
@@ -636,19 +653,15 @@ def save_filter(self, filter_path, mode, precision = 5):
636653 self .filter_file .write ("sparsity" + ";" + str ('{:.{prec}f}' .format (round (self .filter_sparsity (), self .precision ), prec = self .precision )) + "\n " )
637654 self .filter_file .write ("a-priori fpp" + ";" + str ('{:.{prec}f}' .format (round (self .filter_apriori_fpp (), self .precision ), prec = self .precision )) + "\n " )
638655 self .filter_file .write ("fpp" + ";" + str ('{:.{prec}f}' .format (round (self .filter_fpp (), self .precision ), prec = self .precision )) + "\n " )
639- self .filter_file .write ("area;members;expected cells;self-collisions;cells;expected emersion;emersion;a-priori fpp;fpp;a-priori isep;expected ise;isep\n " )
656+ self .filter_file .write ("a-priori safeness probability" + ";" + str ('{:.{prec}f}' .format (round (self .safeness , self .precision ), prec = self .precision )) + "\n " )
657+ self .filter_file .write ("area;members;expected cells;self-collisions;cells;expected emersion;emersion;a-priori fpp;fpp;a-priori isep;expected ise;isep;a-priori safep\n " )
640658
641659 # area-related parameters:
642660 # area, members, expected cells, self-collisions, cells, expected emersion, emersion, apriori_fpp, fpp, apriori_isep, expected ise, isep, apriori_safep
643- self .compute_area_fpp ()
644- self .compute_apriori_area_fpp ()
645- self .compute_apriori_area_isep ()
646- self .compute_area_isep ()
647- self .expected_area_cells ()
648661 for self .j in range (1 , self .num_areas + 1 ):
649662 self .filter_file .write (str (self .j ) + ";" + \
650663 str (self .area_members [self .j ]) + ";" + \
651- str ('{:.{prec}f}' . format ( round (self .area_expected_cells [self .j ], self . precision ), prec = self . precision )) + ";" + \
664+ str (round (self .area_expected_cells [self .j ])) + ";" + \
652665 str (self .area_self_collisions [self .j ]) + ";" + \
653666 str (self .area_cells [self .j ]) + ";" + \
654667 str ('{:.{prec}f}' .format (round (self .expected_area_emersion (self .j ), self .precision ), prec = self .precision )) + ";" + \
@@ -657,7 +670,8 @@ def save_filter(self, filter_path, mode, precision = 5):
657670 str ('{:.{prec}f}' .format (round (self .area_fpp [self .j ], self .precision ), prec = self .precision )) + ";" + \
658671 str ('{:.{prec}f}' .format (round (self .area_apriori_isep [self .j ], self .precision ), prec = self .precision )) + ";" + \
659672 str ('{:.{prec}f}' .format (round ((self .area_apriori_isep [self .j ] * self .area_members [self .j ]), self .precision ), prec = self .precision )) + ";" + \
660- str ('{:.{prec}f}' .format (round (self .area_isep [self .j ], self .precision ), prec = self .precision )) + "\n " )
673+ str ('{:.{prec}f}' .format (round (self .area_isep [self .j ], self .precision ), prec = self .precision )) + ";" + \
674+ str ('{:.{prec}f}' .format (round (self .area_apriori_safep [self .j ], self .precision ), prec = self .precision )) + "\n " )
661675
662676 del self .j
663677
@@ -818,25 +832,35 @@ def compute_apriori_area_isep(self):
818832 """
819833
820834 self .area_apriori_isep = [0 ]* (self .num_areas + 1 )
835+ self .area_apriori_safep = [0 ]* (self .num_areas + 1 )
836+ self .safeness = 1
821837
822838 for self .i in range (self .num_areas , 0 , - 1 ):
823839
824840 self .nfill = 0
825- self .p = 0
841+ self .p1 = 0
842+ self .p2 = 0
826843
827844 for self .j in range (self .i + 1 , self .num_areas + 1 ):
828845 self .nfill += self .area_members [self .j ]
829846
830- self .p = 1 - (1 / self .num_cells )
847+ self .p1 = 1 - (1 / self .num_cells )
831848
832- self .p = 1 - pow (self .p , (self .num_hashes * self .nfill ))
849+ self .p1 = 1 - pow (self .p1 , (self .num_hashes * self .nfill ))
833850
834- self .p = pow (self .p , self .num_hashes )
851+ self .p1 = pow (self .p1 , self .num_hashes )
852+
853+ self .p2 = 1 - self .p1
854+ self .p2 = pow (self .p2 , self .area_members [self .i ])
835855
836- self .area_apriori_isep [self .i ] = self .p
856+ self .safeness *= self .p2
857+
858+ self .area_apriori_isep [self .i ] = self .p1
859+ self .area_apriori_safep [self .i ] = self .p2
837860
838861 del self .nfill
839- del self .p
862+ del self .p1
863+ del self .p2
840864 del self .i
841865 del self .j
842866
0 commit comments