Skip to content

Commit 84a6869

Browse files
committed
Improved probabilistic functions
1 parent 6214150 commit 84a6869

1 file changed

Lines changed: 48 additions & 24 deletions

File tree

sbf.py

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Copyright (C) 2017 Luca Calderoni, Dario Maio,
44
University of Bologna
55
Copyright (C) 2017 Paolo Palmieri,
6-
Cranfield University
6+
University College Cork
77
88
This program is free software: you can redistribute it and/or modify
99
it under the terms of the GNU Lesser General Public License as published by
@@ -25,6 +25,7 @@
2525
import base64
2626
import csv
2727
from sys import byteorder
28+
from datetime import datetime
2829

2930

3031
class sbf:
@@ -150,6 +151,8 @@ def __init__(self, bit_mapping, hash_family, num_hashes, num_areas, hash_salt_pa
150151
# false positive probability for each area
151152
# (initialized in the method compute_area_fpp)
152153
#self.area_fpp = [0]*(self.num_areas + 1)
154+
# list of files from which elements have been inserted
155+
self.insert_file_list = []
153156

154157

155158
def __del__(self):
@@ -307,6 +310,8 @@ def insert_from_file(self, dataset_path, dataset_delimiter = ','):
307310
for self.row in self.dataset_reader:
308311
self.insert(self.row[1], int(self.row[0]))
309312

313+
self.insert_file_list.append(self.dataset_path)
314+
310315
del self.dataset_delimiter
311316
del self.dataset_path
312317
self.dataset_file.close()
@@ -538,6 +543,12 @@ def print_filter(self, mode, precision = 5):
538543
if (self.mode not in [0,1]):
539544
raise AttributeError("Invalid mode.")
540545

546+
self.expected_area_cells()
547+
self.compute_area_fpp()
548+
self.compute_apriori_area_fpp()
549+
self.compute_apriori_area_isep()
550+
self.compute_area_isep()
551+
541552
print('\nSpatial Bloom Filter stats:')
542553
print('---------------------------')
543554
print('Hash details:')
@@ -549,6 +560,7 @@ def print_filter(self, mode, precision = 5):
549560
print(' - Filter sparsity: ' + str('{:.{prec}f}'.format(round(self.filter_sparsity(), self.precision), prec=self.precision)))
550561
print(' - Filter a-priori fpp: ' + str('{:.{prec}f}'.format(round(self.filter_apriori_fpp(), self.precision), prec=self.precision)))
551562
print(' - Filter fpp: ' + str('{:.{prec}f}'.format(round(self.filter_fpp(), self.precision), prec=self.precision)))
563+
print(' - Filter a-priori safeness probability: ' + str('{:.{prec}f}'.format(round(self.safeness, self.precision), prec=self.precision)))
552564
print(' - Number of mapped elements: ' + str(self.members))
553565
print(' - Number of hash collisions: ' + str(self.collisions))
554566

@@ -565,21 +577,16 @@ def print_filter(self, mode, precision = 5):
565577
print('\n')
566578
print('Area properties:')
567579
print('----------------')
568-
self.expected_area_cells()
569580
for self.j in range(1, self.num_areas + 1):
570581
self.potential_elements = (self.area_members[self.j] * self.num_hashes) - self.area_self_collisions[self.j]
571582
print('Area ' + str(self.j).rjust(len(str(self.num_areas))) + ': ' \
572583
+ str(self.area_members[self.j]) + ' members, ' \
573-
+ str('{:.{prec}f}'.format(round(self.area_expected_cells[self.j], self.precision), prec=self.precision)) + ' expected cells, ' \
584+
+ str(round(self.area_expected_cells[self.j])) + ' expected cells, ' \
574585
+ str(self.area_cells[self.j]) + ' cells out of ' \
575586
+ str(self.potential_elements) + ' potential (' \
576587
+ str(self.area_self_collisions[self.j]) + ' self-collisions)')
577588

578589
print('\nEmersion, FPP and ISEP:\n')
579-
self.compute_area_fpp()
580-
self.compute_apriori_area_fpp()
581-
self.compute_apriori_area_isep()
582-
self.compute_area_isep()
583590
for self.j in range(1, self.num_areas + 1):
584591
print('Area ' + str(self.j).rjust(len(str(self.num_areas))) + \
585592
': expected emersion ' + str('{:.{prec}f}'.format(round(self.expected_area_emersion(self.j), self.precision), prec=self.precision)) + \
@@ -588,14 +595,15 @@ def print_filter(self, mode, precision = 5):
588595
', fpp ' + str('{:.{prec}f}'.format(round(self.area_fpp[self.j], self.precision), prec=self.precision)) + \
589596
', a-priori isep ' + str('{:.{prec}f}'.format(round(self.area_apriori_isep[self.j], self.precision), prec=self.precision)) + \
590597
', expected ise ' + str('{:.{prec}f}'.format(round((self.area_apriori_isep[self.j] * self.area_members[self.j]), self.precision), prec=self.precision)) + \
591-
', isep ' + str('{:.{prec}f}'.format(round(self.area_isep[self.j], self.precision), prec=self.precision)))
598+
', isep ' + str('{:.{prec}f}'.format(round(self.area_isep[self.j], self.precision), prec=self.precision)) + \
599+
', a-priori safep ' + str('{:.{prec}f}'.format(round(self.area_apriori_safep[self.j], self.precision), prec=self.precision)))
592600

593601
del self.j
594602
del self.mode
595603
del self.potential_elements
596604

597605

598-
def save_filter(self, filter_path, mode, precision = 5):
606+
def save_filter(self, mode, filter_path = '', precision = 5):
599607
""" Saves the filter and related statistics onto a CSV file.
600608
601609
Saves to disk the filter or its statistics (according to the specified
@@ -614,16 +622,25 @@ def save_filter(self, filter_path, mode, precision = 5):
614622
OSError: The file cannot be created.
615623
"""
616624

617-
self.filter_path = filter_path
618625
self.mode = mode
626+
self.filter_path = filter_path
619627
self.precision = precision
620628

629+
if (self.filter_path == ''):
630+
self.filter_path = 'sbf-stats-' + datetime.now().strftime("%Y%m%d-%H%M%S") + '.csv'
631+
621632
# Tries to open the specified file for writing and save the filter (or its statistics) to it
622633
try:
623634
with open(self.filter_path, 'w') as self.filter_file:
624635

625636
if (self.mode == 0):
626637

638+
self.compute_area_fpp()
639+
self.compute_apriori_area_fpp()
640+
self.compute_apriori_area_isep()
641+
self.compute_area_isep()
642+
self.expected_area_cells()
643+
627644
self.filter_file.write("hash_family" + ";" + self.hash_family + "\n")
628645
self.filter_file.write("hash_number" + ";" + str(self.num_hashes) + "\n")
629646
self.filter_file.write("area_number" + ";" + str(self.num_areas) + "\n")
@@ -636,19 +653,15 @@ def save_filter(self, filter_path, mode, precision = 5):
636653
self.filter_file.write("sparsity" + ";" + str('{:.{prec}f}'.format(round(self.filter_sparsity(), self.precision), prec=self.precision)) + "\n")
637654
self.filter_file.write("a-priori fpp" + ";" + str('{:.{prec}f}'.format(round(self.filter_apriori_fpp(), self.precision), prec=self.precision)) + "\n")
638655
self.filter_file.write("fpp" + ";" + str('{:.{prec}f}'.format(round(self.filter_fpp(), self.precision), prec=self.precision)) + "\n")
639-
self.filter_file.write("area;members;expected cells;self-collisions;cells;expected emersion;emersion;a-priori fpp;fpp;a-priori isep;expected ise;isep\n")
656+
self.filter_file.write("a-priori safeness probability" + ";" + str('{:.{prec}f}'.format(round(self.safeness, self.precision), prec=self.precision)) + "\n")
657+
self.filter_file.write("area;members;expected cells;self-collisions;cells;expected emersion;emersion;a-priori fpp;fpp;a-priori isep;expected ise;isep;a-priori safep\n")
640658

641659
# area-related parameters:
642660
# area, members, expected cells, self-collisions, cells, expected emersion, emersion, apriori_fpp, fpp, apriori_isep, expected ise, isep, apriori_safep
643-
self.compute_area_fpp()
644-
self.compute_apriori_area_fpp()
645-
self.compute_apriori_area_isep()
646-
self.compute_area_isep()
647-
self.expected_area_cells()
648661
for self.j in range(1, self.num_areas+1):
649662
self.filter_file.write(str(self.j) + ";" + \
650663
str(self.area_members[self.j]) + ";" + \
651-
str('{:.{prec}f}'.format(round(self.area_expected_cells[self.j], self.precision), prec=self.precision)) + ";" + \
664+
str(round(self.area_expected_cells[self.j])) + ";" + \
652665
str(self.area_self_collisions[self.j]) + ";" + \
653666
str(self.area_cells[self.j]) + ";" + \
654667
str('{:.{prec}f}'.format(round(self.expected_area_emersion(self.j), self.precision), prec=self.precision)) + ";" + \
@@ -657,7 +670,8 @@ def save_filter(self, filter_path, mode, precision = 5):
657670
str('{:.{prec}f}'.format(round(self.area_fpp[self.j], self.precision), prec=self.precision)) + ";" + \
658671
str('{:.{prec}f}'.format(round(self.area_apriori_isep[self.j], self.precision), prec=self.precision)) + ";" + \
659672
str('{:.{prec}f}'.format(round((self.area_apriori_isep[self.j] * self.area_members[self.j]), self.precision), prec=self.precision)) + ";" + \
660-
str('{:.{prec}f}'.format(round(self.area_isep[self.j], self.precision), prec=self.precision)) + "\n")
673+
str('{:.{prec}f}'.format(round(self.area_isep[self.j], self.precision), prec=self.precision)) + ";" + \
674+
str('{:.{prec}f}'.format(round(self.area_apriori_safep[self.j], self.precision), prec=self.precision)) + "\n")
661675

662676
del self.j
663677

@@ -818,25 +832,35 @@ def compute_apriori_area_isep(self):
818832
"""
819833

820834
self.area_apriori_isep = [0]*(self.num_areas + 1)
835+
self.area_apriori_safep = [0]*(self.num_areas + 1)
836+
self.safeness = 1
821837

822838
for self.i in range(self.num_areas, 0, -1):
823839

824840
self.nfill = 0
825-
self.p = 0
841+
self.p1 = 0
842+
self.p2 = 0
826843

827844
for self.j in range(self.i+1, self.num_areas+1):
828845
self.nfill += self.area_members[self.j]
829846

830-
self.p = 1 - (1 / self.num_cells)
847+
self.p1 = 1 - (1 / self.num_cells)
831848

832-
self.p = 1 - pow(self.p, (self.num_hashes * self.nfill))
849+
self.p1 = 1 - pow(self.p1, (self.num_hashes * self.nfill))
833850

834-
self.p = pow(self.p, self.num_hashes)
851+
self.p1 = pow(self.p1, self.num_hashes)
852+
853+
self.p2 = 1 - self.p1
854+
self.p2 = pow(self.p2, self.area_members[self.i])
835855

836-
self.area_apriori_isep[self.i] = self.p
856+
self.safeness *= self.p2
857+
858+
self.area_apriori_isep[self.i] = self.p1
859+
self.area_apriori_safep[self.i] = self.p2
837860

838861
del self.nfill
839-
del self.p
862+
del self.p1
863+
del self.p2
840864
del self.i
841865
del self.j
842866

0 commit comments

Comments
 (0)