@@ -381,13 +381,15 @@ def __aggregate_stats_for_cache_hit_error_rate(run_dirs: List[str], z: float = 1
381381def __get_result_files (results_dir : str ):
382382 if not os .path .exists (results_dir ):
383383 print (f"No results found in { results_dir } " )
384- return [], [], [], [], []
384+ return [], [], [], [], [], [], []
385385
386386 gptcache_files : List [str ] = []
387387 vcache_local_files : List [str ] = []
388388 vcache_global_files : List [str ] = []
389389 berkeley_embedding_files : List [str ] = []
390390 vcache_berkeley_embedding_files : List [str ] = []
391+ sigmoid_probability_files : List [str ] = []
392+ sigmoid_only_files : List [str ] = []
391393
392394 for d in os .listdir (results_dir ):
393395 # Process GPTCache (static threshold) directories
@@ -435,12 +437,32 @@ def __get_result_files(results_dir: str):
435437 if file .startswith ("results_" ) and file .endswith (".json" ):
436438 vcache_berkeley_embedding_files .append (os .path .join (dir_path , file ))
437439
440+ # Process Sigmoid Probability directories
441+ elif d .startswith ("sigmoid_probability_" ) and os .path .isdir (
442+ os .path .join (results_dir , d )
443+ ):
444+ dir_path : str = os .path .join (results_dir , d )
445+ for file in os .listdir (dir_path ):
446+ if file .startswith ("results_" ) and file .endswith (".json" ):
447+ sigmoid_probability_files .append (os .path .join (dir_path , file ))
448+
449+ # Process Sigmoid Only directories
450+ elif d .startswith ("sigmoid_only_" ) and os .path .isdir (
451+ os .path .join (results_dir , d )
452+ ):
453+ dir_path : str = os .path .join (results_dir , d )
454+ for file in os .listdir (dir_path ):
455+ if file .startswith ("results_" ) and file .endswith (".json" ):
456+ sigmoid_only_files .append (os .path .join (dir_path , file ))
457+
438458 return (
439459 gptcache_files ,
440460 vcache_local_files ,
441461 vcache_global_files ,
442462 berkeley_embedding_files ,
443463 vcache_berkeley_embedding_files ,
464+ sigmoid_probability_files ,
465+ sigmoid_only_files ,
444466 )
445467
446468
@@ -463,6 +485,8 @@ def generate_combined_plots(
463485 vcache_global_files ,
464486 berkeley_embedding_files ,
465487 vcache_berkeley_embedding_files ,
488+ sigmoid_probability_files ,
489+ sigmoid_only_files ,
466490 ) = __get_result_files (results_dir )
467491
468492 if (
@@ -471,6 +495,8 @@ def generate_combined_plots(
471495 and not vcache_global_files
472496 and not berkeley_embedding_files
473497 and not vcache_berkeley_embedding_files
498+ and not sigmoid_probability_files
499+ and not sigmoid_only_files
474500 ):
475501 print (
476502 f"No folders found for { dataset } , { embedding_model_name } , { llm_model_name } \n "
@@ -565,6 +591,40 @@ def generate_combined_plots(
565591 print (f"Error loading { vcache_berkeley_embedding_file_path } : { e } " )
566592 continue
567593
594+ ############################################################
595+ ### Sigmoid Probability
596+ sigmoid_probability_data_frames : Dict [float , pd .DataFrame ] = {}
597+ for sigmoid_probability_file_path in sigmoid_probability_files :
598+ with open (sigmoid_probability_file_path , "r" ) as f :
599+ try :
600+ data : Any = json .load (f )
601+ dataframe , _ , chopped_index = convert_to_dataframe_from_json_file (
602+ json_data = data , keep_split = keep_split
603+ )
604+ delta : float = data ["config" ]["delta" ]
605+ sigmoid_probability_data_frames [delta ] = dataframe
606+ chopped_index = chopped_index
607+ except Exception as e :
608+ print (f"Error loading { sigmoid_probability_file_path } : { e } " )
609+ continue
610+
611+ ############################################################
612+ ### Sigmoid Only
613+ sigmoid_only_data_frames : Dict [float , pd .DataFrame ] = {}
614+ for sigmoid_only_file_path in sigmoid_only_files :
615+ with open (sigmoid_only_file_path , "r" ) as f :
616+ try :
617+ data : Any = json .load (f )
618+ dataframe , _ , chopped_index = convert_to_dataframe_from_json_file (
619+ json_data = data , keep_split = keep_split
620+ )
621+ delta : float = data ["config" ]["delta" ]
622+ sigmoid_only_data_frames [delta ] = dataframe
623+ chopped_index = chopped_index
624+ except Exception as e :
625+ print (f"Error loading { sigmoid_only_file_path } : { e } " )
626+ continue
627+
568628 if chopped_index is None :
569629 print (
570630 f"No data found for { dataset } , { embedding_model_name } , { llm_model_name } in { results_dir } "
@@ -577,6 +637,8 @@ def generate_combined_plots(
577637 vcache_global_data_frames = vcache_global_data_frames ,
578638 berkeley_embedding_data_frames = berkeley_embedding_data_frames ,
579639 vcache_berkeley_embedding_data_frames = vcache_berkeley_embedding_data_frames ,
640+ sigmoid_probability_data_frames = sigmoid_probability_data_frames ,
641+ sigmoid_only_data_frames = sigmoid_only_data_frames ,
580642 results_dir = results_dir ,
581643 timestamp = timestamp ,
582644 font_size = font_size ,
@@ -620,6 +682,8 @@ def generate_combined_plots(
620682 vcache_global_data_frames = vcache_global_data_frames ,
621683 berkeley_embedding_data_frames = berkeley_embedding_data_frames ,
622684 vcache_berkeley_embedding_data_frames = vcache_berkeley_embedding_data_frames ,
685+ sigmoid_probability_data_frames = sigmoid_probability_data_frames ,
686+ sigmoid_only_data_frames = sigmoid_only_data_frames ,
623687 results_dir = results_dir ,
624688 timestamp = timestamp ,
625689 font_size = font_size ,
@@ -635,6 +699,8 @@ def generate_combined_plots(
635699 vcache_global_data_frames = vcache_global_data_frames ,
636700 berkeley_embedding_data_frames = berkeley_embedding_data_frames ,
637701 vcache_berkeley_embedding_data_frames = vcache_berkeley_embedding_data_frames ,
702+ sigmoid_probability_data_frames = sigmoid_probability_data_frames ,
703+ sigmoid_only_data_frames = sigmoid_only_data_frames ,
638704 results_dir = results_dir ,
639705 timestamp = timestamp ,
640706 font_size = font_size ,
@@ -679,6 +745,8 @@ def __plot_legend(
679745 vcache_global_data_frames : Dict [float , pd .DataFrame ],
680746 berkeley_embedding_data_frames : Dict [float , pd .DataFrame ],
681747 vcache_berkeley_embedding_data_frames : Dict [float , pd .DataFrame ],
748+ sigmoid_probability_data_frames : Dict [float , pd .DataFrame ],
749+ sigmoid_only_data_frames : Dict [float , pd .DataFrame ],
682750 results_dir : str ,
683751 timestamp : str ,
684752 font_size : int ,
@@ -760,6 +828,34 @@ def __plot_legend(
760828 )
761829 labels .append ("Fine-tuned Embedding" )
762830
831+ if sigmoid_probability_data_frames :
832+ lines .append (
833+ Line2D (
834+ [0 ],
835+ [0 ],
836+ color = "#89D572" ,
837+ linewidth = 3 ,
838+ linestyle = "-" ,
839+ marker = "o" ,
840+ markersize = 8 ,
841+ )
842+ )
843+ labels .append ("Sigmoid Probability" )
844+
845+ if sigmoid_only_data_frames :
846+ lines .append (
847+ Line2D (
848+ [0 ],
849+ [0 ],
850+ color = "#E2A043" ,
851+ linewidth = 3 ,
852+ linestyle = "-" ,
853+ marker = "o" ,
854+ markersize = 8 ,
855+ )
856+ )
857+ labels .append ("Sigmoid Only" )
858+
763859 ax .legend (lines , labels , loc = "center" , ncol = 2 , fontsize = font_size , frameon = False )
764860
765861 legend_filename = results_dir + "/legend.pdf"
@@ -1245,6 +1341,8 @@ def __plot_avg_latency_vs_error_rate(
12451341 vcache_global_data_frames : Dict [float , pd .DataFrame ],
12461342 berkeley_embedding_data_frames : Dict [float , pd .DataFrame ],
12471343 vcache_berkeley_embedding_data_frames : Dict [float , pd .DataFrame ],
1344+ sigmoid_probability_data_frames : Dict [float , pd .DataFrame ],
1345+ sigmoid_only_data_frames : Dict [float , pd .DataFrame ],
12481346 results_dir : str ,
12491347 timestamp : str ,
12501348 font_size : int ,
@@ -1269,6 +1367,12 @@ def __plot_avg_latency_vs_error_rate(
12691367 vcache_berkeley_embedding_run_dirs_map = __collect_run_dirs_by_prefix_and_key (
12701368 results_dir , "vcache_berkeley_embedding_"
12711369 )
1370+ sigmoid_probability_run_dirs_map = __collect_run_dirs_by_prefix_and_key (
1371+ results_dir , "sigmoid_probability_"
1372+ )
1373+ sigmoid_only_run_dirs_map = __collect_run_dirs_by_prefix_and_key (
1374+ results_dir , "sigmoid_only_"
1375+ )
12721376
12731377 avg_latency_no_cache = - 1 # Initialize
12741378
@@ -1470,6 +1574,52 @@ def prepare_latency_error_series_data(
14701574 8 ,
14711575 )
14721576
1577+ ############################################################
1578+ ### Sigmoid Probability
1579+ if sigmoid_probability_data_frames :
1580+ sp_lat , sp_err , sp_lat_le , sp_lat_ue , sp_err_le , sp_err_ue , sp_multi = (
1581+ prepare_latency_error_series_data (
1582+ sigmoid_probability_data_frames ,
1583+ sigmoid_probability_run_dirs_map ,
1584+ ERROR_RATE_UPPER_BOUND ,
1585+ )
1586+ )
1587+ __draw_confidence_series (
1588+ sp_lat ,
1589+ sp_err ,
1590+ sp_lat_le ,
1591+ sp_lat_ue ,
1592+ sp_err_le ,
1593+ sp_err_ue ,
1594+ sp_multi ,
1595+ "#89D572" ,
1596+ "Sigmoid Probability" ,
1597+ 8 ,
1598+ )
1599+
1600+ ############################################################
1601+ ### Sigmoid Only
1602+ if sigmoid_only_data_frames :
1603+ so_lat , so_err , so_lat_le , so_lat_ue , so_err_le , so_err_ue , so_multi = (
1604+ prepare_latency_error_series_data (
1605+ sigmoid_only_data_frames ,
1606+ sigmoid_only_run_dirs_map ,
1607+ ERROR_RATE_UPPER_BOUND ,
1608+ )
1609+ )
1610+ __draw_confidence_series (
1611+ so_lat ,
1612+ so_err ,
1613+ so_lat_le ,
1614+ so_lat_ue ,
1615+ so_err_le ,
1616+ so_err_ue ,
1617+ so_multi ,
1618+ "#E2A043" ,
1619+ "Sigmoid Only" ,
1620+ 8 ,
1621+ )
1622+
14731623 ############################################################
14741624 ### Baseline: No Cache Plotting
14751625 # Ensure avg_latency_no_cache has been set by one of the prepare_latency_error_series_data calls
@@ -1508,6 +1658,8 @@ def __plot_cache_hit_vs_error_rate(
15081658 vcache_global_data_frames : Dict [float , pd .DataFrame ],
15091659 berkeley_embedding_data_frames : Dict [float , pd .DataFrame ],
15101660 vcache_berkeley_embedding_data_frames : Dict [float , pd .DataFrame ],
1661+ sigmoid_probability_data_frames : Dict [float , pd .DataFrame ],
1662+ sigmoid_only_data_frames : Dict [float , pd .DataFrame ],
15111663 results_dir : str ,
15121664 timestamp : str ,
15131665 font_size : int ,
@@ -1532,6 +1684,12 @@ def __plot_cache_hit_vs_error_rate(
15321684 vcache_berkeley_embedding_run_dirs_map = __collect_run_dirs_by_prefix_and_key (
15331685 results_dir , "vcache_berkeley_embedding_"
15341686 )
1687+ sigmoid_probability_run_dirs_map = __collect_run_dirs_by_prefix_and_key (
1688+ results_dir , "sigmoid_probability_"
1689+ )
1690+ sigmoid_only_run_dirs_map = __collect_run_dirs_by_prefix_and_key (
1691+ results_dir , "sigmoid_only_"
1692+ )
15351693
15361694 # Helper to prepare data for a series
15371695 def prepare_cache_hit_error_series_data (
@@ -1718,6 +1876,50 @@ def prepare_cache_hit_error_series_data(
17181876 8 ,
17191877 )
17201878
1879+ # Sigmoid Probability
1880+ if sigmoid_probability_data_frames :
1881+ sp_err , sp_ch , sp_err_le , sp_err_ue , sp_ch_le , sp_ch_ue , sp_multi = (
1882+ prepare_cache_hit_error_series_data (
1883+ sigmoid_probability_data_frames ,
1884+ sigmoid_probability_run_dirs_map ,
1885+ ERROR_RATE_UPPER_BOUND ,
1886+ )
1887+ )
1888+ __draw_confidence_series (
1889+ sp_err ,
1890+ sp_ch ,
1891+ sp_err_le ,
1892+ sp_err_ue ,
1893+ sp_ch_le ,
1894+ sp_ch_ue ,
1895+ sp_multi ,
1896+ "#89D572" ,
1897+ "Sigmoid Probability" ,
1898+ 8 ,
1899+ )
1900+
1901+ # Sigmoid Only
1902+ if sigmoid_only_data_frames :
1903+ so_err , so_ch , so_err_le , so_err_ue , so_ch_le , so_ch_ue , so_multi = (
1904+ prepare_cache_hit_error_series_data (
1905+ sigmoid_only_data_frames ,
1906+ sigmoid_only_run_dirs_map ,
1907+ ERROR_RATE_UPPER_BOUND ,
1908+ )
1909+ )
1910+ __draw_confidence_series (
1911+ so_err ,
1912+ so_ch ,
1913+ so_err_le ,
1914+ so_err_ue ,
1915+ so_ch_le ,
1916+ so_ch_ue ,
1917+ so_multi ,
1918+ "#E2A043" ,
1919+ "Sigmoid Only" ,
1920+ 8 ,
1921+ )
1922+
17211923 plt .xlabel ("Error Rate (%)" , fontsize = font_size )
17221924 plt .ylabel ("Cache Hit Rate (%)" , fontsize = font_size )
17231925 plt .tick_params (axis = "both" , labelsize = font_size - 2 )
0 commit comments