1+ import datetime
12import json
23import os
4+ import subprocess
35
4- from eval .BERTscore_eval import run_bertscore_eval
5- from eval .bleu_eval import run_bleu_eval
6- from eval .LLM_as_Judge import SemanticEvaluator
7- from eval .rouge_eval import run_rouge_eval
6+ from eval_generation .BERTscore_eval import run_bertscore_eval
7+ from eval_generation .bleu_eval import run_bleu_eval
8+ from eval_generation .LLM_as_Judge import SemanticEvaluator
9+ from eval_generation .rouge_eval import run_rouge_eval
810
911
1012def run_semantic_evaluation (save_results_path , test_data_path ):
@@ -19,17 +21,45 @@ def load_config(config_path):
1921 return config
2022
2123
def run_notebook_with_date(input_notebook_path, output_dir):
    """
    Execute a Jupyter notebook and save the executed copy with the current
    date appended to its filename.

    The output file is named ``<basename>_<YYYYMMDD>.ipynb`` and written to
    *output_dir*. Note: the scraped original formatted the name with stray
    spaces inside the f-string literal segments (``"{ base } _{ date } .ipynb"``),
    which would have produced a filename containing spaces — fixed here.

    Parameters
    ----------
    input_notebook_path : str
        Path to the ``.ipynb`` file to execute.
    output_dir : str
        Directory in which the executed notebook is saved.

    Returns
    -------
    str
        Full path of the executed output notebook (backward-compatible
        addition; the original returned ``None``).

    Raises
    ------
    subprocess.CalledProcessError
        If ``jupyter nbconvert --execute`` exits non-zero (``check=True``).
    """
    date_str = datetime.datetime.now().strftime("%Y%m%d")
    base_name = os.path.splitext(os.path.basename(input_notebook_path))[0]
    output_path = os.path.join(output_dir, f"{base_name}_{date_str}.ipynb")
    # List form (shell=False) keeps paths with spaces safe from shell parsing.
    subprocess.run(
        [
            "jupyter",
            "nbconvert",
            "--to",
            "notebook",
            "--execute",
            "--output",
            output_path,
            input_notebook_path,
        ],
        check=True,
    )
    print(f"Executed notebook saved to: {output_path}")
    return output_path
48+
2249def main ():
2350 cwd = os .getcwd ()
2451 # Paths
25- test_data_path = os .path .join (cwd , "eval/data/test_samples_german_faq.csv" )
26- results_dir = os .path .join (cwd , "eval/results" )
52+ test_data_path = os .path .join (
53+ cwd , "eval_generation/data/test_samples_german_faq.csv"
54+ )
55+ date_str = datetime .datetime .now ().strftime ("%Y%m%d" )
56+ results_dir = os .path .join (cwd , f"eval_generation/results/results_{ date_str } " )
2757 os .makedirs (results_dir , exist_ok = True )
2858 llm_judge_results_path = os .path .join (results_dir , "llm_judge_results_de.csv" )
2959
3060 # 1. Run LLM as Judge and save results
3161 print ("Running LLM as Judge (semantic evaluation)..." )
32- # run_semantic_evaluation(llm_judge_results_path, test_data_path)
62+ run_semantic_evaluation (llm_judge_results_path , test_data_path )
3363
3464 # 2. Prepare config for metrics
3565 config = {
@@ -43,7 +73,7 @@ def main():
4373 }
4474
4575 # save paths to json file. The file will be used by notebooks
46- config_path = os .path .join (cwd , "eval /result_paths.json" )
76+ config_path = os .path .join (cwd , "eval_generation /result_paths.json" )
4777 with open (config_path , "w" ) as f :
4878 json .dump (config , f , indent = 4 )
4979
@@ -61,6 +91,14 @@ def main():
6191
6292 print ("All metrics computed and saved." )
6393
94+ # 6. run notebooks for visualization
95+ notebook_path = os .path .join (
96+ cwd , "eval_generation/automatic_eval/lexical_semantic_eval.ipynb"
97+ )
98+ run_notebook_with_date (notebook_path , results_dir )
99+
100+ print ("Evaluation pipeline completed." )
101+
64102
65103if __name__ == "__main__" :
66104 main ()
0 commit comments