Skip to content

Commit 3ec656c

Browse files
committed
FIX validation scripts
* Consider training set for computing SBS
1 parent a18a00b commit 3ec656c

3 files changed

Lines changed: 36 additions & 12 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ In `validation/`, we provide a script to validate your results files
3535
on known test data. Please note that all files of the test scenario have to be provided for this script.
3636
Example call:
3737

38-
```python validation/validate_cli.py --result_fn results.json --test_as example_files/SAT11-INDU-TRAIN/```
38+
```python validation/validate_cli.py --result_fn results.json --test_as example_files/SAT11-INDU-TEST/ --train_as example_files/SAT11-INDU-TRAIN/```
3939

4040
Add "." to your PYTHONPATH to avoid import errors, e.g.,
4141
```export PYTHONPATH=.//:$PYTHONPATH```

validation/validate.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def __init__(self, runtime_cutoff:int,
3535

3636
self.runtime_cutoff = runtime_cutoff
3737
self.maximize = maximize
38+
self.worse_than_sbs = 0 # int counter
3839

3940
self.logger = logging.getLogger("Stats")
4041

@@ -93,16 +94,19 @@ def show(self, remove_unsolvable: bool=True):
9394

9495
if self.maximize:
9596
self.logger.info("Gap closed: %.4f" %((par10 - self.sbs_par10) / (self.oracle_par10 - self.sbs_par10)))
97+
self.logger.info("Gap remaining: %.4f" %((self.oracle_par10 - par10) / (self.oracle_par10 - self.sbs_par10)))
9698
else:
9799
self.logger.info("Gap closed: %.4f" %((self.sbs_par10 - par10) / (self.sbs_par10 - self.oracle_par10)))
100+
self.logger.info("Gap remaining: %.4f" %((par10 - self.oracle_par10) / (self.sbs_par10 - self.oracle_par10)))
98101

99102
class Validator(object):
100103

101104
def __init__(self):
102105
''' Constructor '''
103106
self.logger = logging.getLogger("Validation")
104107

105-
def validate_runtime(self, schedules: dict, test_scenario: ASlibScenario):
108+
def validate_runtime(self, schedules: dict, test_scenario: ASlibScenario,
109+
train_scenario: ASlibScenario):
106110
'''
107111
validate selected schedules on test instances for runtime
108112
@@ -112,6 +116,8 @@ def validate_runtime(self, schedules: dict, test_scenario: ASlibScenario):
112116
algorithm schedules per instance
113117
test_scenario: ASlibScenario
114118
ASlib scenario with test instances
119+
train_scenario: ASlibScenario
120+
ASlib scenario with training instances -- required for determining the SBS
115121
'''
116122
if test_scenario.performance_type[0] != "runtime":
117123
raise ValueError("Cannot validate non-runtime scenario with runtime validation method")
@@ -133,7 +139,8 @@ def validate_runtime(self, schedules: dict, test_scenario: ASlibScenario):
133139
sys.exit(1)
134140

135141
stat.oracle_par10 = test_scenario.performance_data.min(axis=1).sum()
136-
stat.sbs_par10 = test_scenario.performance_data.sum(axis=0).min()
142+
sbs = train_scenario.performance_data.sum(axis=0).argmin()
143+
stat.sbs_par10 = test_scenario.performance_data.sum(axis=0)[sbs]
137144

138145
for inst, schedule in schedules.items():
139146
self.logger.debug("Validate: %s on %s" % (schedule, inst))
@@ -195,7 +202,8 @@ def validate_runtime(self, schedules: dict, test_scenario: ASlibScenario):
195202

196203
return stat
197204

198-
def validate_quality(self, schedules: dict, test_scenario: ASlibScenario):
205+
def validate_quality(self, schedules: dict, test_scenario: ASlibScenario,
206+
train_scenario: ASlibScenario):
199207
'''
200208
validate selected schedules on test instances for solution quality
201209
@@ -205,6 +213,8 @@ def validate_quality(self, schedules: dict, test_scenario: ASlibScenario):
205213
algorithm schedules per instance
206214
test_scenario: ASlibScenario
207215
ASlib scenario with test instances
216+
train_scenario: ASlibScenario
217+
ASlib scenario with training instances -- required for determining the SBS
208218
'''
209219
if test_scenario.performance_type[0] != "solution_quality":
210220
raise ValueError("Cannot validate non-solution_quality scenario with solution_quality validation method")
@@ -213,6 +223,7 @@ def validate_quality(self, schedules: dict, test_scenario: ASlibScenario):
213223

214224
if test_scenario.maximize[0]:
215225
test_scenario.performance_data *= -1
226+
train_scenario.performance_data *= -1
216227
self.logger.debug("Removing *-1 in performance data because of maximization")
217228

218229
stat = Stats(runtime_cutoff=None, maximize=test_scenario.maximize[0])
@@ -224,10 +235,12 @@ def validate_quality(self, schedules: dict, test_scenario: ASlibScenario):
224235

225236
if test_scenario.maximize[0]:
226237
stat.oracle_par10 = test_scenario.performance_data.max(axis=1).sum()
227-
stat.sbs_par10 = test_scenario.performance_data.sum(axis=0).max()
238+
sbs = train_scenario.performance_data.sum(axis=0).argmax()
239+
stat.sbs_par10 = test_scenario.performance_data.sum(axis=0)[sbs]
228240
else:
229241
stat.oracle_par10 = test_scenario.performance_data.min(axis=1).sum()
230-
stat.sbs_par10 = test_scenario.performance_data.sum(axis=0).min()
242+
sbs = train_scenario.performance_data.sum(axis=0).argmin()
243+
stat.sbs_par10 = test_scenario.performance_data.sum(axis=0)[sbs]
231244

232245
for inst, schedule in schedules.items():
233246

@@ -252,6 +265,14 @@ def validate_quality(self, schedules: dict, test_scenario: ASlibScenario):
252265

253266
stat.par1 += perf
254267
stat.solved += 1
268+
if test_scenario.maximize[0]:
269+
if perf < test_scenario.performance_data[sbs][inst]:
270+
stat.worse_than_sbs += 1
271+
print("%s(%.3f) vs %s (%.3f)" %(selected_algo, perf, sbs, test_scenario.performance_data[sbs][inst]))
272+
else:
273+
if perf > test_scenario.performance_data[sbs][inst]:
274+
stat.worse_than_sbs += 1
275+
print("%s(%.3f) vs %s (%.3f)" %(selected_algo, perf, sbs, test_scenario.performance_data[sbs][inst]))
255276

256277
stat.show(remove_unsolvable=False)
257278

validation/validate_cli.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,23 @@
1414
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
1515
parser.add_argument("--result_fn", help="Result json file with predictions for each test instances")
1616
parser.add_argument("--test_as", help="Directory with *all* test data in ASlib format")
17+
parser.add_argument("--train_as", help="Directory with *all* train data in ASlib format")
1718

1819
args_ = parser.parse_args()
1920

20-
#read scenario
21-
scenario = ASlibScenario()
22-
scenario.read_scenario(dn=args_.test_as)
21+
#read scenarios
22+
test_scenario = ASlibScenario()
23+
test_scenario.read_scenario(dn=args_.test_as)
24+
train_scenario = ASlibScenario()
25+
train_scenario.read_scenario(dn=args_.train_as)
2326

2427
# read result file
2528
with open(args_.result_fn) as fp:
2629
schedules = json.load(fp)
2730

2831
validator = Validator()
2932

30-
if scenario.performance_type[0] == "runtime":
31-
validator.validate_runtime(schedules=schedules, test_scenario=scenario)
33+
if test_scenario.performance_type[0] == "runtime":
34+
validator.validate_runtime(schedules=schedules, test_scenario=test_scenario, train_scenario=train_scenario)
3235
else:
33-
validator.validate_quality(schedules=schedules, test_scenario=scenario)
36+
validator.validate_quality(schedules=schedules, test_scenario=test_scenario, train_scenario=train_scenario)

0 commit comments

Comments
 (0)