@@ -1038,8 +1038,8 @@ class ParametersRunning(ParametersBase):
10381038 - "tensorboard": Tensorboard logger.
10391039 - "wandb": Weights and Biases logger.
10401040
1041- validation_metrics : list
1042- List of metrics to be used for validation . Default is ["ldos"].
1041+ logging_metrics : list
1042+ List of metrics to be used for logging . Default is ["ldos"].
10431043 Possible options are:
10441044
10451045 - "ldos": MSE of the LDOS.
@@ -1049,15 +1049,21 @@ class ParametersRunning(ParametersBase):
10491049 - "total_energy_actual_fe": Total energy computed with ground truth Fermi energy.
10501050 - "fermi_energy": Fermi energy.
10511051 - "density": Electron density.
1052- - "density_relative": Rlectron density (MAPE).
1052+ - "density_relative": Electron density (MAPE).
10531053 - "dos": Density of states.
10541054 - "dos_relative": Density of states (MAPE).
1055+
1056+ The units for energy metrics are meV/atom.
1057+ Selected metrics are evaluated every `logging_metrics_interval` (see below) epochs.
1058+ To use the energy metrics, the validation snapshots must not be shuffled.
1059+ Note that evaluating the energy metrics takes considerably longer than just LDOS
1060+ and therefore it is discouraged.
10551061
1056- validate_on_training_data : bool
1057- Whether to validate on the training data as well . Default is False.
1062+ log_metrics_on_train_set : bool
1063+ Whether to also log metrics evaluated on the training set . Default is False.
10581064
1059- validate_every_n_epochs : int
1060- Determines how often validation is performed . Default is 1.
1065+ logging_metrics_interval : int
1066+ Determines how often (in units of epochs) metrics are logged . Default is 1.
10611067
10621068 training_log_interval : int
10631069 Determines how often detailed performance info is printed during
@@ -1103,8 +1109,8 @@ def __init__(self):
11031109 self .learning_rate_scheduler = None
11041110 self .learning_rate_decay = 0.1
11051111 self .learning_rate_patience = 0
1106- self ._during_training_metric = "ldos"
1107- self ._after_training_metric = "ldos"
1112+ self ._validation_metric = "ldos"
1113+ self ._final_validation_metric = "ldos"
11081114 # self.use_compression = False
11091115 self .num_workers = 0
11101116 self .use_shuffling_for_samplers = True
@@ -1116,9 +1122,9 @@ def __init__(self):
11161122 self .logging_dir = "./mala_logging"
11171123 self .logging_dir_append_date = True
11181124 self .logger = None
1119- self .validation_metrics = ["ldos" ]
1120- self .validate_on_training_data = False
1121- self .validate_every_n_epochs = 1
1125+ self .logging_metrics = ["ldos" ]
1126+ self .log_metrics_on_train_set = False
1127+ self .logging_metrics_interval = 1
11221128 self .inference_data_grid = [0 , 0 , 0 ]
11231129 self .use_mixed_precision = False
11241130 self .use_graphs = False
@@ -1137,60 +1143,75 @@ def _update_ddp(self, new_ddp):
11371143 New DDP setting.
11381144 """
11391145 super (ParametersRunning , self )._update_ddp (new_ddp )
1140- self .during_training_metric = self .during_training_metric
1141- self .after_training_metric = self .after_training_metric
1146+ self .validation_metric = self .validation_metric
1147+ self .final_validation_metric = self .final_validation_metric
11421148
11431149 @property
1144- def during_training_metric (self ):
1150+ def validation_metric (self ):
11451151 """
1146- Control the metric used during training .
1152+ Control the metric used for validation .
11471153
1148- Metric for evaluated on the validation set during training.
1154+ Metric to be evaluated on the validation set during training.
11491155 Default is "ldos", meaning that the regular loss on the LDOS will be
1150- used as a metric. Possible options are "band_energy" and
1151- "total_energy". For these, the band resp. total energy of the
1152- validation snapshots will be calculated and compared to the provided
1153- DFT results. Of these, the mean average error in eV/atom will be
1154- calculated.
1155- """
1156- return self ._during_training_metric
1156+ used as a metric.
1157+
1158+ Possible options are:
11571159
1158- @during_training_metric .setter
1159- def during_training_metric (self , value ):
1160+ - "ldos": MSE of the LDOS.
1161+ - "band_energy": Band energy.
1162+ - "band_energy_actual_fe": Band energy computed with ground truth Fermi energy.
1163+ - "total_energy": Total energy.
1164+ - "total_energy_actual_fe": Total energy computed with ground truth Fermi energy.
1165+ - "fermi_energy": Fermi energy.
1166+ - "density": Electron density.
1167+ - "density_relative": Electron density (MAPE).
1168+ - "dos": Density of states.
1169+ - "dos_relative": Density of states (MAPE).
1170+
1171+ The units for energy metrics are meV/atom.
1172+ The selected metric is evaluated after every epoch on the validation set.
1173+ The validation metric is used as a criterion for early stopping and also
1174+ for checkpointing the best model.
1175+ Note that evaluating the energy metrics takes considerably longer than LDOS
1176+ and therefore it is discouraged.
1177+ """
1178+ return self ._validation_metric
1179+
1180+ @validation_metric .setter
1181+ def validation_metric (self , value ):
11601182 if value != "ldos" :
11611183 if self ._configuration ["ddp" ]:
11621184 raise Exception (
11631185 "Currently, MALA can only operate with the "
11641186 '"ldos" metric for ddp runs.'
11651187 )
1166- if value not in self .validation_metrics :
1167- self .validation_metrics .append (value )
1168- self ._during_training_metric = value
1188+ if value not in self .logging_metrics :
1189+ self .logging_metrics .append (value )
1190+ self ._validation_metric = value
11691191
11701192 @property
1171- def after_training_metric (self ):
1193+ def final_validation_metric (self ):
11721194 """
1173- Get the metric used during training .
1195+ Metric for final model evaluation .
11741196
1175- Metric for evaluated on the validation and test set before and after
1176- training. Default is "LDOS", meaning that the regular loss on the LDOS
1177- will be used as a metric. Possible options are "band_energy" and
1178- "total_energy". For these, the band resp. total energy of the
1179- validation snapshots will be calculated and compared to the provided
1180- DFT results. Of these, the mean average error in eV/atom will be
1181- calculated.
1197+ This metric is evaluated on the validation set after training.
1198+ Available options are the same as for `validation_metric`.
1199+ Default is "ldos", meaning that the MSE of the LDOS
1200+ will be used as a metric.
1201+ The final validation metric is used as a target
1202+ for hyperparameter optimization.
11821203 """
1183- return self ._after_training_metric
1204+ return self ._final_validation_metric
11841205
1185- @after_training_metric .setter
1186- def after_training_metric (self , value ):
1206+ @final_validation_metric .setter
1207+ def final_validation_metric (self , value ):
11871208 if value != "ldos" :
11881209 if self ._configuration ["ddp" ]:
11891210 raise Exception (
11901211 "Currently, MALA can only operate with the "
11911212 '"ldos" metric for ddp runs.'
11921213 )
1193- self ._after_training_metric = value
1214+ self ._final_validation_metric = value
11941215
11951216 @property
11961217 def use_graphs (self ):
0 commit comments