Add serialisation for sampler statistics.

lohedges · lohedges · commit 795f3773a9db · 2026-04-01T12:03:38.000+01:00
diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py
@@ -1197,6 +1197,7 @@ def increment_filename(base_filename, suffix):
             output_directory / f"energy_components_{lam}.txt"
         )
         filenames["gcmc_ghosts"] = str(output_directory / f"gcmc_ghosts_{lam}.txt")
+        filenames["sampler_stats"] = str(output_directory / f"sampler_stats_{lam}.pkl")
         if restart:
             filenames["config"] = str(
                 output_directory / increment_filename("config", "yaml")
diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py
@@ -106,6 +106,8 @@ def __init__(
         self._openmm_states = [None] * len(lambdas)
         self._gcmc_samplers = [None] * len(lambdas)
         self._gcmc_states = [None] * len(lambdas)
+        self._gcmc_stats = [None] * len(lambdas)
+        self._terminal_flip_stats = [[0, 0] for _ in lambdas]  # one list each
         self._num_proposed = _np.matrix(_np.zeros((len(lambdas), len(lambdas))))
         self._num_accepted = _np.matrix(_np.zeros((len(lambdas), len(lambdas))))
         self._num_swaps = _np.matrix(_np.zeros((len(lambdas), len(lambdas))))
@@ -130,6 +132,14 @@ def __setstate__(self, state):
         for key, value in state.items():
             setattr(self, key, value)
 
+        # Default any attributes that are missing from older checkpoint files.
+        # Each replica gets its own [attempted, accepted] list (no shared alias).
+        n = len(self._lambdas)
+        if not hasattr(self, "_gcmc_stats"):
+            self._gcmc_stats = [None] * n
+        if not hasattr(self, "_terminal_flip_stats"):
+            self._terminal_flip_stats = [[0, 0] for _ in range(n)]
+
     def __getstate__(self):
         """
         Get the state of the object.
@@ -145,6 +155,8 @@ def __getstate__(self):
             # Don't pickle the GCMC samplers since they need to be recreated.
             "_gcmc_samplers": len(self._gcmc_samplers) * [None],
             "_gcmc_states": self._gcmc_states,
+            "_gcmc_stats": self._gcmc_stats,
+            "_terminal_flip_stats": self._terminal_flip_stats,
             "_num_proposed": self._num_proposed,
             "_num_accepted": self._num_accepted,
             "_num_swaps": self._num_swaps,
@@ -823,7 +835,7 @@ def __init__(self, system, config):
                 state = self._dynamics_cache._states[i]
                 dynamics.context().setState(self._dynamics_cache._openmm_states[state])
 
-                # Reset the GCMC water state.
+                # Reset the GCMC water state and restore statistics.
                 if gcmc_sampler is not None:
                     gcmc_sampler.push()
                     try:
@@ -834,6 +846,13 @@ def __init__(self, system, config):
                         )
                     finally:
                         gcmc_sampler.pop()
+                    if self._dynamics_cache._gcmc_stats[i] is not None:
+                        gcmc_sampler.restore_stats(self._dynamics_cache._gcmc_stats[i])
+
+                # Restore terminal flip sampler statistics.
+                if self._terminal_flip_samplers is not None:
+                    attempted, accepted = self._dynamics_cache._terminal_flip_stats[i]
+                    self._terminal_flip_samplers[i].reset(attempted, accepted)
 
         # Conversion factor for reduced potential.
         kT = (_sr.units.k_boltz * self._config.temperature).to(_sr.units.kcal_per_mol)
@@ -1190,6 +1209,7 @@ def run(self):
 
                     # Pickle the dynamics cache.
                     _logger.info("Saving replica exchange state")
+                    self._save_sampler_stats()
                     with open(self._repex_state, "wb") as f:
                         _pickle.dump(self._dynamics_cache, f)
 
@@ -1211,6 +1231,11 @@ def run(self):
 
             # Pickle final state of the dynamics cache.
             _logger.info("Saving final replica exchange state")
+            if self._terminal_flip_samplers is not None:
+                self._dynamics_cache._terminal_flip_stats = [
+                    [s.num_attempted, s.num_accepted]
+                    for s in self._terminal_flip_samplers
+                ]
             with open(self._repex_state, "wb") as f:
                 _pickle.dump(self._dynamics_cache, f)
 
@@ -1842,6 +1867,21 @@ def _mix_replicas(num_replicas, energy_matrix, proposed, accepted):
 
         return states
 
+    def _save_sampler_stats(self):
+        """
+        Copy the current GCMC and terminal flip sampler statistics into the
+        dynamics cache so that they are captured when it is pickled.
+        """
+        cache = self._dynamics_cache
+        for idx in range(len(self._lambda_values)):
+            _, sampler = cache.get(idx)
+            if sampler is not None:
+                cache._gcmc_stats[idx] = sampler.get_stats()
+        flips = self._terminal_flip_samplers
+        if flips is not None:
+            counts = [[s.num_attempted, s.num_accepted] for s in flips]
+            cache._terminal_flip_stats = counts
+
     def _save_transition_matrix(self):
         """
         Internal method to save the replica exchange transition matrix.
diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py
@@ -695,6 +695,16 @@ def generate_lam_vals(lambda_base, increment=0.001):
                 finally:
                     gcmc_sampler.pop()
 
+        # Restore sampler statistics from a previous run.
+        if self._is_restart:
+            stats = self._load_sampler_stats(index)
+            if stats is not None:
+                if gcmc_sampler is not None and "gcmc" in stats:
+                    gcmc_sampler.restore_stats(stats["gcmc"])
+                if terminal_flip_sampler is not None and "terminal_flip" in stats:
+                    attempted, accepted = stats["terminal_flip"]
+                    terminal_flip_sampler.reset(attempted, accepted)
+
         # Set the number of neighbours used for the energy calculation.
         # If not None, then we add one to account for the extra windows
         # used for finite-difference gradient analysis.
@@ -924,6 +934,11 @@ def generate_lam_vals(lambda_base, increment=0.001):
                         if error is not None:
                             raise error
 
+                        # Save sampler statistics alongside the checkpoint.
+                        self._save_sampler_stats(
+                            index, gcmc_sampler, terminal_flip_sampler
+                        )
+
                     # Delete all trajectory frames from the Sire system within the
                     # dynamics object.
                     dynamics._d._sire_mols.delete_all_frames()
@@ -1213,12 +1228,73 @@ def generate_lam_vals(lambda_base, increment=0.001):
                     _logger.error(msg)
                     raise RuntimeError(msg)
 
+                # Save sampler statistics alongside the final checkpoint.
+                self._save_sampler_stats(index, gcmc_sampler, terminal_flip_sampler)
+
             _logger.success(
                 f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1"
             )
 
         return time
 
+    def _save_sampler_stats(self, index, gcmc_sampler, terminal_flip_sampler):
+        """
+        Pickle the statistics of whichever samplers are active for a window.
+
+        Parameters
+        ----------
+
+        index : int
+            The index of the lambda value.
+
+        gcmc_sampler : GCMCSampler or None
+            The GCMC sampler for this replica.
+
+        terminal_flip_sampler : TerminalFlipSampler or None
+            The terminal flip sampler for this replica.
+        """
+        import pickle as _pickle
+
+        # Only samplers that are in use contribute a key, so a consumer
+        # should check for "gcmc" / "terminal_flip" before reading them.
+        payload = {}
+        if gcmc_sampler is not None:
+            payload["gcmc"] = gcmc_sampler.get_stats()
+        if terminal_flip_sampler is not None:
+            flip = terminal_flip_sampler
+            payload["terminal_flip"] = [flip.num_attempted, flip.num_accepted]
+        with open(self._filenames[index]["sampler_stats"], "wb") as handle:
+            _pickle.dump(payload, handle)
+
+    def _load_sampler_stats(self, index):
+        """
+        Read back the pickled sampler statistics for a lambda window.
+
+        Parameters
+        ----------
+
+        index : int
+            The index of the lambda value.
+
+        Returns
+        -------
+
+        dict or None
+            The statistics, or None if the file is missing or unreadable.
+        """
+        import pickle as _pickle
+        from pathlib import Path as _Path
+
+        stats_file = _Path(self._filenames[index]["sampler_stats"])
+        if stats_file.exists():
+            try:
+                with stats_file.open("rb") as handle:
+                    return _pickle.load(handle)
+            except Exception as e:
+                _logger.warning(f"Could not load sampler stats for index {index}: {e}")
+        # Missing or unreadable file: fall through with no statistics.
+        return None
+
     def _minimisation(
         self,
         system,
diff --git a/src/somd2/runner/_samplers/_terminal_flip.py b/src/somd2/runner/_samplers/_terminal_flip.py
@@ -539,3 +539,19 @@ def acceptance_rate(self):
         if self._num_attempted == 0:
             return 0.0
         return self._num_accepted / self._num_attempted
+
+    def reset(self, num_attempted=0, num_accepted=0):
+        """
+        Set the move counters, zeroing them when called without arguments.
+
+        Parameters
+        ----------
+
+        num_attempted : int
+            New value for the attempted-move counter. Defaults to 0.
+
+        num_accepted : int
+            New value for the accepted-move counter. Defaults to 0.
+        """
+        # Both counters are assigned together, attempted first.
+        self._num_attempted, self._num_accepted = num_attempted, num_accepted

Original file line number	Diff line number	Diff line change
`@@ -1197,6 +1197,7 @@ def increment_filename(base_filename, suffix):`
`1197`	`1197`	`output_directory / f"energy_components_{lam}.txt"`
`1198`	`1198`	`)`
`1199`	`1199`	`filenames["gcmc_ghosts"] = str(output_directory / f"gcmc_ghosts_{lam}.txt")`
	`1200`	`+ filenames["sampler_stats"] = str(output_directory / f"sampler_stats_{lam}.pkl")`
`1200`	`1201`	`if restart:`
`1201`	`1202`	`filenames["config"] = str(`
`1202`	`1203`	`output_directory / increment_filename("config", "yaml")`