Skip to content

Commit 12f4e6b

Browse files
authored
Merge pull request #17 from OpenBioSim/feature_vsites
Handle XED force field virtual sites
2 parents 40098c3 + 8151866 commit 12f4e6b

3 files changed

Lines changed: 283 additions & 13 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Changelog
55
-------------------------------------------------------------------------------------
66

77
* Add support for getting and restoring sampling statistics.
8+
* Handle XED force field virtual sites.
89

910
[2025.2.0](https://github.com/openbiosim/loch/compare/2025.1.0...2025.2.0) - Feb 2026
1011
-------------------------------------------------------------------------------------

src/loch/_sampler.py

Lines changed: 122 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,33 @@ def __init__(
566566
except Exception as e:
567567
raise ValueError(f"Could not prepare the system for GCMC sampling: {e}")
568568

569+
# Compute per-molecule virtual site information. Virtual sites are
570+
# appended after the real atoms of each molecule in the OpenMM system,
571+
# so all subsequent molecules have their OpenMM particle indices shifted
572+
# by the cumulative number of virtual sites in preceding molecules.
573+
(
574+
self._total_vsites,
575+
self._vsite_atom_offsets,
576+
self._mol_vsite_charges,
577+
) = self._get_vsite_offsets(self._system)
578+
579+
if self._total_vsites > 0:
580+
# Offset water oxygen indices from Sire atom indices to OpenMM
581+
# particle indices.
582+
self._water_indices = (
583+
self._water_indices + self._vsite_atom_offsets[self._water_indices]
584+
)
585+
586+
# Apply the same correction to the reference atom indices.
587+
if self._reference is not None:
588+
self._reference_indices = (
589+
self._reference_indices
590+
+ self._vsite_atom_offsets[self._reference_indices]
591+
)
592+
593+
# Update the total atom count to include virtual site particles.
594+
self._num_atoms = self._system.num_atoms() + self._total_vsites
595+
569596
# Validate the platform parameter.
570597
valid_platforms = {"auto", "cuda", "opencl"}
571598

@@ -1102,6 +1129,13 @@ def reset(self) -> None:
11021129
self._num_accepted_insertions = 0
11031130
self._num_accepted_deletions = 0
11041131

1132+
# Clear the forces.
1133+
self._nonbonded_force = None
1134+
self._custom_nonbonded_force = None
1135+
1136+
# Clear the OpenMM context.
1137+
self._openmm_context = None
1138+
11051139
def restore_stats(self, stats: dict) -> None:
11061140
"""
11071141
Restore sampler statistics from a dictionary.
@@ -1140,17 +1174,11 @@ def get_stats(self) -> dict:
11401174
"num_accepted_deletions": self._num_accepted_deletions,
11411175
}
11421176

1143-
# Clear the forces.
1144-
self._nonbonded_force = None
1145-
self._custom_nonbonded_force = None
1146-
1147-
# Clear the OpenMM context.
1148-
self._openmm_context = None
1149-
11501177
def ghost_residues(self) -> _np.ndarray:
11511178
"""
1152-
Return the current indices of the ghost water residues in the OpenMM
1153-
context.
1179+
Return the residue indices of the current ghost waters in the input
1180+
topology. These are Sire/BioSimSpace residue indices and do not
1181+
include any virtual site particles that were added on context creation.
11541182
11551183
Returns
11561184
-------
@@ -1785,6 +1813,75 @@ def _get_box_information(self, space):
17851813

17861814
return cell_matrix, cell_matrix_inverse, M
17871815

1816+
@staticmethod
1817+
def _get_vsite_offsets(system):
1818+
"""
1819+
Compute per-atom OpenMM index offsets due to virtual sites.
1820+
1821+
In OpenMM, virtual site particles are appended after the real atoms
1822+
of each molecule. Molecules that appear after a molecule with virtual
1823+
sites therefore have their OpenMM particle indices shifted relative
1824+
to their Sire atom indices.
1825+
1826+
Parameters
1827+
----------
1828+
1829+
system: sire.system.System
1830+
The molecular system.
1831+
1832+
Returns
1833+
-------
1834+
1835+
total_vsites: int
1836+
Total number of virtual site particles in the system.
1837+
1838+
atom_offsets: numpy.ndarray
1839+
Array of shape (num_sire_atoms,) where entry i is the
1840+
cumulative number of virtual sites in all molecules that
1841+
precede the molecule containing Sire atom i. Adding this
1842+
offset to a Sire atom index yields the corresponding OpenMM
1843+
particle index.
1844+
1845+
mol_vsite_charges: dict
1846+
Mapping from molecule number to a list of virtual site charges in
1847+
units of elementary charge. Only molecules that carry virtual sites
1848+
appear as keys; all other molecules are absent from the dict.
1849+
"""
1850+
n_sire_atoms = system.num_atoms()
1851+
atom_offsets = _np.zeros(n_sire_atoms, dtype=_np.int32)
1852+
mol_vsite_charges = {}
1853+
total_vsites = 0
1854+
1855+
try:
1856+
vsite_mols = system["property n_virtual_sites"].molecules()
1857+
except Exception:
1858+
# No molecules carry virtual sites.
1859+
return 0, atom_offsets, mol_vsite_charges
1860+
1861+
all_atoms = system.atoms()
1862+
1863+
for mol in vsite_mols:
1864+
n_vs = int(mol.property("n_virtual_sites"))
1865+
if n_vs <= 0:
1866+
continue
1867+
1868+
# Locate where this molecule's atoms sit in the global index space,
1869+
# then shift every subsequent atom's offset by n_vs in one operation.
1870+
mol_start = int(_np.array(all_atoms.find(mol.atoms()))[0])
1871+
mol_end = mol_start + mol.num_atoms()
1872+
atom_offsets[mol_end:] += n_vs
1873+
total_vsites += n_vs
1874+
1875+
try:
1876+
raw_charges = mol.property("vs_charges")
1877+
vs_charges = [float(raw_charges[k]) for k in range(n_vs)]
1878+
except Exception:
1879+
vs_charges = [0.0] * n_vs
1880+
1881+
mol_vsite_charges[mol.number()] = vs_charges
1882+
1883+
return total_vsites, atom_offsets, mol_vsite_charges
1884+
17881885
@staticmethod
17891886
def _get_reference_indices(system, reference):
17901887
"""
@@ -1937,6 +2034,10 @@ def _initialise_gpu_memory(self):
19372034
for q in mol.property("charge"):
19382035
charges[i] = q.value()
19392036
i += 1
2037+
# Append virtual site charges (zero LJ, non-zero charge).
2038+
for vc in self._mol_vsite_charges.get(mol.number(), []):
2039+
charges[i] = vc
2040+
i += 1
19402041

19412042
# Convert to a GPU array.
19422043
charges = self._backend.to_gpu(charges.astype(_np.float32))
@@ -1954,6 +2055,12 @@ def _initialise_gpu_memory(self):
19542055
sigmas[i] = lj.sigma().value()
19552056
epsilons[i] = lj.epsilon().value()
19562057
i += 1
2058+
# Virtual sites have zero LJ. Use sigma=1.0 Å as a
2059+
# nominal placeholder (epsilon=0 so it has no effect).
2060+
for _ in self._mol_vsite_charges.get(mol.number(), []):
2061+
sigmas[i] = 1.0
2062+
epsilons[i] = 0.0
2063+
i += 1
19572064

19582065
# Convert to GPU arrays.
19592066
sigmas = self._backend.to_gpu(sigmas.astype(_np.float32))
@@ -2063,8 +2170,9 @@ def _initialise_gpu_memory(self):
20632170

20642171
# This is a null LJ parameter.
20652172
if _np.isclose(lj.epsilon().value(), 0.0):
2066-
idx = atoms.find(atom)
2067-
is_ghost_fep[idx] = 1
2173+
sire_idx = atoms.find(atom)
2174+
omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx])
2175+
is_ghost_fep[omm_idx] = 1
20682176

20692177
# The charge at the perturbed state is zero.
20702178
elif _np.isclose(charge1, 0.0):
@@ -2073,8 +2181,9 @@ def _initialise_gpu_memory(self):
20732181

20742182
# This is a null LJ parameter.
20752183
if _np.isclose(lj.epsilon().value(), 0.0):
2076-
idx = atoms.find(atom)
2077-
is_ghost_fep[idx] = 1
2184+
sire_idx = atoms.find(atom)
2185+
omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx])
2186+
is_ghost_fep[omm_idx] = 1
20782187

20792188
# Convert to GPU array.
20802189
is_ghost_fep = self._backend.to_gpu(is_ghost_fep.astype(_np.int32))

tests/test_vsites.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import os
2+
3+
import pytest
4+
import sire as sr
5+
6+
from loch import GCMCSampler
7+
8+
9+
# ---------------------------------------------------------------------------
10+
# Helpers
11+
# ---------------------------------------------------------------------------
12+
13+
14+
def _add_vsites_to_mol(mol, n_vs):
15+
"""
16+
Return a copy of mol with n_vs zero-charge virtual sites all parented to
17+
atom 0.
18+
19+
Using atom 0 for all parent references works for any molecule regardless
20+
of its size. Virtual sites have zero charge so they do not affect energies,
21+
making it possible to compare sampler results with and without virtual sites.
22+
"""
23+
n_atoms = mol.num_atoms()
24+
25+
# LocalCoordinatesSite requires three parent atoms; clamp to valid range so
26+
# this helper works even for single-atom molecules (e.g. ions).
27+
p1 = min(1, n_atoms - 1)
28+
p2 = min(2, n_atoms - 1)
29+
30+
vsite_dict = {
31+
str(k): {
32+
"vs_indices": [0, p1, p2],
33+
"vs_ows": [1, 0, 0],
34+
"vs_xs": [1, -1, 0],
35+
"vs_ys": [0, 1, -1],
36+
# Offset each vsite slightly so their positions are distinct.
37+
"vs_local": [(k + 1) * 0.03, 0, 0],
38+
}
39+
for k in range(n_vs)
40+
}
41+
42+
# All vsites are children of atom 0.
43+
parents = {str(i): [] for i in range(n_atoms)}
44+
parents["0"] = list(range(n_vs))
45+
46+
cursor = mol.cursor()
47+
cursor.set("n_virtual_sites", n_vs)
48+
cursor.set("vs_charges", [0.0] * n_vs)
49+
cursor.set("virtual_sites", vsite_dict)
50+
cursor.set("parents", parents)
51+
return cursor.commit()
52+
53+
54+
# ---------------------------------------------------------------------------
55+
# Unit tests for _get_vsite_offsets (no GPU required)
56+
# ---------------------------------------------------------------------------
57+
58+
59+
def test_get_vsite_offsets_no_vsites():
60+
"""
61+
_get_vsite_offsets on a system with no virtual sites returns all-zero
62+
offsets and empty per-molecule charge lists.
63+
"""
64+
mols = sr.load_test_files("bpti.prm7", "bpti.rst7")
65+
66+
total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(mols)
67+
68+
assert total_vsites == 0
69+
assert offsets.shape == (mols.num_atoms(),)
70+
assert (offsets == 0).all()
71+
assert mol_vsite_charges == {}
72+
73+
74+
def test_get_vsite_offsets_with_vsites():
75+
"""
76+
Adding N virtual sites to molecule 0 gives a zero offset for every atom
77+
inside that molecule and an offset of N for all atoms in subsequent
78+
molecules.
79+
"""
80+
mols = sr.load_test_files("bpti.prm7", "bpti.rst7")
81+
82+
n_vs = 2
83+
n_atoms_mol0 = mols[0].num_atoms()
84+
85+
mols_with_vs = mols.clone()
86+
mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))
87+
88+
total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(
89+
mols_with_vs
90+
)
91+
92+
assert total_vsites == n_vs
93+
94+
# Atoms in molecule 0 precede no vsite-bearing molecule, so offset is 0.
95+
assert (offsets[:n_atoms_mol0] == 0).all()
96+
97+
# All atoms in molecules 1..N follow molecule 0 and are shifted by n_vs.
98+
assert (offsets[n_atoms_mol0:] == n_vs).all()
99+
100+
# Only molecule 0 appears in the dict, with n_vs zero charges.
101+
assert len(mol_vsite_charges) == 1
102+
assert list(mol_vsite_charges.values())[0] == [0.0] * n_vs
103+
104+
105+
# ---------------------------------------------------------------------------
106+
# Integration tests (GPU required)
107+
# ---------------------------------------------------------------------------
108+
109+
110+
@pytest.mark.skipif(
111+
"CUDA_VISIBLE_DEVICES" not in os.environ,
112+
reason="Requires CUDA enabled GPU.",
113+
)
114+
@pytest.mark.parametrize("platform", ["cuda", "opencl"])
115+
def test_vsite_index_offsets(bpti, platform):
116+
"""
117+
GCMCSampler correctly offsets _water_indices and _reference_indices when
118+
virtual sites are present on a preceding molecule.
119+
120+
In BPTI the protein is molecule 0 and all water molecules follow it.
121+
Adding N vsites to the protein shifts every water's OpenMM particle index
122+
by N. The reference atoms also live in the protein, so their OpenMM
123+
indices are not shifted (no vsites precede molecule 0).
124+
"""
125+
mols, reference = bpti
126+
127+
common_kwargs = dict(
128+
reference=reference,
129+
cutoff_type="rf",
130+
cutoff="10 A",
131+
ghost_file=None,
132+
log_file=None,
133+
test=True,
134+
platform=platform,
135+
seed=42,
136+
)
137+
138+
# Baseline: no virtual sites.
139+
baseline = GCMCSampler(mols, **common_kwargs)
140+
141+
# Add 2 zero-charge virtual sites to molecule 0 (the protein). All
142+
# water molecules follow the protein, so their OpenMM indices shift by 2.
143+
n_vs = 2
144+
mols_with_vs = mols.clone()
145+
mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))
146+
147+
sampler = GCMCSampler(mols_with_vs, **common_kwargs)
148+
149+
# Total vsite count must match what we added.
150+
assert sampler._total_vsites == n_vs
151+
152+
# _num_atoms must include the virtual site particles.
153+
assert sampler._num_atoms == baseline._num_atoms + n_vs
154+
155+
# Every water oxygen index is shifted by n_vs (waters follow the protein).
156+
assert (sampler._water_indices == baseline._water_indices + n_vs).all()
157+
158+
# Reference atoms are inside molecule 0 (the protein), which has no
159+
# preceding vsites, so their OpenMM indices are unchanged.
160+
assert (sampler._reference_indices == baseline._reference_indices).all()

0 commit comments

Comments
 (0)