Skip to content

Commit 3c8a500

Browse files
committed
Backport fixes from PR #506. [ci skip]
1 parent e77c6fd commit 3c8a500

4 files changed

Lines changed: 325 additions & 441 deletions

File tree

src/BioSimSpace/Align/_merge.py

Lines changed: 146 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def merge(
9090
The merged molecule.
9191
"""
9292
from sire.legacy import CAS as _SireCAS
93+
from sire.legacy import IO as _SireIO
9394
from sire.legacy import MM as _SireMM
9495
from sire.legacy import Base as _SireBase
9596
from sire.legacy import Mol as _SireMol
@@ -1053,23 +1054,62 @@ def merge(
10531054

10541055
# Check that the merge hasn't modified the connectivity.
10551056
if roi is None:
1057+
# Pre-compute ring membership as frozensets of integer indices for O(1)
1058+
# lookup in the inner loops, avoiding repeated Sire in_ring calls.
1059+
n_merged = edit_mol.info().num_atoms()
1060+
c0_ring = frozenset(
1061+
x for x in range(molecule0.num_atoms()) if c0.in_ring(_SireMol.AtomIdx(x))
1062+
)
1063+
c1_ring = frozenset(
1064+
x for x in range(molecule1.num_atoms()) if c1.in_ring(_SireMol.AtomIdx(x))
1065+
)
1066+
conn0_ring = frozenset(
1067+
i for i in range(n_merged) if conn0.in_ring(_SireMol.AtomIdx(i))
1068+
)
1069+
conn1_ring = frozenset(
1070+
i for i in range(n_merged) if conn1.in_ring(_SireMol.AtomIdx(i))
1071+
)
1072+
1073+
# Pre-compute merged-space AtomIdx arrays so the inner loops avoid
1074+
# repeated dict lookups and AtomIdx construction.
1075+
mol0_map = [
1076+
mol0_merged_mapping[_SireMol.AtomIdx(i)]
1077+
for i in range(molecule0.num_atoms())
1078+
]
1079+
mol1_map = [
1080+
mol1_merged_mapping[_SireMol.AtomIdx(i)]
1081+
for i in range(molecule1.num_atoms())
1082+
]
1083+
10561084
# molecule0
1057-
for x in range(0, molecule0.num_atoms()):
1058-
# Convert to an AtomIdx.
1085+
for x in range(molecule0.num_atoms()):
10591086
idx = _SireMol.AtomIdx(x)
1060-
1061-
# Map the index to its position in the merged molecule.
1062-
idx_map = mol0_merged_mapping[idx]
1087+
idx_map = mol0_map[x]
1088+
x_in_ring = x in c0_ring or idx_map.value() in conn1_ring
10631089

10641090
for y in range(x + 1, molecule0.num_atoms()):
1065-
# Convert to an AtomIdx.
10661091
idy = _SireMol.AtomIdx(y)
1092+
idy_map = mol0_map[y]
1093+
1094+
# Check whether the connectivity type has changed.
1095+
conn_type_changed = c0.connection_type(
1096+
idx, idy
1097+
) != conn1.connection_type(idx_map, idy_map)
10671098

1068-
# Map the index to its position in the merged molecule.
1069-
idy_map = mol0_merged_mapping[idy]
1099+
# Fast path: if neither atom is in a ring in either end state
1100+
# and the connectivity hasn't changed, nothing to check.
1101+
if (
1102+
not x_in_ring
1103+
and y not in c0_ring
1104+
and idy_map.value() not in conn1_ring
1105+
and not conn_type_changed
1106+
):
1107+
continue
10701108

1071-
# Was a ring opened/closed?
1072-
is_ring_broken = _is_ring_broken(c0, conn1, idx, idy, idx_map, idy_map)
1109+
# Combined ring check — calls find_paths once per connectivity.
1110+
is_ring_broken, is_ring_size_change = _check_ring(
1111+
c0, conn1, idx, idy, idx_map, idy_map
1112+
)
10731113

10741114
# A ring was broken and it is not allowed.
10751115
if is_ring_broken and not allow_ring_breaking:
@@ -1079,11 +1119,6 @@ def merge(
10791119
"to 'True'."
10801120
)
10811121

1082-
# Did a ring change size?
1083-
is_ring_size_change = _is_ring_size_changed(
1084-
c0, conn1, idx, idy, idx_map, idy_map
1085-
)
1086-
10871122
# A ring changed size and it is not allowed.
10881123
if (
10891124
not is_ring_broken
@@ -1098,36 +1133,49 @@ def merge(
10981133
"preferable."
10991134
)
11001135

1101-
# The connectivity has changed.
1102-
if c0.connection_type(idx, idy) != conn1.connection_type(
1103-
idx_map, idy_map
1136+
# The connectivity changed for an unknown reason.
1137+
if (
1138+
conn_type_changed
1139+
and not (is_ring_broken or is_ring_size_change)
1140+
and not force
11041141
):
1105-
# The connectivity changed for an unknown reason.
1106-
if not (is_ring_broken or is_ring_size_change) and not force:
1107-
raise _IncompatibleError(
1108-
"The merge has changed the molecular connectivity "
1109-
"but a ring didn't open/close or change size. "
1110-
"If you want to proceed with this mapping pass "
1111-
"'force=True'. You are warned that the resulting "
1112-
"perturbation will likely be unstable."
1113-
)
1142+
raise _IncompatibleError(
1143+
"The merge has changed the molecular connectivity "
1144+
"but a ring didn't open/close or change size. "
1145+
"If you want to proceed with this mapping pass "
1146+
"'force=True'. You are warned that the resulting "
1147+
"perturbation will likely be unstable."
1148+
)
1149+
11141150
# molecule1
1115-
for x in range(0, molecule1.num_atoms()):
1116-
# Convert to an AtomIdx.
1151+
for x in range(molecule1.num_atoms()):
11171152
idx = _SireMol.AtomIdx(x)
1118-
1119-
# Map the index to its position in the merged molecule.
1120-
idx_map = mol1_merged_mapping[idx]
1153+
idx_map = mol1_map[x]
1154+
x_in_ring = x in c1_ring or idx_map.value() in conn0_ring
11211155

11221156
for y in range(x + 1, molecule1.num_atoms()):
1123-
# Convert to an AtomIdx.
11241157
idy = _SireMol.AtomIdx(y)
1158+
idy_map = mol1_map[y]
11251159

1126-
# Map the index to its position in the merged molecule.
1127-
idy_map = mol1_merged_mapping[idy]
1160+
# Check whether the connectivity type has changed.
1161+
conn_type_changed = c1.connection_type(
1162+
idx, idy
1163+
) != conn0.connection_type(idx_map, idy_map)
11281164

1129-
# Was a ring opened/closed?
1130-
is_ring_broken = _is_ring_broken(c1, conn0, idx, idy, idx_map, idy_map)
1165+
# Fast path: if neither atom is in a ring in either end state
1166+
# and the connectivity hasn't changed, nothing to check.
1167+
if (
1168+
not x_in_ring
1169+
and y not in c1_ring
1170+
and idy_map.value() not in conn0_ring
1171+
and not conn_type_changed
1172+
):
1173+
continue
1174+
1175+
# Combined ring check — calls find_paths once per connectivity.
1176+
is_ring_broken, is_ring_size_change = _check_ring(
1177+
c1, conn0, idx, idy, idx_map, idy_map
1178+
)
11311179

11321180
# A ring was broken and it is not allowed.
11331181
if is_ring_broken and not allow_ring_breaking:
@@ -1137,11 +1185,6 @@ def merge(
11371185
"to 'True'."
11381186
)
11391187

1140-
# Did a ring change size?
1141-
is_ring_size_change = _is_ring_size_changed(
1142-
c1, conn0, idx, idy, idx_map, idy_map
1143-
)
1144-
11451188
# A ring changed size and it is not allowed.
11461189
if (
11471190
not is_ring_broken
@@ -1156,19 +1199,19 @@ def merge(
11561199
"preferable."
11571200
)
11581201

1159-
# The connectivity has changed.
1160-
if c1.connection_type(idx, idy) != conn0.connection_type(
1161-
idx_map, idy_map
1202+
# The connectivity changed for an unknown reason.
1203+
if (
1204+
conn_type_changed
1205+
and not (is_ring_broken or is_ring_size_change)
1206+
and not force
11621207
):
1163-
# The connectivity changed for an unknown reason.
1164-
if not (is_ring_broken or is_ring_size_change) and not force:
1165-
raise _IncompatibleError(
1166-
"The merge has changed the molecular connectivity "
1167-
"but a ring didn't open/close or change size. "
1168-
"If you want to proceed with this mapping pass "
1169-
"'force=True'. You are warned that the resulting "
1170-
"perturbation will likely be unstable."
1171-
)
1208+
raise _IncompatibleError(
1209+
"The merge has changed the molecular connectivity "
1210+
"but a ring didn't open/close or change size. "
1211+
"If you want to proceed with this mapping pass "
1212+
"'force=True'. You are warned that the resulting "
1213+
"perturbation will likely be unstable."
1214+
)
11721215

11731216
# Set the "connectivity" property. If the end state connectivity is the same,
11741217
# then we can just set the "connectivity" property.
@@ -1177,24 +1220,22 @@ def merge(
11771220
else:
11781221
edit_mol.set_property("connectivity0", conn0)
11791222
edit_mol.set_property("connectivity1", conn1)
1180-
1181-
# Create the CLJNBPairs matrices.
1182-
ff = molecule0.property(ff0)
1183-
1184-
# Create the new intrascale matrices.
1185-
scale_factor_14 = _SireMM.CLJScaleFactor(
1186-
ff.electrostatic14_scale_factor(), ff.vdw14_scale_factor()
1223+
# Merge the intrascale properties of the two molecules.
1224+
merged_intrascale = _SireIO.mergeIntrascale(
1225+
molecule0.property("intrascale"),
1226+
molecule1.property("intrascale"),
1227+
edit_mol.info(),
1228+
mol0_merged_mapping,
1229+
mol1_merged_mapping,
11871230
)
1188-
clj_nb_pairs0 = _SireMM.CLJNBPairs(conn0, scale_factor_14)
1189-
clj_nb_pairs1 = _SireMM.CLJNBPairs(conn1, scale_factor_14)
11901231

11911232
# Store the two molecular components.
11921233
edit_mol.set_property("molecule0", molecule0)
11931234
edit_mol.set_property("molecule1", molecule1)
11941235

11951236
# Set the "intrascale" properties.
1196-
edit_mol.set_property("intrascale0", clj_nb_pairs0)
1197-
edit_mol.set_property("intrascale1", clj_nb_pairs1)
1237+
edit_mol.set_property("intrascale0", merged_intrascale[0])
1238+
edit_mol.set_property("intrascale1", merged_intrascale[1])
11981239

11991240
# Set the "forcefield" properties.
12001241
edit_mol.set_property("forcefield0", molecule0.property(ff0))
@@ -1217,14 +1258,14 @@ def merge(
12171258
return mol
12181259

12191260

1220-
def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50):
1261+
def _check_ring(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50, max_ring_size=12):
12211262
"""
1222-
Internal function to test whether a perturbation changes the connectivity
1223-
around two atoms such that a ring is broken.
1263+
Internal function to test whether a perturbation opens/closes a ring or
1264+
changes its size for a given pair of atoms.
12241265
1225-
Two atoms share a ring if and only if there are at least two distinct paths
1226-
between them in the connectivity graph. A ring is broken when this changes
1227-
between end states.
1266+
Combines the work of the former _is_ring_broken and _is_ring_size_changed
1267+
into a single function, calling find_paths only once per connectivity
1268+
object rather than twice.
12281269
12291270
Parameters
12301271
----------
@@ -1251,18 +1292,31 @@ def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50):
12511292
Maximum path length used when searching for rings. The default of 50
12521293
covers typical macrocycles. Increase if larger rings need to be
12531294
detected.
1295+
1296+
max_ring_size : int
1297+
The maximum ring size considered when checking for ring size changes.
1298+
1299+
Returns
1300+
-------
1301+
1302+
(is_ring_broken, is_ring_size_changed) : (bool, bool)
12541303
"""
12551304

1305+
# Find all paths between the two atoms in each end state. A single
1306+
# find_paths call covers both the ring-broken and ring-size-changed checks.
1307+
paths0 = conn0.find_paths(idx0, idy0, max_path)
1308+
paths1 = conn1.find_paths(idx1, idy1, max_path)
1309+
n0 = len(paths0)
1310+
n1 = len(paths1)
1311+
1312+
# --- Ring broken check ---
1313+
12561314
# Two atoms share a ring iff there are ≥2 distinct paths between them.
12571315
# This also handles atoms adjacent to a ring (not members themselves):
12581316
# substituents on neighbouring ring atoms have ≥2 paths between them
12591317
# through the ring, which collapses to 1 when the ring is broken.
1260-
n0 = len(conn0.find_paths(idx0, idy0, max_path))
1261-
n1 = len(conn1.find_paths(idx1, idy1, max_path))
1262-
1263-
# Ring break/open: one state has ≥2 paths, the other doesn't.
12641318
if (n0 >= 2) != (n1 >= 2):
1265-
return True
1319+
return True, False
12661320

12671321
# Supplementary check for rings larger than max_path: find_paths may only
12681322
# find the direct-bond path and miss the long way around the ring, giving
@@ -1271,67 +1325,33 @@ def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50):
12711325
if (conn0.in_ring(idx0) and conn0.in_ring(idy0)) != (
12721326
conn1.in_ring(idx1) and conn1.in_ring(idy1)
12731327
):
1274-
return True
1328+
return True, False
12751329

12761330
# A direct bond was replaced by a ring path (or vice versa), leaving the
12771331
# atoms completely disconnected in one topology (0 paths). This occurs
12781332
# when a new ring forms and the old direct bond between two atoms is not
12791333
# present in the ring topology.
1280-
return (n0 == 0) != (n1 == 0)
1281-
1282-
1283-
def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12):
1284-
"""
1285-
Internal function to test whether a perturbation changes the connectivity
1286-
around two atoms such that a ring changes size.
1287-
1288-
The size of the smallest ring containing two atoms equals the length of
1289-
the shortest path between them. If the shortest path changes between end
1290-
states, the ring has changed size.
1291-
1292-
Parameters
1293-
----------
1294-
1295-
conn0 : Sire.Mol.Connectivity
1296-
The connectivity object for the first end state.
1297-
1298-
conn1 : Sire.Mol.Connectivity
1299-
The connectivity object for the second end state.
1300-
1301-
idx0 : Sire.Mol.AtomIdx
1302-
The index of the first atom in the first state.
1303-
1304-
idy0 : Sire.Mol.AtomIdx
1305-
The index of the second atom in the first state.
1306-
1307-
idx1 : Sire.Mol.AtomIdx
1308-
The index of the first atom in the second state.
1309-
1310-
idy1 : Sire.Mol.AtomIdx
1311-
The index of the second atom in the second state.
1312-
1313-
max_ring_size : int
1314-
The maximum size of what is considered to be a ring.
1315-
"""
1334+
if (n0 == 0) != (n1 == 0):
1335+
return True, False
13161336

1317-
# Work out the paths connecting the atoms in the two end states.
1318-
paths0 = conn0.find_paths(idx0, idy0, max_ring_size)
1319-
paths1 = conn1.find_paths(idx1, idy1, max_ring_size)
1337+
# --- Ring size changed check ---
13201338

1321-
# No ring in state 0 — nothing to compare.
1322-
if len(paths0) < 2:
1323-
return False
1339+
# Filter to paths within max_ring_size to avoid flagging macrocycle size
1340+
# changes for rings beyond the threshold.
1341+
short0 = [p for p in paths0 if len(p) <= max_ring_size]
13241342

1325-
ring0 = min(len(p) for p in paths0)
1343+
# No ring of relevant size in state 0 — nothing to compare.
1344+
if len(short0) < 2:
1345+
return False, False
13261346

1327-
# No ring in state 1 — the ring was broken rather than resized; let
1328-
# _is_ring_broken handle this case.
1329-
if len(paths1) < 2:
1330-
return False
1347+
short1 = [p for p in paths1 if len(p) <= max_ring_size]
13311348

1332-
ring1 = min(len(p) for p in paths1)
1349+
# No ring of relevant size in state 1 — the ring was broken rather than
1350+
# resized; let the ring-broken check handle this case.
1351+
if len(short1) < 2:
1352+
return False, False
13331353

1334-
return ring0 != ring1
1354+
return False, min(len(p) for p in short0) != min(len(p) for p in short1)
13351355

13361356

13371357
def _removeDummies(molecule, is_lambda1):

0 commit comments

Comments
 (0)