From c39308288ba2746451072ee3f65b611c2b223808 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Thu, 28 May 2026 06:20:38 +0000 Subject: [PATCH 01/10] Update molecular inorganics bond handling (GHI # 218) --- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index 753c5450..2e292c1a 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6658,7 +6658,7 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * ligand_elem_array[ligand_type_count++] = neigh_elem; } - if (at[i].bond_type[n] > 1 || is_el_a_metal(at[neigh_idx].el_number)) + if ((at[i].bond_type[n] > 1 && at[i].bond_type[n] != 9) || is_el_a_metal(at[neigh_idx].el_number)) { must_keep_neighbor = 1; } @@ -6699,7 +6699,7 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * /* Check if the neighboring atom has more than 1 bond connected to the metal atom or * if the neighbour is also a metal atom. 
In both cases no disconnection has to be done */ - if (at[i].bond_type[n] > 1 || is_el_a_metal(at[neighbor_idx].el_number)) + if ((at[i].bond_type[n] > 1 && at[i].bond_type[n] != 9) || is_el_a_metal(at[neighbor_idx].el_number)) { ip->bMolecularInorganicsReconnectedInChI = 1; continue; /* Skip disconnection for this bond */ From 4d37db2c024b932f02b6c0be8c83f876053de606 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Thu, 28 May 2026 12:09:00 +0000 Subject: [PATCH 02/10] Update molecular inorganics unit test with GHI # 218 --- .../test_unit/test_molecularInorganics.cpp | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp index 98b3ae9e..5c8f7259 100644 --- a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp +++ b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp @@ -199,3 +199,128 @@ M END FreeINCHI(poutput); } + +TEST(test_molecularInorganics, test_MI_GHI_218) +{ + const char* molblock = R"(stereochemistry.ds/mol_stereochemistry.ds_145.mol +Generated by WebMolKit + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 45 52 0 0 1 +M V30 BEGIN ATOM +M V30 1 Ti 7.3506 0.0000 0.0000 0 +M V30 2 O 6.1371 1.3858 0.0000 0 +M V30 3 O 8.5641 -1.3858 0.0000 0 +M V30 4 O 8.5641 1.3858 0.0000 0 +M V30 5 O 6.1371 -1.3858 0.0000 0 +M V30 6 O 8.1318 5.5124 0.0000 0 +M V30 7 O 6.5694 -5.5124 0.0000 0 +M V30 8 O 3.4466 2.8024 0.0000 0 +M V30 9 O 11.2546 -2.8024 0.0000 0 +M V30 10 O 3.3839 8.2243 0.0000 0 +M V30 11 O 11.3173 -8.2243 0.0000 0 +M V30 12 O 10.3725 1.5381 0.0000 0 +M V30 13 O 4.3287 -1.5381 0.0000 0 +M V30 14 O 14.9023 4.0839 0.0000 0 +M V30 15 O -0.2011 -4.0839 0.0000 0 +M V30 16 C 6.6006 2.8123 0.0000 0 +M V30 17 C 8.1006 -2.8123 0.0000 0 +M V30 18 C 8.1006 2.8123 0.0000 0 +M V30 19 C 6.6006 -2.8123 0.0000 0 +M V30 20 C 5.7985 4.1534 0.0000 0 +M V30 21 C 8.9025 -4.1534 
0.0000 0 +M V30 22 C 8.9025 4.1534 0.0000 0 +M V30 23 C 5.7985 -4.1534 0.0000 0 +M V30 24 C 6.5694 5.5124 0.0000 0 +M V30 25 C 8.1318 -5.5124 0.0000 0 +M V30 26 C 10.4025 4.1360 0.0000 0 +M V30 27 C 4.2987 -4.1360 0.0000 0 +M V30 28 C 4.1715 4.1157 0.0000 0 +M V30 29 C 10.5297 -4.1157 0.0000 0 +M V30 30 C 5.7664 6.9281 0.0000 0 +M V30 31 C 8.9346 -6.9281 0.0000 0 +M V30 32 C 3.3361 5.5124 0.0000 0 +M V30 33 C 11.3651 -5.5124 0.0000 0 +M V30 34 C 4.1389 6.9281 0.0000 0 +M V30 35 C 10.5621 -6.9281 0.0000 0 +M V30 36 C 11.1374 2.8284 0.0000 0 +M V30 37 C 3.5636 -2.8284 0.0000 0 +M V30 38 C 11.1676 5.4263 0.0000 0 +M V30 39 C 3.5336 -5.4263 0.0000 0 +M V30 40 C 12.6374 2.8110 0.0000 0 +M V30 41 C 2.0638 -2.8110 0.0000 0 +M V30 42 C 12.6674 5.4089 0.0000 0 +M V30 43 C 2.0338 -5.4089 0.0000 0 +M V30 44 C 13.4023 4.1013 0.0000 0 +M V30 45 C 1.2988 -4.1013 0.0000 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 2 2 16 +M V30 2 2 3 17 +M V30 3 1 4 18 +M V30 4 1 5 19 +M V30 5 1 6 22 +M V30 6 1 6 24 +M V30 7 1 7 23 +M V30 8 1 7 25 +M V30 9 1 8 28 +M V30 10 1 9 29 +M V30 11 1 10 34 +M V30 12 1 11 35 +M V30 13 1 12 36 +M V30 14 1 13 37 +M V30 15 1 14 44 +M V30 16 1 15 45 +M V30 17 1 16 18 +M V30 18 1 16 20 +M V30 19 1 17 19 +M V30 20 1 17 21 +M V30 21 2 18 22 +M V30 22 2 19 23 +M V30 23 1 20 24 +M V30 24 2 20 28 +M V30 25 1 21 25 +M V30 26 2 21 29 +M V30 27 1 22 26 +M V30 28 1 23 27 +M V30 29 2 24 30 +M V30 30 2 25 31 +M V30 31 2 26 36 +M V30 32 1 26 38 +M V30 33 2 27 37 +M V30 34 1 27 39 +M V30 35 1 28 32 +M V30 36 1 29 33 +M V30 37 1 30 34 +M V30 38 1 31 35 +M V30 39 2 32 34 +M V30 40 2 33 35 +M V30 41 1 36 40 +M V30 42 1 37 41 +M V30 43 2 38 42 +M V30 44 2 39 43 +M V30 45 2 40 44 +M V30 46 2 41 45 +M V30 47 1 42 44 +M V30 48 1 43 45 +M V30 49 1 1 5 +M V30 50 1 1 4 CFG=3 +M V30 51 9 1 2 CFG=1 DISP=COORD +M V30 52 9 1 3 DISP=COORD +M V30 END BOND +M V30 END CTAB +M END)"; + + char options[] = "-MolecularInorganics"; + inchi_Output output; + inchi_Output* poutput = &output; + 
memset(poutput, 0, sizeof(*poutput)); + + const char expected_inchi[] = "InChI=1B/2C15H10O7.Ti/c2*16-6-1-2-8(9(18)3-6)15-14(21)13(20)12-10(19)4-7(17)5-11(12)22-15;/h2*1-5,16-19,21H;/q2*-1;+4/p-2"; + + EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); + EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} From d0f9d554e50a31f6fa9a801cae530ec7596b8727 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Tue, 9 Jun 2026 13:48:08 +0000 Subject: [PATCH 03/10] Update MI preprocessing check with helper function --- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 25 +++++++++++++++++++++---- INCHI-1-SRC/INCHI_BASE/src/strutil.h | 27 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index 2e292c1a..71c6db7b 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6429,6 +6429,23 @@ void updateNeighborListMolecularInorganics(inp_ATOM *at, int atom_idx, int neigh } } +/************************************************************************ + * @nnuk + * @brief Determine whether a metal-ligand bond must always be preserved + * during Molecular Inorganics preprocessing. + ***********************************************************************/ +int MolecularInorganicsKeepBond(inp_ATOM *at, int metal_idx, int neigh_idx, int bond_pos) +{ + int bond_type = at[metal_idx].bond_type[bond_pos]; + + if (is_el_a_metal(at[neigh_idx].el_number) || (bond_type > 1 && bond_type != COORDINATIVE_BOND)) + { + return 1; + } + + return 0; +} + /***************************************************************************** * (@nnuk :: Nauman Ullah Khan) * @brief Function to preprocess molecular inorganics structures by disconnecting metal bonds and handling salts + ammonium salts. 
@@ -6472,7 +6489,7 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * int i, j, n, k, t; int binaryValue; int disconnectDecision; - int neighbor_idx, neigh_pos; + int neigh_pos; int num_metals, current_component; /* memory allocation */ @@ -6658,7 +6675,7 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * ligand_elem_array[ligand_type_count++] = neigh_elem; } - if ((at[i].bond_type[n] > 1 && at[i].bond_type[n] != 9) || is_el_a_metal(at[neigh_idx].el_number)) + if (MolecularInorganicsKeepBond(at, i, neigh_idx, n)) { must_keep_neighbor = 1; } @@ -6695,11 +6712,11 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * /* Proceed with electronegativity and disconnection logic */ for (n = at[i].valence - 1; n >= 0; n--) { - neighbor_idx = at[i].neighbor[n]; + int neighbor_idx = at[i].neighbor[n]; /* Check if the neighboring atom has more than 1 bond connected to the metal atom or * if the neighbour is also a metal atom. In both cases no disconnection has to be done */ - if ((at[i].bond_type[n] > 1 && at[i].bond_type[n] != 9) || is_el_a_metal(at[neighbor_idx].el_number)) + if (MolecularInorganicsKeepBond(at, i, neighbor_idx, n)) { ip->bMolecularInorganicsReconnectedInChI = 1; continue; /* Skip disconnection for this bond */ diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.h b/INCHI-1-SRC/INCHI_BASE/src/strutil.h index 57e2c4ab..d7d884b8 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.h +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.h @@ -111,6 +111,33 @@ extern "C" int getElValenceforMolecularInorganics(int nPeriodicNum, int charge, int val_num); /* Function retrieves element type value for molecular inorganics functionality*/ int getElTypeforMolecularInorganics(int nPeriodicNum); + + /** + * @nnuk + * + * @brief Determine whether a metal-ligand bond must always be preserved + * during Molecular Inorganics preprocessing. 
Some description on + * how InChI sees the bond types: + * Bond type 1 = Single Bond + * Bond type 2 = Double Bond + * Bond type 3 = Triple Bond + * Bond type 4 = Aromatic Bond + * Bond type 9 = Coordinative Bond + * + * + * @param at Input atom array. + * @param metal_idx Index of the metal atom. + * @param neigh_idx Index of the neighboring atom. + * @param bond_pos Position of the bond in the metal neighbor list. + * + * @return int + * 1 if the bond must be preserved. + * 0 if the bond may still be evaluated for disconnection. + */ + int MolecularInorganicsKeepBond(inp_ATOM* at, + int metal_idx, + int neigh_idx, + int bond_pos); /** * @brief Set the enhanced stereochemistry for t- and m-layers From c15d53f9507e00db0e7a016f3b469acd43f09614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=A4nsch?= <168176402+fbaensch-beilstein@users.noreply.github.com> Date: Wed, 10 Jun 2026 16:01:05 +0200 Subject: [PATCH 04/10] fix: treat MOLfile VAL as valence value for coordinative-bond donors --- INCHI-1-SRC/INCHI_BASE/src/mol2atom.c | 24 ++++++++++++++++++++---- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 9 +++++++++ INCHI-1-SRC/INCHI_BASE/src/util.c | 14 +++++++++++--- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/mol2atom.c b/INCHI-1-SRC/INCHI_BASE/src/mol2atom.c index 21af044f..487685fd 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/mol2atom.c +++ b/INCHI-1-SRC/INCHI_BASE/src/mol2atom.c @@ -1046,10 +1046,26 @@ void calculate_valences(MOL_FMT_DATA *mfdata, if (!is_el_a_metal(at[a1].el_number) && additional_H) { - /*If the atom is a non - metal and has coordination bonds, adjust valence* / - /* (@fbaensch) : Get new valence based on element number, charge, and valence defined in input file */ - newValence = get_el_valence(at[a1].el_number, at[a1].charge, mfdata->ctab.atoms[a1].valence); - newValence += additional_H; + /* (@fbaensch) Non-metal donor of one or more coordinative (type 9) + * bonds. 
Treat the MOLfile VAL field as the donor's covalent + * valence VALUE (consistent with the else-branch below), falling + * back to the element's standard valence when VAL is 0. Then add + * one unit per coordinative bond so the dative bonds do not consume + * hydrogen-filling slots: num_H = base_val - (covalent bonds present). + * + * NOTE for input authors: a type-9 bond models a NEUTRAL lone-pair + * donor. For a coordinative bond to an ANIONIC donor (e.g. the O of + * an M-OH), set the donor's covalent valence via the VAL field + * (and/or its formal charge); otherwise the donor is completed to + * its neutral valence (e.g. O -> H2O). A genuinely ionic M-O(H) + * bond such as NaOH is best drawn with a plain single bond - the + * standard metal disconnection already yields Na+ + OH-. */ + int base_val = mfdata->ctab.atoms[a1].valence; + if (!base_val) + { + base_val = get_el_valence(at[a1].el_number, at[a1].charge, 0); + } + newValence = base_val + additional_H; } else { diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index 71c6db7b..52105805 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6389,6 +6389,15 @@ int shouldBondBeCut(int atom1, int atom2) { int index1, index2, binaryValue; + /* Bounds-check the 1-based periodic numbers before indexing the + * NUM_ELEMENTS x NUM_ELEMENTS table. An out-of-range value (e.g. a + * pseudo-atom with el_number 0, or a number beyond the table) would + * otherwise read out of bounds. Default to 0 = keep the bond. 
*/ + if (atom1 < 1 || atom1 > NUM_ELEMENTS || atom2 < 1 || atom2 > NUM_ELEMENTS) + { + return 0; + } + /* Get the indices corresponding to the atomic numbers */ index1 = atom1 - 1; index2 = atom2 - 1; diff --git a/INCHI-1-SRC/INCHI_BASE/src/util.c b/INCHI-1-SRC/INCHI_BASE/src/util.c index 011884b5..28740c40 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/util.c +++ b/INCHI-1-SRC/INCHI_BASE/src/util.c @@ -406,13 +406,21 @@ int if_skip_add_H( int nPeriodicNum ) ****************************************************************************/ int get_el_valence( int nPeriodicNum, int charge, int val_num ) { - if ( charge < MIN_ATOM_CHARGE || charge > MAX_ATOM_CHARGE || val_num >= MAX_NUM_VALENCES ) + int idx = ( nPeriodicNum > 1 ) ? nPeriodicNum + 1 : 0; + + /* Bounds-check every index before touching ElData[].cValence[][]. + * The original code guarded only the upper bound of val_num and the + * charge range; a negative nPeriodicNum/val_num or an element number + * beyond the table would read out of bounds. Return 0 (no known + * valence) for any out-of-range input. */ + if ( nPeriodicNum < 0 || idx > nElDataLen || + val_num < 0 || val_num >= MAX_NUM_VALENCES || + charge < MIN_ATOM_CHARGE || charge > MAX_ATOM_CHARGE ) { return 0; } - return - ElData[nPeriodicNum > 1 ? 
nPeriodicNum + 1 : 0].cValence[NEUTRAL_STATE + charge][val_num]; + return ElData[idx].cValence[NEUTRAL_STATE + charge][val_num]; } From c55c3a4181a61a5ac73ededc1d150ba6ef377109 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Thu, 11 Jun 2026 06:46:07 +0000 Subject: [PATCH 05/10] Add unit tests --- INCHI-1-SRC/INCHI_BASE/src/strutil.h | 11 +- .../test_unit/test_molecularInorganics.cpp | 133 ++++++++++++++++++ 2 files changed, 141 insertions(+), 3 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.h b/INCHI-1-SRC/INCHI_BASE/src/strutil.h index d7d884b8..79db0065 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.h +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.h @@ -114,9 +114,14 @@ extern "C" /** * @nnuk - * + * * @brief Determine whether a metal-ligand bond must always be preserved - * during Molecular Inorganics preprocessing. Some description on + * during Molecular Inorganics preprocessing. The metal-ligand + * bond must always be kept when the neighbour is another metal + * atom, or when the bond type is greater than 1 and the bond is + * not a coordinative bond (type 9). 
+ * + * Some description on * how InChI sees the bond types: * Bond type 1 = Single Bond * Bond type 2 = Double Bond @@ -138,7 +143,7 @@ extern "C" int metal_idx, int neigh_idx, int bond_pos); - + /** * @brief Set the enhanced stereochemistry for t- and m-layers * diff --git a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp index 5c8f7259..ce9d3208 100644 --- a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp +++ b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp @@ -324,3 +324,136 @@ M END)"; FreeINCHI(poutput); } + +TEST(test_molecularInorganics, test_MI_Na_H2O_CoordBond) +{ + const char* molblock = R"( + ACCLDraw06092610472D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 2 1 0 0 0 +M V30 BEGIN ATOM +M V30 1 O 3.3082 -5.8926 0 0 VAL=1 +M V30 2 Na 2.2853 -6.4832 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 1 2 +M V30 END BOND +M V30 END CTAB +M END +)"; + + char options[] = "-MolecularInorganics"; + inchi_Output output; + inchi_Output* poutput = &output; + memset(poutput, 0, sizeof(*poutput)); + + const char expected_inchi[] = "InChI=1B/Na.H2O/h;1H2/q+1;/p-1"; + + EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); + EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} + +TEST(test_molecularInorganics, test_MI_Na_H2O_CoordBond_Charged) +{ + const char* molblock = R"( + ACCLDraw06092610502D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 2 1 0 0 0 +M V30 BEGIN ATOM +M V30 1 Na 1.9415 -11.2332 0 0 CHG=1 +M V30 2 O 2.9644 -10.6426 0 0 CHG=-1 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 END BOND +M V30 END CTAB +M END +)"; + + char options[] = "-MolecularInorganics"; + inchi_Output output; + inchi_Output* poutput = &output; + memset(poutput, 0, sizeof(*poutput)); + + const char expected_inchi[] = "InChI=1B/Na.H2O/h;1H2/q+1;/p-1"; + + EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); + 
EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} + +TEST(test_molecularInorganics, test_MI_Na_Methoxide_Charged) +{ + const char molblock[] = R"( + ACCLDraw06092600112D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 3 2 0 0 0 +M V30 BEGIN ATOM +M V30 1 Na 3.2848 -9.6749 0 0 CHG=1 +M V30 2 O 4.3077 -9.0843 0 0 CHG=-1 +M V30 3 C 5.3309 -9.675 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 2 1 2 3 +M V30 END BOND +M V30 END CTAB +M END +)"; + + char options[] = "-MolecularInorganics"; + inchi_Output output; + inchi_Output* poutput = &output; + memset(poutput, 0, sizeof(*poutput)); + + const char expected_inchi[] = "InChI=1B/CH3O.Na/c1-2;/h1H3;/q-1;+1"; + + EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); + EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} + +TEST(test_molecularInorganics, test_MI_Na_Methoxide_Valence1) +{ + const char molblock[] = R"( + ACCLDraw06092600112D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 3 2 0 0 0 +M V30 BEGIN ATOM +M V30 1 Na 3.5661 -6.1124 0 0 +M V30 2 O 4.589 -5.5218 0 0 VAL=1 +M V30 3 C 5.6121 -6.1125 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 2 1 2 3 +M V30 END BOND +M V30 END CTAB +M END +)"; + + char options[] = "-MolecularInorganics"; + inchi_Output output; + inchi_Output* poutput = &output; + memset(poutput, 0, sizeof(*poutput)); + + const char expected_inchi[] = "InChI=1B/CH3O.Na/c1-2;/h1H3;/q-1;+1"; + + EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); + EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} + From d1becd3410415bb5e51c63e1bef45614568f7d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=A4nsch?= <168176402+fbaensch-beilstein@users.noreply.github.com> Date: Fri, 12 Jun 2026 11:50:46 +0200 Subject: [PATCH 06/10] fix: convert coordinative bonds to single bonds and cancel paired charges Realize each type-9 (coordinative) bond as a single bond at the 
start of MolecularInorganicsPreprocessing, and cancel the paired +/- formal charges of the two bonded atoms (the donated lone pair becomes the shared bonding pair). This unifies the two equivalent coordinative-bond drawings - donor VAL set, or explicit +/- charges - so both yield the same InChI (e.g. NaOH drawn either way -> Na+ + OH-). Fixes the previously failing charged-input cases in test_molecularInorganics (now 11/11). --- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 64 ++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index 52105805..d339ca30 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6455,6 +6455,65 @@ int MolecularInorganicsKeepBond(inp_ATOM *at, int metal_idx, int neigh_idx, int return 0; } +/***************************************************************************** + * Convert coordinative (type 9) bonds into normal single bonds. + * + * A coordinative bond is the zero-order, charge-separated equivalent of a + * single bond: the two bonded atoms carry equal and opposite formal charges + * (e.g. M(+) ... L(-)). Realizing it as a single bond turns the donated lone + * pair into the shared bonding pair, so the +/- charges cancel. This routine + * performs that change in place: every COORDINATIVE_BOND becomes + * BOND_TYPE_SINGLE and, when the two atoms carry opposite-sign charges, each is + * moved one step toward neutral (one charge pair neutralized per bond). 
+ *****************************************************************************/ +static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) +{ + int i, k, k2, j; + + for (i = 0; i < num_atoms; i++) + { + for (k = 0; k < at[i].valence; k++) + { + if (at[i].bond_type[k] != COORDINATIVE_BOND) + { + continue; + } + + j = at[i].neighbor[k]; + if (j < 0 || j >= num_atoms) + { + continue; + } + + /* Realize the bond as single on both endpoints. Converting both + * sides here means the reverse half-bond is no longer type 9, so + * the same bond is not processed (or its charge cancelled) twice. */ + at[i].bond_type[k] = BOND_TYPE_SINGLE; + for (k2 = 0; k2 < at[j].valence; k2++) + { + if (at[j].neighbor[k2] == i && at[j].bond_type[k2] == COORDINATIVE_BOND) + { + at[j].bond_type[k2] = BOND_TYPE_SINGLE; + break; + } + } + + /* Cancel the paired +/- charges: the lone pair becomes the bonding + * pair, so each atom moves one unit toward neutral. */ + if (at[i].charge > 0 && at[j].charge < 0) + { + at[i].charge--; + at[j].charge++; + } + else if (at[i].charge < 0 && at[j].charge > 0) + { + at[i].charge++; + at[j].charge--; + } + } + } +} + /***************************************************************************** * (@nnuk :: Nauman Ullah Khan) * @brief Function to preprocess molecular inorganics structures by disconnecting metal bonds and handling salts + ammonium salts. @@ -6526,6 +6585,11 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * } } + /* Realize coordinative (type 9) bonds as single bonds and cancel the + * paired +/- formal charges, so the rest of the pipeline treats them + * exactly like the equivalent single-bonded structure. 
*/ + ConvertCoordinativeBondsToSingle(at, num_at); + /* Function call to Mark ring systems */ MarkRingSystemsInp(at, num_at, 0); From 8e98baf9e1f211093b57f811412773717d131b17 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Fri, 12 Jun 2026 16:00:08 +0000 Subject: [PATCH 07/10] Add unit test for MI bond handling --- .../test_unit/test_molecularInorganics.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp index ce9d3208..b2401491 100644 --- a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp +++ b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp @@ -457,3 +457,29 @@ M END FreeINCHI(poutput); } + +TEST(test_molecularInorganics, test_MI_Na_H2O_V2000_Charged) +{ + const char molblock[] = R"( + ACCLDraw06092611092D + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 11.6670 -10.9290 0.0000 Na 0 3 0 0 0 0 0 0 0 0 0 0 + 12.6899 -10.3384 0.0000 O 0 5 0 0 0 2 0 0 0 0 0 0 + 2 1 1 0 0 0 0 +M CHG 2 1 1 2 -1 +M END +)"; + + char options[] = "-MolecularInorganics"; + inchi_Output output; + inchi_Output* poutput = &output; + memset(poutput, 0, sizeof(*poutput)); + + const char expected_inchi[] = "InChI=1B/Na.H2O/h;1H2/q+1;/p-1"; + + EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); + EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} From fff017262aea4fe196ce4a61a315398a02a65ec1 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Fri, 12 Jun 2026 16:13:22 +0000 Subject: [PATCH 08/10] Fix for charged atom with just single bond --- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index d339ca30..ed13c4a8 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ 
b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6474,13 +6474,17 @@ static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) { for (k = 0; k < at[i].valence; k++) { - if (at[i].bond_type[k] != COORDINATIVE_BOND) + j = at[i].neighbor[k]; + if (j < 0 || j >= num_atoms) { continue; } - j = at[i].neighbor[k]; - if (j < 0 || j >= num_atoms) + if (at[i].bond_type[k] != COORDINATIVE_BOND && + !(at[i].bond_type[k] == BOND_TYPE_SINGLE && + is_el_a_metal(at[i].el_number) != is_el_a_metal(at[j].el_number) && + ((at[i].charge > 0 && at[j].charge < 0) || + (at[i].charge < 0 && at[j].charge > 0)))) { continue; } @@ -6488,10 +6492,12 @@ static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) /* Realize the bond as single on both endpoints. Converting both * sides here means the reverse half-bond is no longer type 9, so * the same bond is not processed (or its charge cancelled) twice. */ + at[i].bond_type[k] = BOND_TYPE_SINGLE; + for (k2 = 0; k2 < at[j].valence; k2++) { - if (at[j].neighbor[k2] == i && at[j].bond_type[k2] == COORDINATIVE_BOND) + if (at[j].neighbor[k2] == i) { at[j].bond_type[k2] = BOND_TYPE_SINGLE; break; From 26fdf300cadfc716a4c2b200f64217c778cd8060 Mon Sep 17 00:00:00 2001 From: Nauman Ullah Khan <43783619+nnuk@users.noreply.github.com> Date: Tue, 16 Jun 2026 11:42:51 +0000 Subject: [PATCH 09/10] Add a helper function for the MolecularInorganics unit tests --- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 6 +- .../test_unit/test_molecularInorganics.cpp | 166 ++++-------------- 2 files changed, 39 insertions(+), 133 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index ed13c4a8..e8bd80aa 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6480,6 +6480,9 @@ static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) continue; } + /* Accept coordinative bonds and equivalent charge-separated + * metal–ligand single bonds with 
opposite formal charges. + */ if (at[i].bond_type[k] != COORDINATIVE_BOND && !(at[i].bond_type[k] == BOND_TYPE_SINGLE && is_el_a_metal(at[i].el_number) != is_el_a_metal(at[j].el_number) && @@ -6491,7 +6494,8 @@ static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) /* Realize the bond as single on both endpoints. Converting both * sides here means the reverse half-bond is no longer type 9, so - * the same bond is not processed (or its charge cancelled) twice. */ + * the same bond is not processed (or its charge cancelled) twice. + */ at[i].bond_type[k] = BOND_TYPE_SINGLE; diff --git a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp index b2401491..3b70f18f 100644 --- a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp +++ b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp @@ -6,6 +6,28 @@ extern "C" #include "../../../INCHI-1-SRC/INCHI_BASE/src/mode.h" } +/* +* (NaumanUllahKhan :: @nnuk) +* helper function to run the molfiles below as unit tests. +*/ +static void ExpectedMolecularInorganicsInChI(const char* molblock, + const char* expected_inchi, + int expected_retcode) +{ + char options[] = "-MolecularInorganics"; + + inchi_Output output; + inchi_Output* poutput = &output; + memset(poutput, 0, sizeof(*poutput)); + + int ret = MakeINCHIFromMolfileText(molblock, options, poutput); + + EXPECT_EQ(ret, expected_retcode); + ASSERT_NE(poutput->szInChI, nullptr); + EXPECT_STREQ(poutput->szInChI, expected_inchi); + + FreeINCHI(poutput); +} /* (NaumanUllahKhan :: @nnuk) * below are some tests with MI (MolecularInorganics) tags specific molfiles that go through the MI code pipeline. 
@@ -28,17 +50,7 @@ TEST(test_molecularInorganics, test_MI_1_VOF3) M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/F3OV/c1-5(2,3)4"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 0); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/F3OV/c1-5(2,3)4", 0); } TEST(test_molecularInorganics, test_MI_2_FeF4) @@ -59,17 +71,7 @@ TEST(test_molecularInorganics, test_MI_2_FeF4) M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/F4Fe/c1-5(2,3)4"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 0); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/F4Fe/c1-5(2,3)4", 0); } TEST(test_molecularInorganics, test_MI_3_FeF3) @@ -88,17 +90,7 @@ TEST(test_molecularInorganics, test_MI_3_FeF3) M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/3FH.Fe/h3*1H;/q;;;+3/p-3"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/3FH.Fe/h3*1H;/q;;;+3/p-3", 1); } TEST(test_molecularInorganics, test_MI_4_FeCl3) @@ -117,17 +109,7 @@ TEST(test_molecularInorganics, test_MI_4_FeCl3) M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/Cl3Fe/c1-4(2)3"; - - 
EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 0); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Cl3Fe/c1-4(2)3", 0); } TEST(test_molecularInorganics, test_MI_5_hydrido_dimethyl_iron) @@ -146,17 +128,7 @@ TEST(test_molecularInorganics, test_MI_5_hydrido_dimethyl_iron) M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/C2H7Fe/c1-3(2)4/h1-2H3"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 0); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/C2H7Fe/c1-3(2)4/h1-2H3", 0); } TEST(test_molecularInorganics, test_MI_6_Pt_haptic) @@ -187,17 +159,7 @@ M V30 END CTAB M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/C2H4Cl3Pt/c3-6(4,5)1-2-6/h1-2H2/q-1"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/C2H4Cl3Pt/c3-6(4,5)1-2-6/h1-2H2/q-1", 1); } TEST(test_molecularInorganics, test_MI_GHI_218) @@ -312,17 +274,7 @@ M V30 END BOND M V30 END CTAB M END)"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/2C15H10O7.Ti/c2*16-6-1-2-8(9(18)3-6)15-14(21)13(20)12-10(19)4-7(17)5-11(12)22-15;/h2*1-5,16-19,21H;/q2*-1;+4/p-2"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, 
"InChI=1B/2C15H10O7.Ti/c2*16-6-1-2-8(9(18)3-6)15-14(21)13(20)12-10(19)4-7(17)5-11(12)22-15;/h2*1-5,16-19,21H;/q2*-1;+4/p-2", 1); } TEST(test_molecularInorganics, test_MI_Na_H2O_CoordBond) @@ -344,17 +296,7 @@ M V30 END CTAB M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/Na.H2O/h;1H2/q+1;/p-1"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.H2O/h;1H2/q+1;/p-1", 1); } TEST(test_molecularInorganics, test_MI_Na_H2O_CoordBond_Charged) @@ -376,17 +318,7 @@ M V30 END CTAB M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/Na.H2O/h;1H2/q+1;/p-1"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.H2O/h;1H2/q+1;/p-1", 1); } TEST(test_molecularInorganics, test_MI_Na_Methoxide_Charged) @@ -410,17 +342,7 @@ M V30 END CTAB M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/CH3O.Na/c1-2;/h1H3;/q-1;+1"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/CH3O.Na/c1-2;/h1H3;/q-1;+1", 1); } TEST(test_molecularInorganics, test_MI_Na_Methoxide_Valence1) @@ -444,17 +366,7 @@ M V30 END CTAB M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - 
memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/CH3O.Na/c1-2;/h1H3;/q-1;+1"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/CH3O.Na/c1-2;/h1H3;/q-1;+1", 1); } @@ -471,15 +383,5 @@ M CHG 2 1 1 2 -1 M END )"; - char options[] = "-MolecularInorganics"; - inchi_Output output; - inchi_Output* poutput = &output; - memset(poutput, 0, sizeof(*poutput)); - - const char expected_inchi[] = "InChI=1B/Na.H2O/h;1H2/q+1;/p-1"; - - EXPECT_EQ(MakeINCHIFromMolfileText(molblock, options, poutput), 1); - EXPECT_STREQ(poutput->szInChI, expected_inchi); - - FreeINCHI(poutput); + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.H2O/h;1H2/q+1;/p-1", 1); } From 7837660cf555523b835d1f5d4dd4175582b54b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=A4nsch?= <168176402+fbaensch-beilstein@users.noreply.github.com> Date: Thu, 18 Jun 2026 12:28:54 +0200 Subject: [PATCH 10/10] fix: disconnect charge-separated metal-ligand bonds preserving charges In MolecularInorganics mode, a metal and ligand carrying opposite formal charges across a single (type 1) or coordinative (type 9) bond depict an ionic interaction (e.g. M(2+) ... L(2-)) and must split into the drawn ions. Two heuristics in the disconnection loop prevented this: the per-element valence gate (which rejects e.g. Na+ bearing a bond) and the "keep all bonds when several metals share a component" rule (which kept Na-O-Na connected). Add a dedicated pass in MolecularInorganicsPreprocessing that disconnects such bonds up front via DisconnectInpAtBond, which also adapts valence and chem_bonds_valence. No charge is added or removed: each ion keeps exactly the charge drawn in the input. Higher-order bonds (e.g. a drawn M=O double bond) are genuine covalent bonds and are left intact. 
ConvertCoordinativeBondsToSingle is restricted back to type-9 bonds only; the charge cancellation that previously fired for plain charge-separated single bonds is removed, since those charges are intrinsic to the single-bond depiction. Standard InChI is unaffected (the code runs only under -MolecularInorganics). Adds unit tests for the singly/multiply-charged single and coordinative metal-ligand cases. --- INCHI-1-SRC/INCHI_BASE/src/strutil.c | 80 ++++++++++--- .../test_unit/test_molecularInorganics.cpp | 111 ++++++++++++++++++ 2 files changed, 175 insertions(+), 16 deletions(-) diff --git a/INCHI-1-SRC/INCHI_BASE/src/strutil.c b/INCHI-1-SRC/INCHI_BASE/src/strutil.c index e8bd80aa..305aa04b 100644 --- a/INCHI-1-SRC/INCHI_BASE/src/strutil.c +++ b/INCHI-1-SRC/INCHI_BASE/src/strutil.c @@ -6465,6 +6465,16 @@ int MolecularInorganicsKeepBond(inp_ATOM *at, int metal_idx, int neigh_idx, int * performs that change in place: every COORDINATIVE_BOND becomes * BOND_TYPE_SINGLE and, when the two atoms carry opposite-sign charges, each is * moved one step toward neutral (one charge pair neutralized per bond). + * + * Only type-9 bonds are touched. A plain single bond is left alone even when + * its endpoints carry opposite formal charges: there the charges are an + * intrinsic part of the single-bond Lewis structure, not the zero-order + * artifact of a dative bond, so cancelling them would corrupt a self-consistent + * depiction (and leave the atoms at unusual valences). + * + * Each undirected bond is processed exactly once, from its lower-indexed + * endpoint (the j > i guard), so a multiply-charged pair (e.g. M(2+)-L(2-)) + * cannot have its charges cancelled once from each stored half-bond. 
*****************************************************************************/ static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) { @@ -6480,28 +6490,27 @@ static void ConvertCoordinativeBondsToSingle(inp_ATOM *at, int num_atoms) continue; } - /* Accept coordinative bonds and equivalent charge-separated - * metal–ligand single bonds with opposite formal charges. - */ - if (at[i].bond_type[k] != COORDINATIVE_BOND && - !(at[i].bond_type[k] == BOND_TYPE_SINGLE && - is_el_a_metal(at[i].el_number) != is_el_a_metal(at[j].el_number) && - ((at[i].charge > 0 && at[j].charge < 0) || - (at[i].charge < 0 && at[j].charge > 0)))) + /* Visit each undirected bond once, from the lower-indexed atom, so + * a multiply-charged pair cannot be neutralized once from each end. */ + if (j <= i) { continue; } - /* Realize the bond as single on both endpoints. Converting both - * sides here means the reverse half-bond is no longer type 9, so - * the same bond is not processed (or its charge cancelled) twice. - */ + /* Only coordinative (type 9) bonds are converted; plain single + * bonds keep their intrinsic formal charges untouched. */ + if (at[i].bond_type[k] != COORDINATIVE_BOND) + { + continue; + } + /* Realize the bond as single on both endpoints so the rest of the + * pipeline sees a plain single bond from either atom. */ at[i].bond_type[k] = BOND_TYPE_SINGLE; for (k2 = 0; k2 < at[j].valence; k2++) { - if (at[j].neighbor[k2] == i) + if (at[j].neighbor[k2] == i) { at[j].bond_type[k2] = BOND_TYPE_SINGLE; break; @@ -6595,9 +6604,48 @@ int MolecularInorganicsPreprocessing(ORIG_ATOM_DATA *orig_at_data, INPUT_PARMS * } } - /* Realize coordinative (type 9) bonds as single bonds and cancel the - * paired +/- formal charges, so the rest of the pipeline treats them - * exactly like the equivalent single-bonded structure. */ + /* Disconnect charge-separated metal-ligand bonds, preserving the drawn + * formal charges. 
A metal and ligand carrying opposite formal charges + * across a single (type 1) or coordinative (type 9) bond depict an ionic + * interaction (e.g. M(2+) ... L(2-)) and must split into the drawn ions + * regardless of the metal's nominal valence or the presence of other + * metals in the same component - the heuristics in the disconnection loop + * below would otherwise keep these bonds connected. DisconnectInpAtBond + * also decrements valence and chem_bonds_valence on both atoms (a type-9 + * bond counts as single), so no charge is added or removed here: each ion + * keeps exactly the charge drawn in the input. Higher-order bonds (e.g. a + * drawn M=O double bond) are genuine covalent bonds and are left intact. */ + for (i = 0; i < num_at; i++) + { + for (k = 0; k < at[i].valence; ) + { + j = at[i].neighbor[k]; + + /* Process each undirected bond once, from its lower-indexed atom, + * and only charge-separated metal/non-metal single or coordinative + * bonds. */ + if (j <= i || j >= num_at || + (at[i].bond_type[k] != COORDINATIVE_BOND && + at[i].bond_type[k] != BOND_TYPE_SINGLE) || + is_el_a_metal(at[i].el_number) == is_el_a_metal(at[j].el_number) || + !((at[i].charge > 0 && at[j].charge < 0) || + (at[i].charge < 0 && at[j].charge > 0))) + { + k++; + continue; + } + + DisconnectInpAtBond(at, nOldCompNumber, i, k); + num_disconnected++; + ip->bMolecularInorganicsReconnectedInChI = 1; + /* neighbor k was removed; the next neighbor shifted into its slot, + * so do not advance k here. */ + } + } + + /* Realize the remaining (uncharged) coordinative (type 9) bonds as single + * bonds, so the rest of the pipeline treats them like the equivalent + * single-bonded structure. 
*/ ConvertCoordinativeBondsToSingle(at, num_at); /* Function call to Mark ring systems */ diff --git a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp index 3b70f18f..bdebfe9c 100644 --- a/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp +++ b/INCHI-1-TEST/tests/test_unit/test_molecularInorganics.cpp @@ -385,3 +385,114 @@ M END ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.H2O/h;1H2/q+1;/p-1", 1); } + +/* (@fbaensch) Charge accounting for charge-separated metal-ligand bonds, + * single (type 1) or coordinative (type 9): such bonds are disconnected + * up front, and disconnection must not alter charges. No charge is added + * or removed, so each resulting ion keeps exactly the formal charge drawn + * in the input (e.g. Mg(2+)/O(2-) stays q+2;-2). */ + +TEST(test_molecularInorganics, test_MI_NaOH_charged_disconnected) +{ + const char* molblock = R"( + ACCLDraw06152616342D + + 2 0 0 0 0 0 0 0 0 0999 V2000 + 16.3750 -5.1250 0.0000 Na 0 3 0 0 0 0 0 0 0 0 0 0 + 17.5313 -3.7500 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 +M CHG 2 1 1 2 -1 +M END +)"; + + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.H2O/h;1H2/q+1;/p-1", 1); +} + +TEST(test_molecularInorganics, test_MI_NaOH_charged_coordinative) +{ + const char* molblock = R"( + -INDIGO-06172612032D + + 0 0 0 0 0 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 2 1 0 0 0 +M V30 BEGIN ATOM +M V30 1 Na 4.675 -6.475 0.0 0 CHG=2 +M V30 2 O 6.25711 -4.84289 0.0 0 CHG=-2 VAL=2 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 END BOND +M V30 END CTAB +M END +)"; + + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.H2O/h;1H2/q+2;-2", 1); +} + +TEST(test_molecularInorganics, test_MI_NaO_charged_coordinative) +{ + const char* molblock = R"( + -INDIGO-06172612042D + + 0 0 0 0 0 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 2 1 0 0 0 +M V30 BEGIN ATOM +M V30 1 Na 4.675 -6.475 0.0 0 CHG=2 +M V30 2 O 
6.25711 -4.84289 0.0 0 CHG=-2 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 END BOND +M V30 END CTAB +M END +)"; + + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Na.O/q+2;-2", 1); +} + +TEST(test_molecularInorganics, test_MI_NaONa_charged_coordinative) +{ + const char* molblock = R"( + -INDIGO-06172612062D + + 0 0 0 0 0 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 3 2 0 0 0 +M V30 BEGIN ATOM +M V30 1 Na 4.675 -6.475 0.0 0 CHG=1 +M V30 2 O 6.25711 -4.84289 0.0 0 CHG=-2 +M V30 3 Na 7.48921 -6.225 0.0 0 CHG=1 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 2 9 2 3 +M V30 END BOND +M V30 END CTAB +M END +)"; + + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/2Na.O/q2*+1;-2", 1); +} + +TEST(test_molecularInorganics, test_MI_MgO_charged_coordinative) +{ + const char* molblock = R"( + -INDIGO-06172612352D + + 0 0 0 0 0 0 0 0 0 0 0 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 2 1 0 0 0 +M V30 BEGIN ATOM +M V30 1 Mg 6.48135 -5.51875 0.0 0 CHG=2 +M V30 2 O 8.63135 -5.49375 0.0 0 CHG=-2 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 9 2 1 +M V30 END BOND +M V30 END CTAB +M END +)"; + + ExpectedMolecularInorganicsInChI(molblock, "InChI=1B/Mg.O/q+2;-2", 1); +}