Merge pull request #22 from libefp2/jack

Jacklaw441 · web-flow · commit 9beb96826122 · 2024-12-03T10:22:05.000-05:00
modified:   doc/applied_efp.rst
diff --git a/doc/applied_efp.rst b/doc/applied_efp.rst
@@ -244,7 +244,7 @@ octupole, and polarization point sections. Sample execution:
 
 ``python make_MM.py shell_bchl361-79002.g96 bchl361-79002.g96 topol.top``
 
-The topology file is necessary for atomic charges, and both structure files are read to so that only MM atoms will be 
+The topology file is necessary for atomic charges, and both structure files are read so that only MM atoms will be 
 included (both QM and EFP atoms are omitted).
 
 Stripping QM atoms from EFP files
diff --git a/examples/flex-EFP/Scripts/fragment_RMSD.py b/examples/flex-EFP/Scripts/fragment_RMSD.py
@@ -9,65 +9,87 @@
 import os
 
 directory = '/depot/lslipche/data/yb_boss/flexible_efp/efpdb/'
-cutoff=0.15
+ang_cutoff=0.20                     # Minimum RMSD allowed for "good" match
+cutoff=ang_cutoff*1.8897259886      # Cutoff convert to Bohr
 
-inp=sys.argv[1]
+inp=sys.argv[1]                     # a_22_304.inp for example. Input file with fragment coords
 with open(inp, "r") as orig:
     orgs = orig.readlines()
-
-efp=sys.argv[2]
-with open(efp, "r") as term:
-    terms = term.readlines()
     
-outname=inp.replace('.inp','.efp')
+outname=inp.replace('.inp','.efp')  # Output file name, same as input but changed extension.
+                                    # If no match is found, then outname will not be used.
 
 def rotate_vector(vector, rotation_matrix):
     #Different from coords because dipoles assumed centered on coordinate. NOT placed in Cartesian space
-    return np.dot(rotation_matrix, vector)
+    return np.dot(vector,rotation_matrix)
 
 def rotate_quadrupole(quadrupole, rotation_matrix):
-    #RxQxR_t
+    #R x Q x R_t
+    #Quadrupoles are 3x3s. But .efp give only six values, order is assumed xx, xy, xz, yy, yz, zz
+    #With symmetric off-diagonal elements.
     q = np.array([
-        [quadrupole[0], quadrupole[1], quadrupole[2]],
-        [quadrupole[1], quadrupole[3], quadrupole[4]],
-        [quadrupole[2], quadrupole[4], quadrupole[5]],
+        [quadrupole[0], quadrupole[3], quadrupole[4]],
+        [quadrupole[3], quadrupole[1], quadrupole[5]],
+        [quadrupole[4], quadrupole[5], quadrupole[2]],
     ])
-    q_rotated = rotation_matrix @ q @ rotation_matrix.T
-    return [q_rotated[0, 0], q_rotated[0, 1], q_rotated[0, 2],
-            q_rotated[1, 1], q_rotated[1, 2], q_rotated[2, 2]]
+    q_rotated = rotation_matrix.T @ q @ rotation_matrix
+    return [q_rotated[0, 0], q_rotated[1, 1], q_rotated[2, 2],
+            q_rotated[0, 1], q_rotated[0, 2], q_rotated[1, 2]]
 
 def rotate_octupole(octupole, rotation_matrix):
     #Oxyz=Rxi*Ryj*Rzk*Oijk summed over i=x,y,z;j=x,y,z;z=x,y,z;
     #Each term Oxyz of new tensor is composed of 
     #27 pieces generated from old tensor and rotation matrix!
+    #The indexes are hard-coded for some better readiability
     o = np.array([
-        [[octupole[0], octupole[1], octupole[2]], [octupole[1], octupole[3], octupole[4]], [octupole[2], octupole[4], octupole[5]]],
-        [[octupole[1], octupole[3], octupole[4]], [octupole[3], octupole[6], octupole[7]], [octupole[4], octupole[7], octupole[8]]],
-        [[octupole[2], octupole[4], octupole[5]], [octupole[4], octupole[7], octupole[8]], [octupole[5], octupole[8], octupole[9]]],
+        [[octupole[0], octupole[3], octupole[4]], [octupole[3], octupole[5], octupole[9]], [octupole[4], octupole[9], octupole[7]]],
+        [[octupole[3], octupole[5], octupole[9]], [octupole[5], octupole[1], octupole[6]], [octupole[9], octupole[6], octupole[8]]],
+        [[octupole[4], octupole[9], octupole[7]], [octupole[9], octupole[6], octupole[8]], [octupole[7], octupole[8], octupole[2]]],
     ])
-    o_rotated = np.einsum('ij,jkl,lm->ikm', rotation_matrix, o, rotation_matrix.T)
+    #fancy function below does what we want. Look up np.einsum if you need clarification.
+    #o_rotated = np.einsum('ij,jkl,lm->ikm', rotation_matrix, o, rotation_matrix.T)
+    o_rotated = np.zeros((3, 3, 3))
+    for p in range(0,3):
+        for q in range(0,3):
+            for r in range(0,3):
+                o_rotated[p][q][r]=single_term_oct(p,q,r,o,rotation_matrix.T)
     return [
-        o_rotated[0, 0, 0], o_rotated[0, 0, 1], o_rotated[0, 0, 2],
-        o_rotated[0, 1, 1], o_rotated[0, 1, 2], o_rotated[0, 2, 2],
-        o_rotated[1, 1, 1], o_rotated[1, 1, 2], o_rotated[1, 2, 2],
-        o_rotated[2, 2, 2]
+        o_rotated[0, 0, 0], o_rotated[1, 1, 1], o_rotated[2, 2, 2],
+        o_rotated[0, 0, 1], o_rotated[0, 0, 2], o_rotated[0, 1, 1],
+        o_rotated[1, 1, 2], o_rotated[0, 2, 2], o_rotated[1, 2, 2],
+        o_rotated[0, 1, 2]
     ]
 
+def single_term_oct(p,q,r,octupole,rot_mat):
+    term=0.0
+    for i in range(0,3):
+        for j in range(0,3):
+            for k in range(0,3):
+                term+=rot_mat[p][i]*rot_mat[q][j]*rot_mat[r][k]*octupole[i][j][k]
+    return term
+
 def rotate_polarizability(polarizability, rotation_matrix):
         #RxQxR_t
     q = np.array([
-        [polarizability[0], polarizability[1], polarizability[2]],
-        [polarizability[3], polarizability[4], polarizability[5]],
-        [polarizability[6], polarizability[7], polarizability[8]],])
-    q_rotated = rotation_matrix @ q @ rotation_matrix.T
-    return q_rotated.flatten()
+        [polarizability[0], polarizability[3], polarizability[4]],
+        [polarizability[6], polarizability[1], polarizability[5]],
+        [polarizability[7], polarizability[8], polarizability[2]]])
+    q_rotated = rotation_matrix.T @ q @ rotation_matrix
+    return [q_rotated[0, 0],q_rotated[1, 1],q_rotated[2, 2],
+            q_rotated[0, 1],q_rotated[0, 2],q_rotated[1, 2],
+            q_rotated[1, 0],q_rotated[2, 0],q_rotated[2, 1]]
+    #return q_rotated.flatten()
 
-def get_RMSD(coords1,coords2):
+def get_RMSD(coords1,coords2,mass):
     tot=0.0
-    for i in range(3):
-        for j in range(3):
-            tot+=(coords1[i][j]-coords2[i][j])**2
-    rmsd=np.sqrt((len(coords1)-1)*tot)
+    length=len(coords1)
+    dim=len(coords1[0])
+    totmass=0.0
+    for i in range(length):
+        for j in range(dim):
+            tot+=mass[i]*(coords1[i][j]-coords2[i][j])**2
+        totmass+=mass[i]
+    rmsd=np.sqrt(tot)/np.sqrt(totmass)
     return rmsd
 
 def kabsch_algorithm(coords, coords2):
@@ -84,8 +106,8 @@ def kabsch_algorithm(coords, coords2):
     u, _, vh = np.linalg.svd(covariance_matrix)
     rotation_matrix = np.dot(u, vh)
 
-    if np.linalg.det(rotation_matrix) < 0:
-        rotation_matrix[:, -1] *= -1
+    #if np.linalg.det(rotation_matrix) < 0:
+    #    rotation_matrix[:, -1] *= -1
     return rotation_matrix, com1, com2
 
 def apply_transform(coords, rotation_matrix, com1, com2):
@@ -100,39 +122,46 @@ def apply_transform(coords, rotation_matrix, com1, com2):
                    'l':'leu','k':'lys','m':'met','f':'phe','p':'pro',
                    's':'ser','t':'thr','w':'trp','y':'tyr','v':'val',
                    'hp':'hip','hd':'hid','he':'hie'}
+atom_weights = {'H':1.0,'O':15.9949100,'C':12.0000000,'N':12.0030700,'S':32.0650000,'M':23.3040000}
 
-
-res=amino_acid_dict[inp[0]]
+try:
+    res=amino_acid_dict[inp[0]]
+except:
+    print('No library folder for: '+inp)
 #directory = '/depot/lslipche/data/yb_boss/flexible_efp/efpdb/'+res
 directory+=res
 file_extension = '.efp'  # change this to your desired file extension
 
+start=0
+xyz=[]
+orig_coords=[]
+weights=[]
+for line in orgs:
+    if '$end' in line:
+        start=0
+    elif(start==1):
+        xyz.append(float(line.split()[2])/0.529177249)
+        xyz.append(float(line.split()[3])/0.529177249)
+        xyz.append(float(line.split()[4])/0.529177249)
+        orig_coords.append(xyz)
+        xyz=[]
+        weights.append(atom_weights[line.split()[0][0]])
+    elif 'C1' in line:
+        start=1
+
 minrmsd=20.0
 
 # Loop through files in the directory
 for filename in os.listdir(directory):
     if filename.endswith(file_extension):
         full_path = os.path.join(directory, filename)
-        #full_path='ala0001.efp'
+        #full_path=directory+'/gly5817.efp'
         #print(f"Processing file: {full_path}")
         with open(full_path, "r") as term:
             terms = term.readlines()
 
     start=0
     xyz=[]
-    orig_coords=[]
-    for line in orgs:
-        if '$end' in line:
-            start=0
-        elif(start==1):
-            xyz.append(float(line.split()[2])/0.529177249)
-            xyz.append(float(line.split()[3])/0.529177249)
-            xyz.append(float(line.split()[4])/0.529177249)
-            orig_coords.append(xyz)
-            xyz=[]
-        elif 'C1' in line:
-            start=1
-    start=0
     term_coords=[]
     for line in terms:
         if 'BO21' in line:
@@ -150,18 +179,27 @@ def apply_transform(coords, rotation_matrix, com1, com2):
     new_coords=np.array(term_coords)
     coords2=np.array(orig_coords)
     rotation_matrix, com1, com2 = kabsch_algorithm(new_coords, coords2)
-    aligned_new_coords = apply_transform(new_coords, rotation_matrix, com1,com2,coords2)
-    rmsd=(get_RMSD(aligned_new_coords,coords2))
+    aligned_new_coords = apply_transform(new_coords, rotation_matrix, com1,com2)
+    rmsd=(get_RMSD(aligned_new_coords,coords2,weights))
+    #print(rmsd)
     if(rmsd<minrmsd):
         minrmsd=rmsd
-        match=full_path
+        if(rmsd<cutoff):    
+            match=full_path
+            #break
+    #print(minrmsd)
 #%%
 
 #If good match found, transform that .efp to coords in .inp
 # Coords, dipoles, quadrupoles, octupoles, polarizable points and tensor
 # Monopoles and screen/screen2 do not need changed
 
-if(minrmsd<cutoff):
+#print(rmsd/1.8897259886, full_path, inp)
+#print(get_RMSD(aligned_new_coords,coords2,weights))
+#print(aligned_new_coords)
+#print(coords2)
+
+if(rmsd<cutoff):
     with open(match, "r") as efp:
         f0 = efp.readlines()
     start=0
@@ -278,4 +316,4 @@ def apply_transform(coords, rotation_matrix, com1, com2):
             outfile.write(line1)
 else:
     with open('no_matches.txt','a') as outfile2:
-        outfile2.write(res)
+        outfile2.write(inp+'\n')
diff --git a/examples/flex-EFP/Scripts/make_AAs.py b/examples/flex-EFP/Scripts/make_AAs.py
@@ -12,38 +12,29 @@
 import numpy as np
 import sys
 
-#efp_g96='shell_bchl361-79002.g96'
+#efp_g96='efp_pair53004.g96'
 efp_g96=sys.argv[1]
 with open(efp_g96, 'r') as inp:
     f0 = inp.readlines()
 
-#full_g96='bchl361-79002.g96'
+#full_g96='confout_pair53004.g96'
 full_g96=sys.argv[2]
 with open(full_g96, 'r') as g96:
     g0 = g96.readlines()
 
-#settings_file='user_set.txt'
+#settings_file='user_defined.txt'
 settings_file=sys.argv[3]
 
-#topol_file='topol.top'
+#topol_file='edit_topol.itp'
 topol_file=sys.argv[4]
 
 # Data and parameters
-Rings = [
-    'MG', 'CHA', 'CHB', 'HB', 'CHC', 'HC', 'CHD', 'HD', 'NA', 'C1A', 'C2A', 'H2A', 'C3A', 'H3A', 
-    'C4A', 'CMA', 'HMA1', 'HMA2', 'HMA3', 'NB', 'C1B', 'C2B', 'C3B', 'C4B', 'CMB', 'HMB1', 
-    'HMB2', 'HMB3', 'CAB', 'OBB', 'CBB', 'HBB1', 'HBB2', 'HBB3', 'NC', 'C1C', 'C2C', 'H2C', 
-    'C3C', 'H3C', 'C4C', 'CMC', 'HMC1', 'HMC2', 'HMC3', 'CAC', 'HAC1', 'HAC2', 'CBC', 'HBC1', 
-    'HBC2', 'HBC3', 'ND', 'C1D', 'C2D', 'C3D', 'C4D', 'CMD', 'HMD1', 'HMD2', 'HMD3', 'CAD', 
-    'OBD', 'CBD', 'HBD', 'CGD', 'O1D', 'O2D', 'CED', 'HED1', 'HED2', 'HED3'
-]
 
+#Charges of residues. NVAL and CGLN should be redundant, now. New logic assumes and N-terminal is +1 and
+# any C-terminal is -1. spec_AAs are residues that have non-zero charge, AA-charge is the dictionary with
+#that non-zero charge.
 AA_charge = {'ASP': '-1', 'GLU': '-1', 'NVAL': '1', 'HIP': '1', 'LYS': '1', 'ARG': '1', 'CGLN': '-1'}
 spec_AAs = ['ASP', 'GLU', 'NVAL', 'HIP', 'LYS', 'ARG', 'CGLN']
-CLA_resids = [
-    '359', '360', '361', '362', '363', '364', '365', '366', '725', '726', '727', '728', '729', '730', '731', '732',
-    '1091', '1092', '1093', '1094', '1095', '1096', '1097', '1098'
-]
 
 amino_acid_dict = {
     'ALA': 'a', 'ARG': 'r', 'ASN': 'n', 'ASP': 'd', 'CYS': 'c', 'GLN': 'q', 'GLU': 'e', 'GLY': 'g', 'HIS': 'h', 
@@ -110,6 +101,11 @@ def make_inp(fragment, QMs, POLs):
         return
     if fragment[4].split()[1] in spec_AAs:
         charge = AA_charge[fragment[4].split()[1]]
+    elif len(fragment[4].split()[1])==4:
+        if(fragment[4].split()[1][0]=='N'):
+            charge=1
+        elif(fragment[4].split()[1][0]=='C'):
+            charge=-1
     else:
         charge = '0'
     
@@ -271,7 +267,9 @@ def QM_MM_covalent(MM,QMs,topol_file):
 
         if line.split()[0] == efp_resis[i]:
             if len(prev_co) > 1 and line.split()[1] in known_amino_acids:
-                frag.extend(prev_co)
+                #frag.extend(prev_co)
+                frag.append(prev_co[-2])
+                frag.append(prev_co[-1])
                 prev_co = []
             frag.append(line)
     elif 'POSITION' in line:
diff --git a/examples/flex-EFP/Scripts/make_bcls.py b/examples/flex-EFP/Scripts/make_bcls.py
@@ -9,11 +9,16 @@
 """
 
 import numpy as np
-import sys
 
 #site='359'
-g96_file=sys.argv[1]
-site=sys.argv[2]
+#g96_file=sys.argv[1]
+g96_file='efp_pair53004.g96'
+#site=sys.argv[2]
+
+tailside='C6'
+headside='C5'
+site=['752','754']
+oldres='752'
 
 def cut_frag(head,tail):
     desired_dist=1.07886
@@ -31,7 +36,7 @@ def cut_frag(head,tail):
 
 def make_inp(fragment):
     txt=[]
-    filename='t_'+fragment[0].split()[0]+'.inp'
+    filename='cla_t_'+fragment[0].split()[0]+'.inp'
     txt.append(' $contrl units=angs local=boys runtyp=makefp \n'+
 '       mult=1 icharg=0 coord=cart icut=11 $end\n'+
 ' $system timlim=99999   mwords=200 $end\n'+
@@ -49,7 +54,7 @@ def make_inp(fragment):
             if(atom.split()[2][0]=='M'):
                 col1=' '+atom.split()[2][0:2]+atom.split()[3]+' '
                 col2=at_sym['MG']
-                filename='h_'+atom.split()[0]+'.inp'
+                filename='cla_h_'+atom.split()[0]+'.inp'
             else:
                 col1=' '+atom.split()[2][0]+atom.split()[3]+'   '
                 col2=at_sym[atom.split()[2][0]]
@@ -77,32 +82,33 @@ def make_inp(fragment):
         'NA':'11.0','CL':'17.0'
       }
 
-Rings=['MG','CHA','CHB','HB','CHC','HC','CHD','HD','NA','C1A','C2A','H2A','C3A','H3A','C4A',
-       'CMA','HMA1','HMA2','HMA3','NB','C1B','C2B','C3B','C4B','CMB','HMB1','HMB2','HMB3',
-       'CAB','OBB','CBB','HBB1','HBB2','HBB3','NC','C1C','C2C','H2C','C3C','H3C','C4C',
-       'CMC','HMC1','HMC2','HMC3','CAC','HAC1','HAC2','CBC','HBC1','HBC2','HBC3','ND',
-       'C1D','C2D','C3D','C4D','CMD','HMD1','HMD2','HMD3','CAD','OBD','CBD','HBD','CGD',
-       'O1D','O2D','CED','HED1','HED2','HED3']
+Rings=['MG','CHA','CHB','CHC','CHD','NA','C1A','C2A','C3A','C4A','CMA','NB','C1B','C2B','C3B',
+       'C4B','CMB','CAB','CBB','NC','C1C','C2C','C3C','C4C','CMC','CAC','CBC','ND','C1D','C2D',
+       'C3D','C4D','CMD','CAD','OBD','CBD','CGD','O1D','O2D','CED','CAA','CBA','CGA','O1A',
+       'O2A','C1','C2','C3','C4','C5','H1','H2','H3','H4','H5','H6','H7','H8','H9','H10','H11',
+       'H12','H13','H14','H15','H16','H17','H18','H19','H20','H21','H22','H23','H24','H25',
+       'H26','H27','H28','H29','H30','H31','H32','H33','H34','H35','H36','H37','H38','H39',
+       'H40','H41']
 
 #with open('bchl359-50028.g96','r') as g96:
 #    g0=g96.readlines()
 
 with open(g96_file,'r') as g96:
     g0=g96.readlines()
     
-tailside='CAA'
-headside='C2A'
+#tailside and headside represent the atoms that are bound but are to be broken to create 
+#separate fragment files.
 curr_head=[]
 curr_tail=[]
 #BCLs=[]
-oldres='359'
 for line in g0:
-    if 'BCL' in line:
+    if 'CLA' in line:
         if(oldres!=line.split()[0]):
             head_coord,tail_coord = cut_frag(head_cut,tail_cut)
             curr_head.append(' H000    1.0      '+str("{:.8f}".format(head_coord[0]))+'       '+str("{:.8f}".format(head_coord[1]))+'       '+str("{:.8f}".format(head_coord[2]))+'\n')
             curr_tail.append(' H000    1.0      '+str("{:.8f}".format(tail_coord[0]))+'       '+str("{:.8f}".format(tail_coord[1]))+'       '+str("{:.8f}".format(tail_coord[2]))+'\n')
-            if(oldres!=site):
+            #if(oldres!=site):
+            if oldres not in site:
                 make_inp(curr_head)
                 make_inp(curr_tail)
             curr_head=[]