Merge pull request #75 from avmarchenko/master

tjduigna · web-flow · commit b899373529c2 · 2016-08-02T12:08:16.000-04:00
Adds in basic support for nearest neighbor searches
diff --git a/exatomic/algorithms/distance.py b/exatomic/algorithms/distance.py
@@ -101,6 +101,46 @@ def periodic_pdist_euc_dxyz_idx(ux, uy, uz, rx, ry, rz, idxs, tol=10**-8):
     return dx, dy, dz, dr, idxi, idxj, px, py, pz
 
 
+def _compute(cx, cy, cz, rx, ry, rz, ox, oy, oz):
+    """For each point (cx[i], cy[i], cz[i]) pick, among the 27 shifts (ii*rx, jj*ry, kk*rz) with
+    ii, jj, kk in {-1, 0, 1}, the one placing it nearest (ox, oy, oz); return the shifts (dx, dy, dz)."""
+    l = [-1, 0, 1]
+    m = len(cx)
+    dx = np.empty((m, ), dtype=np.float64)
+    dy = np.empty((m, ), dtype=np.float64)
+    dz = np.empty((m, ), dtype=np.float64)
+    px = np.empty((27, ), dtype=np.float64)  # candidate shifts for the current point (27 = 3**3)
+    py = np.empty((27, ), dtype=np.float64)
+    pz = np.empty((27, ), dtype=np.float64)
+    pr = np.empty((27, ), dtype=np.float64)  # squared distance to (ox, oy, oz) per candidate shift
+    h = 0  # write cursor into dx/dy/dz; incremented once per point, so it tracks i
+    for i in range(m):
+        cxi = cx[i]
+        cyi = cy[i]
+        czi = cz[i]
+        hh = 0  # index of the candidate shift being filled in
+        for ii in l:
+            for jj in l:
+                for kk in l:
+                    sx = ii*rx
+                    sy = jj*ry
+                    sz = kk*rz
+                    xx = cxi + sx
+                    yy = cyi + sy
+                    zz = czi + sz
+                    pr[hh] = (ox - xx)**2 + (oy - yy)**2 + (oz - zz)**2
+                    px[hh] = sx
+                    py[hh] = sy
+                    pz[hh] = sz
+                    hh += 1
+        hh = np.argmin(pr)  # hh is reused: now the index of the smallest squared distance
+        dx[h] = px[hh]
+        dy[h] = py[hh]
+        dz[h] = pz[hh]
+        h += 1
+    return dx, dy, dz
+
+
 if config['dynamic']['numba'] == 'true':
     from numba import jit, vectorize
     from exa.math.vector.cartesian import magnitude_xyz
@@ -109,3 +149,4 @@ def periodic_pdist_euc_dxyz_idx(ux, uy, uz, rx, ry, rz, idxs, tol=10**-8):
     minimal_image_counts = jit(nopython=True, cache=True, nogil=True)(minimal_image_counts)
     minimal_image = vectorize(types3, nopython=True)(minimal_image)
     periodic_pdist_euc_dxyz_idx = jit(nopython=True, cache=True, nogil=True)(periodic_pdist_euc_dxyz_idx)
+    _compute = jit(nopython=True, cache=True, nogil=True)(_compute)
diff --git a/exatomic/algorithms/neighbors.py b/exatomic/algorithms/neighbors.py
@@ -14,7 +14,8 @@
 and :func:`~exatomic.molecule.Molecule.classify`.
 """
 import numpy as np
-from exatomic.container import Universe
+import pandas as pd
+from exatomic.algorithms.distance import _compute
 
 
 def nearest_molecules(universe, n, sources, restrictions=None, how='atom',
@@ -46,13 +47,17 @@ def nearest_molecules(universe, n, sources, restrictions=None, how='atom',
     Returns:
         unis (dict): Dictionary of number of neighbors keys, universe values
     """
-    source_atoms, other_atoms, source_molecules, other_molecules, n = _slice_atoms_molecules(universe, sources, restrictions, how)
+    source_atoms, other_atoms, source_molecules, other_molecules, n = _slice_atoms_molecules(universe, sources, restrictions, n)
     ordered_molecules, ordered_twos = _compute_neighbors_by_atom(universe, source_atoms, other_atoms, source_molecules)
-    unis = {nn: _build_universe(universe, ordered_molecules, ordered_twos, nn) for nn in n}
-
-
-
-
+    unis = {}
+    if free_boundary == True:
+        for nn in n:
+            unis[nn] = _build_free_universe(universe, ordered_molecules,
+                                            ordered_twos, nn, source_atoms,
+                                            source_molecules)
+    else:
+        raise NotImplementedError()
+    return unis
 
 
 def _slice_atoms_molecules(universe, sources, restrictions, n):
@@ -66,7 +71,7 @@ def _slice_atoms_molecules(universe, sources, restrictions, n):
         sources = [sources]
     if not isinstance(restrictions, list) and restrictions is not None:
         restrictions = [restrictions]
-    if not isinstance(n, list):
+    if isinstance(n, (int, np.int32, np.int64)):
         n = [n]
     symbols = universe.atom['symbol'].unique()
     classification = universe.molecule['classification'].unique()
@@ -135,12 +140,67 @@ def _compute_neighbors_by_com(universe, source_molecules, other_molecules):
     raise NotImplementedError()
 
 
+def _build_free_universe(universe, ordered_molecules, ordered_twos, n,
+                         source_atoms, source_molecules):  # ordered_twos is accepted but not used in this body
+    """Build a new universe from the first ``n`` entries of each item in ``ordered_molecules`` plus the
+    source molecules; if ``universe`` is periodic, each kept molecule is shifted by what ``_compute`` picks."""
+    molecule = np.concatenate([mcules[:n] for mcules in ordered_molecules])  # first n entries of each item
+    molecule = np.concatenate((molecule, source_molecules.index.tolist()))  # always keep the source molecules too
+    molecule = universe.molecule[universe.molecule.index.isin(molecule)].copy()
+    atom = universe.atom[universe.atom['molecule'].isin(molecule.index)].copy()
+    atom_two = universe.atom_two[(universe.atom_two['atom0'].isin(atom.index) &
+                                  universe.atom_two['atom1'].isin(atom.index))].copy()  # pairs with both atoms kept
+    frame = universe.frame[universe.frame.index.isin(atom['frame'])].copy()
+    frame['periodic'] = False  # the returned universe is flagged as non-periodic
+    uni = universe.__class__(atom=atom, molecule=molecule, frame=frame, atom_two=atom_two)
+    if universe.frame.is_periodic():
+        uni.atom.update(universe.visual_atom)  # overwrite matching atom values with those from visual_atom
+        uni.compute_molecule_com()
+        uni.atom._revert_categories()
+        mapper = uni.atom.drop_duplicates('molecule').set_index('molecule')['frame']  # molecule -> frame lookup
+        uni.atom._set_categories()
+        uni.molecule['frame'] = uni.molecule.index.map(lambda x: mapper[x])  # temporary column, deleted below
+        sources = source_atoms.groupby('frame')
+        groups = uni.molecule.groupby('frame')
+        n = groups.ngroups  # NOTE: rebinds the ``n`` argument; from here on n is the number of frame groups
+        dx = np.empty((n, ), dtype=np.ndarray)  # one slot per frame group, each assigned an array in the loop
+        dy = np.empty((n, ), dtype=np.ndarray)
+        dz = np.empty((n, ), dtype=np.ndarray)
+        index = np.empty((n, ), dtype=np.ndarray)
+        for i, (frame, group) in enumerate(groups):  # ``frame`` now names the group key, not the DataFrame above
+            cx = group['cx'].values
+            cy = group['cy'].values
+            cz = group['cz'].values
+            ccx, ccy, ccz = sources.get_group(frame)[['x', 'y', 'z']].mean().values  # unweighted mean of source atoms
+    #        ccx, ccy, ccz = mcules.ix[mcules['classification'] == 'solute', ['cx', 'cy', 'cz']].values[0]
+            rx, ry, rz = uni.frame.ix[frame, ['rx', 'ry', 'rz']].values
+            dxf, dyf, dzf = _compute(cx, cy, cz, rx, ry, rz, ccx, ccy, ccz)  # per-molecule shift for this frame
+            dx[i] = dxf
+            dy[i] = dyf
+            dz[i] = dzf
+            index[i] = group.index.values
+        del uni.molecule['frame']  # drop the temporary helper column added above
+        dx = np.concatenate(dx)
+        dy = np.concatenate(dy)
+        dz = np.concatenate(dz)
+        index = np.concatenate(index)
+        df = pd.DataFrame.from_dict({'x': dx, 'y': dy, 'z': dz, 'molecule': index})
+        df.set_index('molecule', inplace=True)
+        for molecule in df.index:  # apply each molecule's shift to all of its atoms
+            dx, dy, dz = df.ix[molecule].values
+            uni.atom.ix[uni.atom['molecule'] == molecule, 'x'] += dx
+            uni.atom.ix[uni.atom['molecule'] == molecule, 'y'] += dy
+            uni.atom.ix[uni.atom['molecule'] == molecule, 'z'] += dz
+    return uni
+
+
 def _build_universe(universe, ordered_molecules, ordered_twos, n):
     """
     """
+    raise NotImplementedError()
     # TODO CONVERT TO A GENERIC AND COMPLETE SLICER
-    molecules = np.concatenate([m[:n] for m in ordered_molecules.values])
-    twos = np.concatenate([t[:n] for t in ordered_twos.values])
+    molecules = np.concatenate([m[:n] for m in ordered_molecules])
+    twos = np.concatenate([t[:n] for t in ordered_twos])
     atom = universe.atom[universe.atom['molecule'].isin(molecules)].copy().sort_index()
     two = universe.atom_two[universe.atom_two['atom0'].isin(atom.index) &
                             universe.atom_two['atom1'].isin(atom.index)].copy().sort_index()
diff --git a/exatomic/container.py b/exatomic/container.py
@@ -21,13 +21,13 @@
 from exatomic.atom import Atom, UnitAtom, ProjectedAtom, VisualAtom
 from exatomic.two import (AtomTwo, MoleculeTwo, compute_atom_two,
                           compute_bond_count, compute_molecule_two)
-from exatomic.molecule import Molecule, compute_molecule, compute_com
+from exatomic.molecule import (Molecule, compute_molecule, compute_molecule_com,
+                               compute_molecule_count)
 from exatomic.widget import UniverseWidget
 from exatomic.field import AtomicField
 from exatomic.orbital import Orbital, MOMatrix, DensityMatrix
 from exatomic.basis import (SphericalGTFOrder, CartesianGTFOrder, Overlap,
-                            BasisSetSummary, GaussianBasisSet, BasisSetOrder,
-                            Primitive)
+                            BasisSetSummary, GaussianBasisSet, BasisSetOrder)
 
 
 class Meta(TypedMeta):
@@ -49,7 +49,6 @@ class Meta(TypedMeta):
     orbital = Orbital
     overlap = Overlap
     momatrix = MOMatrix
-    primitive = Primitive
     density = DensityMatrix
     basis_set_order = BasisSetOrder
     basis_set_summary = BasisSetSummary
@@ -133,15 +132,19 @@ def compute_molecule(self):
         self.molecule = compute_molecule(self)
 
     def compute_molecule_com(self):
-        cx, cy, cz = compute_com(self)
+        cx, cy, cz = compute_molecule_com(self)  # the function imported from exatomic.molecule, not this method
         self.molecule['cx'] = cx
         self.molecule['cy'] = cy
         self.molecule['cz'] = cz
 
     def compute_atom_count(self):
-        """Compute the atom count for each frame."""
+        """Store the number of atoms per frame in ``self.frame['atom_count']``."""
         self.frame['atom_count'] = self.atom.grouped().size()
 
+    def compute_molecule_count(self):
+        """Store the number of molecules per frame in ``self.frame['molecule_count']``."""
+        self.frame['molecule_count'] = compute_molecule_count(self)  # the imported module-level function
+
     def _custom_traits(self):
         """
         Build traits depending on multiple dataframes.
@@ -153,20 +156,6 @@ def _custom_traits(self):
             traits.update(self.atom_two._bond_traits(mapper))
         return traits
 
-    @classmethod
-    def from_small_molecule_data(cls, center=None, ligand=None, distance=None, geometry=None,
-                                 offset=None, plane=None, axis=None, domains=None, unit='A'):
-        '''
-        Build a universe from small molecule data
-
-        See
-            exatomic.algorithms.geometry.make_small_molecule
-        '''
-        return cls(atom=Atom.from_small_molecule_data(center=center, ligand=ligand,
-                                                      distance=distance, geometry=geometry,
-                                                      offset=offset, plane=plane, axis=axis,
-                                                      domains=domains, unit=unit))
-
     def __len__(self):
         return len(self.frame)
 
diff --git a/exatomic/molecule.py b/exatomic/molecule.py
@@ -140,28 +140,37 @@ def compute_molecule(universe):
 def compute_molecule_count(universe):
     """
     """
+    if 'molecule' not in universe.atom.columns:  # the lookup below needs per-atom molecule labels
+        universe.compute_molecule()
+    universe.atom._revert_categories()  # NOTE(review): presumably converts categorical columns to plain ones - confirm
     mapper = universe.atom.drop_duplicates('molecule').set_index('molecule')['frame']
+    universe.atom._set_categories()  # NOTE(review): presumably restores the categorical dtypes - confirm
     universe.molecule['frame'] = universe.molecule.index.map(lambda x: mapper[x])
     molecule_count = universe.molecule.groupby('frame').size()
     del universe.molecule['frame']
     return molecule_count
 
 
-def compute_com(universe):
+def compute_molecule_com(universe):
     """
     Compute molecules' centers of mass.
     """
     if 'molecule' not in universe.atom.columns:
         universe.compute_molecule()
-    xyz = universe.atom[['x', 'y', 'z', 'molecule']].copy()
-    xyz['mass'] = universe.atom.get_element_masses()
-    xyz.update(u.visual_atom)
-    xyz['xm'] = xyz['x'].mul(xyz['mass'])
-    xyz['ym'] = xyz['y'].mul(xyz['mass'])
-    xyz['zm'] = xyz['z'].mul(xyz['mass'])
-    xyz['rm'] = xyz['xm'].add(xyz['ym']).add(xyz['zm'])
-    grps = xyz.groupby('molecule')
-    sums = grps.sum()
+    mass = universe.atom.get_element_masses()
+    if universe.frame.is_periodic():
+        xyz = universe.atom[['x', 'y', 'z']].copy()
+        xyz.update(u.visual_atom)
+    else:
+        xyz = universe.atom[['x', 'y', 'z']]
+    xm = xyz['x'].mul(mass)
+    ym = xyz['y'].mul(mass)
+    zm = xyz['z'].mul(mass)
+    rm = xm.add(ym).add(zm)
+    df = pd.DataFrame.from_dict({'xm': xm, 'ym': ym, 'zm': zm, 'mass': mass,
+                                 'molecule': universe.atom['molecule']})
+    groups = df.groupby('molecule')
+    sums = groups.sum()
     cx = sums['xm'].div(sums['mass'])
     cy = sums['ym'].div(sums['mass'])
     cz = sums['zm'].div(sums['mass'])