Skip to content

Commit efa82f8

Browse files
committed
Refactored molcas.Orb, improved performance, documentation, reduced complexity.
1 parent deca419 commit efa82f8

1 file changed

Lines changed: 51 additions & 75 deletions

File tree

exatomic/molcas/output.py

Lines changed: 51 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import six
1414
import pandas as pd
1515
import numpy as np
16-
from io import StringIO
16+
from six import StringIO
1717
from exa import TypedMeta
1818
from .editor import Editor
1919
from exatomic import Atom
@@ -30,84 +30,60 @@ class OrbMeta(TypedMeta):
3030

3131

3232
class Orb(six.with_metaclass(OrbMeta, Editor)):
33-
33+
"""
34+
Parser for molcas coefficient matrix dumps (e.g. RasOrb).
35+
36+
Note:
37+
This parser assumes the file contains data from a single
38+
calculation (i.e. a single frame).
39+
"""
3440
def to_universe(self):
3541
raise NotImplementedError("This editor has no parse_atom method.")
3642

37-
def _one_el(self, starts, step, ncol):
38-
func = pd.read_csv
39-
kwargs = {'header': None}
40-
if ncol == 1:
41-
func = pd.read_fwf
42-
kwargs['widths'] = [18] * 4
43-
else:
44-
kwargs['delim_whitespace'] = True
45-
return [func(StringIO('\n'.join(self[start:start + step])),
46-
**kwargs).stack().values for start in starts]
47-
4843
def parse_momatrix(self):
49-
dim = int(self[5])
50-
#ndim = dim * dim
51-
_re_orb = 'ORBITAL'
52-
_re_occ = 'OCCUPATION NUMBERS'
53-
_re_ens = 'ONE ELECTRON ENERGIES'
54-
found = self.find(_re_orb, _re_occ,
55-
_re_ens, keys_only=True)
56-
skips = found[_re_orb]
57-
start = skips[0]
58-
occs = [i + 1 for i in found[_re_occ]]
59-
ens = [i + 1 for i in found[_re_ens]]
60-
if not found[_re_ens]: ens = False
61-
ncol = len(self[start + 1].split())
62-
cols = 4 if ncol == 1 else ncol
63-
chnk = np.ceil(dim / cols).astype(np.int64)
64-
orbdx = np.repeat(range(dim), chnk)
65-
osh = False
66-
if len(occs) == 2:
67-
osh = True
68-
skips.insert(dim, skips[dim] - 1)
69-
orbdx = np.concatenate([orbdx, orbdx])
70-
skips = [i - skips[0] for i in skips]
71-
if ncol == 1:
72-
coefs = pd.read_fwf(StringIO('\n'.join(self[start:occs[0]-2])),
73-
skiprows=skips, header=None, widths=[18]*4)
74-
if ens: ens = self._one_el(ens, chnk, ncol)
75-
else:
76-
coefs = self.pandas_dataframe(start, occs[0]-2, ncol,
77-
**{'skiprows': skips})
78-
if ens:
79-
echnk = np.ceil(dim / len(self[ens[0] + 1].split())).astype(np.int64)
80-
ens = self._one_el(ens, echnk, ncol)
81-
occs = self._one_el(occs, chnk, ncol)
82-
coefs['idx'] = orbdx
83-
coefs = coefs.groupby('idx').apply(pd.DataFrame.stack).drop(
84-
'idx', level=2).values
85-
mo = {'orbital': np.repeat(range(dim), dim), 'frame': 0,
86-
'chi': np.tile(range(dim), dim)}
87-
if ens:
88-
orb = {'frame': 0, 'group': 0}
89-
if len(occs) == 2:
90-
mo['coef'] = coefs[:len(coefs)//2]
91-
mo['coef1'] = coefs[len(coefs)//2:]
92-
self.occupation_vector = {'coef': occs[0], 'coef1': occs[1]}
93-
if ens:
94-
orb['occupation'] = np.concatenate(occs)
95-
orb['energy'] = np.concatenate(ens)
96-
orb['vector'] = np.concatenate([range(dim), range(dim)])
97-
orb['spin'] = np.concatenate([np.zeros(dim), np.ones(dim)])
98-
else:
99-
mo['coef'] = coefs
100-
self.occupation_vector = occs[0]
101-
if ens:
102-
orb['occupation'] = occs[0]
103-
orb['energy'] = ens[0]
104-
orb['vector'] = range(dim)
105-
orb['spin'] = np.zeros(dim)
106-
self.momatrix = pd.DataFrame.from_dict(mo)
107-
if ens:
108-
self.orbital = pd.DataFrame.from_dict(orb)
109-
else:
110-
self.orbital = Orbital.from_occupation_vector(occs[0], os=osh)
44+
"""Wrapper for :func:`~exatomic.molcas.output.Orb.parse`."""
45+
self.parse()
46+
47+
def parse_orbital(self):
48+
"""Wrapper for :func:`~exatomic.molcas.output.Orb.parse`."""
49+
self.parse()
50+
51+
def parse(self):
52+
"""Parse all information contained in the orbital/coefficient matrix dump."""
53+
nmo = int(self[5])
54+
mo_coef_segments = self.find(" ORBITAL ", " OCCUPATION NUMBERS",
55+
" ONE ELECTRON ENERGIES", "#INDEX", keys_only=True)
56+
start = mo_coef_segments[' ORBITAL '][0]
57+
stop = mo_coef_segments[' OCCUPATION NUMBERS'][0]
58+
start1 = mo_coef_segments[" ONE ELECTRON ENERGIES"][0]
59+
stop1 = mo_coef_segments["#INDEX"][0]
60+
try:
61+
end = mo_coef_segments[' OCCUPATION NUMBERS'][1]
62+
except IndexError:
63+
end = start1
64+
df = pd.read_fwf(StringIO("\n".join(self[start:stop])), widths=[22]*5, names=range(5))
65+
starting_points = df[df[0].str.contains(" ORBITAL")].index.values + 1
66+
didx = starting_points[1] - 2
67+
coef = []
68+
for i in starting_points:
69+
coef.append(df.iloc[i:i+didx].values.ravel()[:nmo])
70+
coef = np.concatenate(coef).astype(float)
71+
orb = [i for i in range(nmo) for _ in range(nmo)]
72+
chi = [j for _ in range(nmo) for j in range(nmo)]
73+
momatrix = pd.DataFrame.from_dict({'coef': coef, 'chi': chi, 'orbital': orb, 'chi': chi})
74+
momatrix['frame'] = 0
75+
occ = pd.read_fwf(StringIO("\n".join(self[stop+1:end-1])), widths=[22]*5, names=range(5)).values.ravel().astype(float)
76+
occ = occ[~np.isnan(occ)]
77+
nrg = pd.read_fwf(StringIO("\n".join(self[start1+1:stop1])), widths=[12]*10, names=range(10)).values.ravel().astype(float)
78+
nrg = nrg[~np.isnan(nrg)]
79+
orbital = pd.DataFrame.from_dict({'occupation': occ, 'energy': nrg})
80+
orbital['frame'] = 0
81+
orbital['spin'] = 0
82+
orbital['group'] = 0
83+
orbital['vector'] = range(nmo)
84+
self.momatrix = momatrix
85+
self.orbital = orbital
86+
self.occupation_vector = occ
11187

11288
def __init__(self, *args, **kwargs):
11389
super(Orb, self).__init__(*args, **kwargs)

0 commit comments

Comments
 (0)