1313import six
1414import pandas as pd
1515import numpy as np
16- from io import StringIO
16+ from six import StringIO
1717from exa import TypedMeta
1818from .editor import Editor
1919from exatomic import Atom
@@ -30,84 +30,60 @@ class OrbMeta(TypedMeta):
3030
3131
3232class Orb (six .with_metaclass (OrbMeta , Editor )):
33-
33+ """
34+ Parser for molcas coefficient matrix dumps (e.g. RasOrb).
35+
36+ Note:
37+ This parser assumes the file contains data from a single
38+ calculation (i.e. a single frame).
39+ """
def to_universe(self):
    """Refuse to build a universe from this editor.

    Raises:
        NotImplementedError: always; the message states that the editor
            has no ``parse_atom`` method.
    """
    reason = "This editor has no parse_atom method."
    raise NotImplementedError(reason)
3642
37- def _one_el (self , starts , step , ncol ):
38- func = pd .read_csv
39- kwargs = {'header' : None }
40- if ncol == 1 :
41- func = pd .read_fwf
42- kwargs ['widths' ] = [18 ] * 4
43- else :
44- kwargs ['delim_whitespace' ] = True
45- return [func (StringIO ('\n ' .join (self [start :start + step ])),
46- ** kwargs ).stack ().values for start in starts ]
47-
def parse_momatrix(self):
    """Populate the coefficient matrix by delegating to ``parse``.

    The file is read in a single pass by ``parse``, which assigns
    ``momatrix``, ``orbital`` and ``occupation_vector`` together, so this
    method only forwards the call. It returns nothing; any exception
    raised by ``parse`` propagates unchanged.
    """
    self.parse()
46+
def parse_orbital(self):
    """Populate the orbital table by delegating to ``parse``.

    Nothing is returned; whatever ``parse`` raises is propagated as is.
    """
    run_full_parse = self.parse
    run_full_parse()
50+
def parse(self):
    """Parse all information contained in the orbital/coefficient matrix dump.

    The number of orbitals is read from line index 5 of the file. Section
    boundaries are located through ``self.find`` using the markers
    ``" ORBITAL "``, ``" OCCUPATION NUMBERS"``, ``" ONE ELECTRON ENERGIES"``
    and ``"#INDEX"``.

    On success three attributes are assigned:

    * ``momatrix`` -- DataFrame with columns ``coef``, ``chi``, ``orbital``
      and ``frame`` (always 0).
    * ``orbital`` -- DataFrame with columns ``occupation``, ``energy``,
      ``frame``, ``spin``, ``group`` (all 0) and ``vector``.
    * ``occupation_vector`` -- the occupation numbers as a float array.

    Raises:
        IndexError: if any of the four section markers is absent.
        ValueError: if the number of values read does not match the
            orbital count taken from the header.
    """
    nmo = int(self[5])
    orb_key = " ORBITAL "
    occ_key = " OCCUPATION NUMBERS"
    nrg_key = " ONE ELECTRON ENERGIES"
    idx_key = "#INDEX"
    found = self.find(orb_key, occ_key, nrg_key, idx_key, keys_only=True)
    start = found[orb_key][0]
    occ_hits = found[occ_key]
    stop = occ_hits[0]
    start1 = found[nrg_key][0]
    stop1 = found[idx_key][0]
    # A second occupation header (when present) closes the first occupation
    # section; otherwise that section runs up to the energies header.
    end = occ_hits[1] if len(occ_hits) > 1 else start1

    def read_values(first, last, width, count):
        # Read a fixed-width numeric section and drop the padding NaNs
        # produced by a short final line.
        table = pd.read_fwf(StringIO("\n".join(self[first:last])),
                            widths=[width] * count,
                            names=list(range(count)))
        values = table.values.ravel().astype(float)
        return values[~np.isnan(values)]

    df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                     widths=[22] * 5, names=list(range(5)))
    # na=False keeps the mask boolean even if column 0 holds a NaN.
    is_header = df[0].str.contains(" ORBITAL", na=False, regex=False)
    starting_points = df[is_header].index.values + 1
    if len(starting_points) > 1:
        # Rows per orbital = distance between consecutive headers.
        didx = starting_points[1] - starting_points[0] - 1
    else:
        # Single orbital: no second header to measure against, so derive
        # the row count from five coefficients per line.
        didx = -(-nmo // 5)
    coef = np.concatenate(
        [df.iloc[i:i + didx].values.ravel()[:nmo] for i in starting_points]
    ).astype(float)

    index = np.arange(nmo)
    momatrix = pd.DataFrame.from_dict({'coef': coef,
                                       'chi': np.tile(index, nmo),
                                       'orbital': np.repeat(index, nmo)})
    momatrix['frame'] = 0

    occ = read_values(stop + 1, end - 1, 22, 5)
    nrg = read_values(start1 + 1, stop1, 12, 10)
    orbital = pd.DataFrame.from_dict({'occupation': occ, 'energy': nrg})
    orbital['frame'] = 0
    orbital['spin'] = 0
    orbital['group'] = 0
    orbital['vector'] = range(nmo)

    self.momatrix = momatrix
    self.orbital = orbital
    self.occupation_vector = occ
11187
11288 def __init__ (self , * args , ** kwargs ):
11389 super (Orb , self ).__init__ (* args , ** kwargs )
0 commit comments