import base64
import io
import logging
import re

import pandas as pd
from functools import wraps

# ---------------- Logging Setup ----------------
logger = logging.getLogger('ct.conv')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

# ---------------- Logging Decorator ----------------
def log_call(f):
    """Log every call to the wrapped function at DEBUG level."""
    @wraps(f)
    def wrapper(*args, **kwargs):
        logger.debug(f"Calling {f.__name__} with args={args} kwargs={kwargs}")
        return f(*args, **kwargs)
    return wrapper
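
# Example (illustrative sketch): any function wrapped with @log_call emits a
# DEBUG record before executing. 'add' is a hypothetical function used only
# to demonstrate the decorator:
#
#     @log_call
#     def add(a, b):
#         return a + b
#
#     add(1, b=2)
#     # <timestamp> - ct.conv - DEBUG - Calling add with args=(1,) kwargs={'b': 2}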

# ---------------- Data Reading Functions ----------------
@log_call
def readMat(content, filename):
    """Parse a base64-encoded targeted-MS text export into a long-format DataFrame.

    The decoded text is expected to contain one 'Compound N: <name>' header per
    compound, each followed by a tab-separated data block.
    """
    data = base64.b64decode(content)
    cpds = {}
    collect = False

    for line in data.splitlines():
        # each physical line arrives as bytes; decode before pattern matching
        line = line.decode('unicode_escape')
        if re.search(r'Compound \d+', line):
            cpd = re.sub(r'Compound \d+:\s+', '', line.strip())
            cpds[cpd] = []
            collect = True
            continue
        if collect and line.strip():
            cpds[cpd].append(line)

    mets = []
    for fid, entries in cpds.items():
        # each compound block is a small tab-separated table
        df = pd.read_table(io.StringIO('\n'.join(entries)))
        df['cpd'] = fid
        df['path'] = filename
        mets.append(df)

    return pd.concat(mets)
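
# Example (illustrative sketch): round-tripping a small synthetic payload
# through base64 exercises the parser. The demo content below is hypothetical:
#
#     raw = b"Compound 1:  tryptophan\nName\tType\tConc.\nS01\tAnalyte\t12.3\n"
#     df = readMat(base64.b64encode(raw), 'demo.TXT')
#     # df has the tabular columns plus 'cpd' and 'path'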

@log_call
def readbin(contents, filenames, varType='Conc.', featureType='analyte|standard', sil=True):
    """Convert base64-encoded binary strings into a wide-format table.

    varType selects the variable to extract (typically 'Conc.' or 'Response');
    featureType is a case-insensitive regex on the feature type column
    (e.g. 'analyte' or 'analyte|cal'); sil=False drops SIL-labelled compounds.
    """
    collected_data = {'data': [], 'failed': []}

    for content, fname in zip(contents, filenames):
        try:
            df = readMat(content, fname)
            collected_data['data'].append(df)
        except Exception as e:
            logger.warning(f"Import failed for {fname}: {e}")
            collected_data['failed'].append(fname)

    if not collected_data['data']:
        logger.error("No data could be loaded from provided files.")
        return None

    try:
        ds = pd.concat(collected_data['data'])
    except Exception as e:
        logger.error(f"Failed to concatenate data: {e}")
        return None

    # remove QCs and internal standards: keep only requested feature types
    try:
        ds = ds[ds.Type.str.contains(featureType, regex=True, flags=re.IGNORECASE)]
    except Exception as e:
        logger.warning(f"Feature filtering failed: {e}")

    if not sil:
        ds = ds[~ds.cpd.str.contains('^SIL')]

    # wide format: one row per (path, Name), one column per compound
    tf = ds.pivot_table(index=['path', 'Name'], columns='cpd', values=varType)
    tf = tf.astype(float)
    return tf.reset_index()
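
# Example (illustrative sketch): converting one export file end-to-end. The
# file name is taken from the original test notes; in practice the encoded
# contents would typically come from a file-upload callback rather than disk.
#
#     with open('Barwon_adult_Plate9.TXT', 'rb') as fh:
#         enc = base64.b64encode(fh.read())
#     wide = readbin([enc], ['Barwon_adult_Plate9.TXT'], varType='Conc.')
#     # -> DataFrame with 'path', 'Name' plus one column per compound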