Skip to content

Commit 3b867e4

Browse files
authored
Update conv.py
refactored for maintainability and readability
1 parent 2f78fd9 commit 3b867e4

1 file changed

Lines changed: 53 additions & 170 deletions

File tree

scr/conv.py

Lines changed: 53 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -1,204 +1,87 @@
1+
import base64
12
import pandas as pd
2-
import logging as l
3+
import logging
34
import re
5+
import io
6+
from functools import wraps
47

5-
# ---------------- Logging Setup ----------------
# Module-level logger for this converter; DEBUG records go to stderr via a
# StreamHandler with a timestamped format.
logger = logging.getLogger('ct.conv')
logger.setLevel(logging.DEBUG)

ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)

# Guard against stacking duplicate handlers (and hence duplicated log lines)
# when this module is re-imported or re-executed, e.g. in a REPL or by a
# reloading dev server.
if not logger.handlers:
    logger.addHandler(ch)
2016

21-
from functools import wraps
22-
# ---------------- Logging Decorator ----------------
def log_call(f):
    """Decorator: log every call to *f* (name, positional and keyword
    arguments) at DEBUG level before delegating to the wrapped function.

    The wrapped function's return value is passed through unchanged.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        # Lazy %-style arguments: the message is only formatted if a handler
        # actually emits DEBUG records, instead of eagerly via an f-string.
        logger.debug("Calling %s with args=%s kwargs=%s", f.__name__, args, kwargs)
        return f(*args, **kwargs)
    return wrapper
4724

48-
49-
# read targeted ms data into file or python workspace
50-
# ds=readTarMS(path, 'barw', 'collectedData.csv')
51-
# def readTarMS(path, pattern=[], filename=None):
52-
# import os
53-
# import re
54-
# import pandas as pd
55-
# import io
56-
#
57-
# def readMat(path, f):
58-
# fh = os.path.join(path, f)
59-
#
60-
# cpds = {}
61-
# collect = False
62-
# with open(fh, 'r', encoding='utf-8', errors='ignore') as file:
63-
# data = file.readlines()
64-
# for l in data:
65-
# if bool(re.search('Compound [0-9]', l)):
66-
# cpd = re.sub('Compound [0-9]+:\s+', '', l.strip())
67-
# cpds[cpd] = []
68-
# collect = True
69-
# continue
70-
# if collect and l.strip() != '':
71-
# cpds[cpd].append(l)
72-
#
73-
# mets = []
74-
# for fid, d in cpds.items():
75-
# df = pd.read_table(io.StringIO(''.join(d)))
76-
# df['cpd'] = fid
77-
# df['path'] = fh
78-
# mets.append(df)
79-
#
80-
# return pd.concat(mets)
81-
#
82-
# ds = pd.read_table(io.StringIO(''.join(cpds['Compound 1: tryptophan'])))
83-
# return ds
84-
#
85-
# flist = [fn for fn in os.listdir(path) if any([bool(re.search(st.lower(), fn.lower())) for st in pattern])]
86-
#
87-
# dfs=[]
88-
# for f in flist:
89-
# print(f)
90-
# dfs.append(readMat(path, f))
91-
#
92-
# ds = pd.concat(dfs)
93-
# # remove QCs
94-
# ds=ds[ds.Type == 'Analyte']
95-
# ds= ds[~ ds.cpd.str.contains('^SIL')]
96-
#
97-
# ds['id'] = ds['path']+ds['Name']+ds['Sample Text']
98-
# ds.set_index(['id'])
99-
# # ts=ds.pivot_table(index = 'id', columns='cpd', values='Conc.', aggfunc='mean')
100-
# ts=ds.pivot_table(index = 'id', columns='cpd', values='Conc.')
101-
#
102-
# if filename is not None:
103-
# ts.to_csv(os.path.join(path, filename))
104-
# else:
105-
# return ts
106-
107-
@logM
25+
# ---------------- Data Reading Functions ----------------
26+
@log_call
10827
def readMat(content, filename):
    """Parse a base64-encoded targeted-MS text export into one DataFrame.

    The decoded text is expected to contain one or more sections, each
    introduced by a header line like ``Compound 12: tryptophan`` and
    followed by a tab-separated table of values for that compound.

    Parameters
    ----------
    content : bytes or str
        Base64-encoded file contents.
    filename : str
        Origin label stored in the resulting ``path`` column.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all per-compound tables, each tagged with a
        ``cpd`` (compound name) and ``path`` (source file) column.

    Raises
    ------
    ValueError
        If no ``Compound N:`` section is found in the decoded content.
    """
    data = base64.b64decode(content)

    # Group raw lines by the compound section they belong to. A header line
    # starts a new section; subsequent non-blank lines are that compound's
    # tab-separated table. `collect` stays False until the first header, so
    # any preamble lines are skipped.
    cpds = {}
    collect = False
    for raw in data.splitlines():
        # NOTE(review): unicode_escape is a lossy choice for non-ASCII text;
        # kept for compatibility with the original files — confirm encoding.
        line = raw.decode('unicode_escape')
        if re.search(r'Compound \d+', line):
            cpd = re.sub(r'Compound \d+:\s+', '', line.strip())
            cpds[cpd] = []
            collect = True
            continue
        if collect and line.strip():
            cpds[cpd].append(line)

    if not cpds:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # fail early with the offending file named instead.
        raise ValueError(f'No compound sections found in {filename}')

    # Parse each compound's collected lines as a tab-separated table and tag
    # it with the compound name and source file before combining.
    mets = []
    for cpd_name, table_lines in cpds.items():
        df = pd.read_table(io.StringIO('\n'.join(table_lines)))
        df['cpd'] = cpd_name
        df['path'] = filename
        mets.append(df)

    return pd.concat(mets)
13450

135-
@logM
51+
@log_call
13652
def readbin(contents, filenames, varType='Conc.', featureType='analyte|standard', sil=True):
    """Convert base64-encoded instrument exports into a wide data table.

    Each file is parsed with ``readMat``; the per-file tables are combined,
    filtered, and pivoted so that rows are (source file, sample name) pairs
    and columns are compounds.

    Parameters
    ----------
    contents : iterable of bytes or str
        Base64-encoded file contents, one entry per file.
    filenames : iterable of str
        File names matching ``contents`` positionally.
    varType : str
        Variable to tabulate (typically ``'Conc.'`` or ``'Response'``).
    featureType : str
        Case-insensitive regex selecting rows by their ``Type`` column
        (e.g. ``'analyte'`` or ``'analyte|standard'``).
    sil : bool
        If False, drop compounds whose name starts with ``SIL``
        (stable-isotope-labelled internal standards).

    Returns
    -------
    pandas.DataFrame or None
        Wide table (one column per compound) with ``path`` and ``Name``
        restored as columns, or ``None`` when no file could be read or the
        per-file tables could not be combined.
    """
    collected_data = {'data': [], 'failed': []}

    # Parse each file independently so one bad file does not abort the batch.
    for content, fname in zip(contents, filenames):
        try:
            df = readMat(content, fname)
            collected_data['data'].append(df)
        except Exception as e:
            logger.warning(f"Import failed for {fname}: {e}")
            collected_data['failed'].append(fname)

    if not collected_data['data']:
        logger.error("No data could be loaded from provided files.")
        return None

    if collected_data['failed']:
        # One batch-level summary of everything skipped, so the caller can
        # follow up without scanning the per-file warnings above.
        logger.warning(f"Skipped {len(collected_data['failed'])} file(s): {collected_data['failed']}")

    try:
        ds = pd.concat(collected_data['data'])
    except Exception as e:
        logger.error(f"Failed to concatenate data: {e}")
        return None

    # Best-effort row filter on the 'Type' column; data lacking that column
    # (or with non-string values) passes through unfiltered.
    try:
        ds = ds[ds.Type.str.contains(featureType, regex=True, flags=re.IGNORECASE)]
    except Exception as e:
        logger.warning(f"Feature filtering failed: {e}")

    if not sil:
        ds = ds[~ds.cpd.str.contains('^SIL')]

    # Wide layout: one row per (source file, sample name), one column per
    # compound; duplicate cells are averaged by pivot_table's default.
    tf = ds.pivot_table(index=['path', 'Name'], columns='cpd', values=varType)
    tf = tf.astype(float)
    return tf.reset_index()

0 commit comments

Comments
 (0)