Skip to content

Commit 947ef2d

Browse files
authored
Merge pull request #9 from ufosoftwarellc/main
import gcms qp file | needs testing
2 parents 49a356b + dd6aa82 commit 947ef2d

2 files changed

Lines changed: 33 additions & 2 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,4 @@ dmypy.json
127127

128128
# Pyre type checker
129129
.pyre/
130+
cannlytics/lims/.DS_Store

cannlytics/lims/instruments/import_data.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@ def movecol(df, cols_to_move=[], ref_col='', place='After'):
3030
# Helper function to clean up column names.
3131
# TODO: revisit this to make a cleaner, more general solution.
3232
def clean_col_names(df):
    """Normalize the column labels of *df* in place and return *df*.

    Every label is converted to ``str`` first, so numeric or mixed-type
    labels are cleaned instead of becoming NaN (or raising) under the
    ``.str`` accessor. The steps below are then applied in order:

    1. Strip surrounding whitespace.
    2. Strip any trailing ``.``, ``)`` or ``]`` characters.
    3. Replace ``%`` with ``percent`` and ``#`` with ``number``.
    4. Replace ``/``, ``,`` and ``-`` with ``_``.
    5. Remove ``.``, ``(``, ``)``, ``'`` and ``]``.
    6. Replace ``[`` and spaces with ``_``.
    7. Lower-case the result.

    Args:
        df: The DataFrame whose ``columns`` are rewritten.

    Returns:
        The same DataFrame object, with cleaned column labels.
    """
    # Ordered (pattern, replacement, is_regex) substitutions. Only the
    # character classes need the regex engine; the rest are literals.
    substitutions = (
        ('%', 'percent', False),
        ('#', 'number', False),
        ('[/,-]', '_', True),
        # Commas were already turned into '_' by the previous rule.
        ('[.()]', '', True),
        ("'", '', False),
        ('[', '_', False),
        (']', '', False),
        (' ', '_', False),
    )

    # Cast to str so that non-string labels survive the .str accessor.
    names = df.columns.astype(str).str.strip().str.rstrip('.)]')
    for pattern, replacement, is_regex in substitutions:
        names = names.str.replace(pattern, replacement, regex=is_regex)

    df.columns = names.str.lower()
    return df
4547

@@ -128,3 +130,31 @@ def import_nexera(file_name):
128130

129131
return combined_df
130132

133+
def import_gcms_qp(file_name):
    """Import a GCMS QP results file as a DataFrame.

    The file is parsed twice with ``pandas.read_csv``: once for the
    results table, whose column header is taken from row 6
    (``header=6``), and once for the first rows of the file, from which
    the ID count, file name, output date and output time are extracted.

    Args:
        file_name: Path (or anything ``pandas.read_csv`` accepts and can
            read twice) of the file to import.

    Returns:
        A DataFrame holding the first ``n_ids`` result rows, with cleaned
        column names and added ``file_name``, ``date`` and ``time``
        columns.

    Raises:
        ValueError: If the ID count in the header is not an integer or
            is negative.
    """
    import pandas as pd

    # Read the results table and normalize its column names.
    # NOTE(review): the separator is reproduced exactly as it appears in
    # the reviewed source; confirm it matches the instrument export
    # (it may originally have been a tab character).
    qp_df = pd.read_csv(file_name, sep=' ', header=6, engine='python')
    qp_df = clean_col_names(qp_df)

    # Read the metadata lines at the top of the file.
    header_df = pd.read_csv(file_name, nrows=5)

    def header_token(row, position):
        """Return one whitespace-delimited token of a header row."""
        # NOTE(review): only a single token is kept, so a header value
        # containing spaces would be truncated; verify against real files.
        return header_df.iloc[row, 0].split()[position]

    # Number of identified rows reported by the header.
    n_ids = int(header_token(4, 3))
    if n_ids < 0:
        raise ValueError(f'invalid number of IDs in header: {n_ids}')

    # Keep only the rows that have an ID. Positional slicing works for
    # any index; the copy avoids chained-assignment warnings below.
    qp_df = qp_df.iloc[:n_ids].copy()

    # Attach the file name, output date and output time from the header.
    qp_df['file_name'] = header_token(0, 3)
    qp_df['date'] = header_token(1, 2)
    qp_df['time'] = header_token(2, 2)

    return qp_df

0 commit comments

Comments
 (0)