@@ -30,16 +30,18 @@ def movecol(df, cols_to_move=[], ref_col='', place='After'):
3030# helper function to cleanup column names
3131# this should be revisited to make a cleaner more general solution
def clean_col_names(df):
    """Normalise the column labels of *df* into lowercase, underscore-separated names.

    The labels are rewritten in place on ``df`` and the same DataFrame is
    returned, so the function can be used either for its side effect or in an
    assignment.

    Steps, applied in order to every label:

    1. strip surrounding whitespace, then strip trailing ``.``, ``)`` and ``]``
    2. ``%`` -> ``percent`` and ``#`` -> ``number``
    3. ``/``, ``,`` and ``-`` -> ``_``
    4. remove ``.``, ``,``, ``(``, ``)`` and apostrophes
    5. ``[`` -> ``_`` and remove ``]``
    6. spaces -> ``_``
    7. lowercase

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are string labels (the ``.str`` accessor is used).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with cleaned column labels.
    """
    # Ordered (pattern, replacement) pairs; order matters because later
    # patterns act on the output of earlier ones (e.g. spaces are only turned
    # into underscores after punctuation has been removed).
    substitutions = (
        (r'%', 'percent'),
        (r'#', 'number'),
        (r'[/,-]', '_'),
        (r'[.,()]', ''),
        (r"'", ''),
        # An explicit escape instead of the character class "[[]", which makes
        # the re module emit "FutureWarning: Possible nested set".
        (r'\[', '_'),
        (r'\]', ''),
        (r' ', '_'),
    )

    names = df.columns.str.strip().str.rstrip('.)]')
    for pattern, replacement in substitutions:
        names = names.str.replace(pattern, replacement, regex=True)

    df.columns = names.str.lower()

    return df
4547
@@ -128,3 +130,31 @@ def import_nexera(file_name):
128130
129131 return combined_df
130132
def import_gcms_qp(file_name):
    """Load a GCMS QP export into a DataFrame tagged with its header metadata.

    The file is read twice: once for the data table (column names taken from
    row index 6 and passed through ``clean_col_names``) and once for the first
    few lines, from which whitespace-separated tokens are picked out.

    Parameters
    ----------
    file_name : str or path-like
        Path of the export file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The data table truncated to the number of ids reported in the header,
        with added ``file_name``, ``date`` and ``time`` columns.
    """
    import pandas as pd

    # Data table; the separator is reproduced exactly as it appears in the
    # source. NOTE(review): confirm whether the export is space- or
    # tab-delimited.
    peaks = clean_col_names(
        pd.read_csv(file_name, sep=' ', header=6, engine='python')
    )

    # Header block: with the default header handling the first line becomes
    # the column label and the following five lines become rows 0-4.
    preamble = pd.read_csv(file_name, nrows=5)

    def header_token(row, position):
        # Whitespace-split the first cell of the given header row and return
        # the token at the requested position.
        return preamble.iloc[row, 0].split()[position]

    # Number of ids, as stated in the header (fourth token of row 4).
    id_count = header_token(4, 3)

    # Rows from that count onwards have no id, so drop them by index label.
    surplus_rows = list(range(int(id_count), len(peaks)))
    peaks.drop(index=surplus_rows, inplace=True)

    # Attach header metadata to every remaining row.
    # NOTE(review): the token positions presumably match the instrument's
    # fixed header layout - verify against a sample file.
    peaks['file_name'] = header_token(0, 3)
    peaks['date'] = header_token(1, 2)
    peaks['time'] = header_token(2, 2)

    return peaks
0 commit comments