66from scdata .device .process import clean_ts , baseline_als
77from scipy .stats import linregress
88import matplotlib .pyplot as plt
9- from pandas import date_range , DataFrame , Series , isnull
9+ from pandas import date_range , DataFrame , Series , isnull , Timedelta
1010from scdata .device .process .error_codes import StatusCode , ProcessResult
1111
1212def alphasense_803_04 (dataframe , ** kwargs ):
@@ -27,8 +27,8 @@ def alphasense_803_04(dataframe, **kwargs):
2727 use_alternative: boolean
2828 Default false
2929 Use alternative algorithm as shown in the AAN
30- resample_frequency : string
31- Resample we and ae electrodes with certain frequency
30+ rolling_frequency : string
31+ Rolling average frequency for we and ae electrodes
3232 return_all_cols: bool
3333 Return all columns intermediate to the calculation
3434 Returns
@@ -74,7 +74,7 @@ def comp_t(x, comp_lut):
7474 logger .error (f"Sensor { kwargs ['alphasense_id' ]} not in calibration data" )
7575 return ProcessResult (None , StatusCode .ERROR_CALIBRATION_NOT_FOUND )
7676
77- resample_frequency = kwargs .get ('resample_frequency ' , None )
77+ rolling_frequency = kwargs .get ('rolling_frequency ' , None )
7878 return_all_cols = kwargs .get ('return_all_cols' , False )
7979
8080 # Make copy
@@ -121,7 +121,7 @@ def comp_t(x, comp_lut):
121121 for electrode in ['we' , 'ae' ]:
122122 subkwargs = {'name' : kwargs [electrode ],
123123 'limits' : (0 , 5 ), # In V
124- 'window_size ' : None
124+ 'window ' : None
125125 }
126126
127127 cleaning_result = clean_ts (df , ** subkwargs )
@@ -137,10 +137,9 @@ def comp_t(x, comp_lut):
137137 df ['t' ] = df [kwargs ['t' ]]
138138
139139 # Apply rolling average if requested
140- if resample_frequency is not None :
141- df = df [~ df .index .duplicated (keep = 'first' )]
142- df ['we_t' ] = df ['we_t' ].rolling (resample_frequency ).mean ()
143- df ['ae_t' ] = df ['ae_t' ].rolling (resample_frequency ).mean ()
140+ if rolling_frequency is not None :
141+ df ['we_t' ] = df ['we_t' ].rolling (rolling_frequency ).mean ()
142+ df ['ae_t' ] = df ['ae_t' ].rolling (rolling_frequency ).mean ()
144143
145144 # Temperature compensation - done line by line as it has special conditions
146145 df [comp_type ] = df .apply (lambda x : comp_t (x , comp_lut ), axis = 1 ) # temperature correction factor
@@ -182,13 +181,11 @@ def comp_t(x, comp_lut):
182181
183182 result = df [cols ].copy ()
184183
185- rename_cols = {
186- "we_clean" : f"803_we_clean" ,
187- "ae_clean" : f"803_ae_clean" ,
188- "we_t" : f"803_we_t" ,
189- "ae_t" : f"803_ae_t" ,
190- "we_c" : f"803_we_c"
191- }
184+ rename_cols = {}
185+ for col in cols :
186+ if col == 'conc' : continue
187+ rename_cols [col ] = f'803_{ col } '
188+
192189 result .rename (columns = rename_cols , inplace = True )
193190 else :
194191 result = df ['conc' ].copy ()
@@ -217,7 +214,14 @@ def alphasense_als(dataframe, **kwargs):
217214 background_conc: bool
218215 Offset a background concentration on resulting value
219216 resample_frequency: string
220- Resample we and ae electrodes with certain frequency
217+ Resample we for gap detection with certain frequency
218+ n_gaps: int
219+ Minimum time difference, in minutes, between consecutive rows to be considered a gap
220+ Default: None (gap detection disabled)
221+ gap_window_removal_h: int
222+ Number of hours to remove data after a gap is found for baseline calculation
223+ rolling_frequency: string
224+ Roll average for we and ae electrodes with certain frequency
221225 return_all_cols: bool
222226 Return all columns intermediate to the calculation
223227 Returns
@@ -246,19 +250,26 @@ def alphasense_als(dataframe, **kwargs):
246250 return ProcessResult (None , StatusCode .ERROR_CALIBRATION_NOT_FOUND )
247251
248252 # Parse options
249- clip_negative_conc = kwargs .get ('clip_negative_conc' , _default_clip_negative_conc )
250- offset_negative_conc = kwargs .get ('offset_negative_conc' , _default_offset_negative_conc )
253+ clip_negative_conc = kwargs .get ('clip_negative_conc' , default_clip_negative_conc )
254+ offset_negative_conc = kwargs .get ('offset_negative_conc' , default_offset_negative_conc )
251255 background_conc = kwargs .get ('background_conc' , 0 )
252256
253257 lam = kwargs .get ('lam' , None )
254258 p = kwargs .get ('p' , None )
255259
256- resample_frequency = kwargs .get ('resample_frequency' , None )
257260 return_all_cols = kwargs .get ('return_all_cols' , False )
258261
259262 # Algorithm selection
260263 algorithm = kwargs .get ('algorithm' , 1 )
261264
265+ # Gaps cleaning
266+ n_gaps = kwargs .get ('n_gaps' , None )
267+ resample_frequency = kwargs .get ('resample_frequency' , None )
268+ gap_window_removal_h = kwargs .get ('gap_window_removal_h' , 6 )
269+
270+ # Rolling
271+ rolling_frequency = kwargs .get ('rolling_frequency' , None )
272+
262273 # Make copy
263274 df = dataframe .copy ()
264275
@@ -278,12 +289,12 @@ def alphasense_als(dataframe, **kwargs):
278289
279290 # Remove spurious voltages (0V < electrode < 5V)
280291 for electrode in ['we' , 'ae' ]:
281- subkwargs = {'name' : kwargs [electrode ],
292+ clean_ts_kwargs = {'name' : kwargs [electrode ],
282293 'limits' : (0 , 5 ), # In V
283- 'window_size ' : None
294+ 'window ' : None
284295 }
285296
286- cleaning_result = clean_ts (df , ** subkwargs )
297+ cleaning_result = clean_ts (df , ** clean_ts_kwargs )
287298 if 'SUCCESS' in cleaning_result .status_code .name :
288299 df [f'{ electrode } _clean' ] = cleaning_result .data
289300 else :
@@ -293,21 +304,55 @@ def alphasense_als(dataframe, **kwargs):
293304 df ['we_t' ] = df ['we_clean' ] - (cal_data ['we_electronic_zero_mv' ] / 1000 ) # in V
294305 df ['ae_t' ] = df ['ae_clean' ] - (cal_data ['ae_electronic_zero_mv' ] / 1000 ) # in V
295306
296- # Resample if requested
297- if resample_frequency is not None :
298- df = df [~ df .index .duplicated (keep = 'first' )]
299- df ['we_t' ] = df ['we_t' ].rolling (resample_frequency ).mean ()
300- df ['ae_t' ] = df ['ae_t' ].rolling (resample_frequency ).mean ()
307+ # Roll if requested
308+ if rolling_frequency is not None :
309+ logger .info (f'Rolling on sensor data { rolling_frequency } ' )
310+ df ['we_t' ] = df ['we_t' ].rolling (rolling_frequency ).mean ()
311+ df ['ae_t' ] = df ['ae_t' ].rolling (rolling_frequency ).mean ()
312+ else :
313+ logger .warning ('Not doing rolling on sensor data' )
314+
315+ # Find gaps
316+ found_gaps = False
317+ baseline_als_kwargs = {'name' : 'we_t' }
318+ if n_gaps and resample_frequency and gap_window_removal_h is not None :
319+ logger .info (f'Searching gaps of at least { n_gaps } ' )
320+ # logger.info(f'Resampling at {resample_frequency}')
321+ # df = df.resample(resample_frequency).mean() # This is important, otherwise we can't find gaps
322+ # df['we_na'] = df['we_t'].isna()
323+ # for i in range(n_gaps):
324+ # df[f'we_na_shift_{i+1}'] = df['we_na'].shift(i+1)
325+
326+ # # We consider a gap only when there is at least "n_gaps" consecutive gaps
327+ # cols = [c for c in df.columns if c.startswith("we_na_shift_")]
328+ # # Get a falling edge after an n_gaps-long gap
329+ # df['we_gap_falling_edge'] = df[cols].all(axis=1) & (~df["we_na"])
330+
331+ df ['time' ] = df .index
332+ df ['time_lag' ] = df ['time' ].shift (1 )
333+ df ['time_delta' ] = df ['time' ] - df ['time_lag' ]
334+ df ['gap' ] = df ['time_delta' ] > Timedelta (minutes = n_gaps )
335+ df ['we_mask' ] = False
336+ # true_idx = df.index[df["we_gap_falling_edge"]]
337+ true_idx = df .index [df ["gap" ]]
338+
339+ for t in true_idx :
340+ end = t + Timedelta (hours = gap_window_removal_h )
341+ df .loc [t :end , "we_mask" ] = True
342+
343+ df ['we_t_filter' ] = df ['we_t' ][~ df ['we_mask' ]]
344+ # Replace baseline kwargs
345+ baseline_als_kwargs = {'name' : 'we_t_filter' }
346+ found_gaps = True
301347
302348 # Get requested temperature
303349 df ['t' ] = df [kwargs ['t' ]]
304350
305351 # Calculate WE baseline
306- subkwargs = {'name' : 'we_t' }
307- if lam is not None : subkwargs ['lam' ] = lam
308- if p is not None : subkwargs ['p' ] = p
352+ if lam is not None : baseline_als_kwargs ['lam' ] = lam
353+ if p is not None : baseline_als_kwargs ['p' ] = p
309354
310- baseline_result = baseline_als (df , ** subkwargs )
355+ baseline_result = baseline_als (df , ** baseline_als_kwargs )
311356 if 'SUCCESS' in baseline_result .status_code .name :
312357 df [f'we_baseline' ] = baseline_result .data
313358 else :
@@ -323,6 +368,8 @@ def alphasense_als(dataframe, **kwargs):
323368 # Correct Auxiliary electrode based on mean factor
324369 df ['ae_cor' ] = df ['baseline_t_mean' ] * df ['ae_t' ]
325370 df ['we_c' ] = df ['we_t' ] - df ['ae_cor' ]
371+ if found_gaps :
372+ df ['we_c' ] = df ['we_c' ][~ df ['we_mask' ]]
326373 elif algorithm == 2 : # +/-
327374 # Calculate baseline factor with the baseline mean
328375 mean_we = df ['we_baseline' ].quantile (0.05 )
@@ -333,6 +380,8 @@ def alphasense_als(dataframe, **kwargs):
333380 # Correct Auxiliary electrode based on mean factor
334381 df ['ae_cor' ] = df ['ae_t' ] + mean_we
335382 df ['we_c' ] = df ['we_t' ] - df ['ae_cor' ]
383+ if found_gaps :
384+ df ['we_c' ] = df ['we_c' ][~ df ['we_mask' ]]
336385
337386 # Verify if it has NO2 cross-sensitivity (in V)
338387 if cal_data ['we_cross_sensitivity_no2_mv_ppb' ] != float (0 ) and 'NO2' not in as_type :
@@ -357,8 +406,12 @@ def alphasense_als(dataframe, **kwargs):
357406 if return_all_cols :
358407 cols = [
359408 "we_clean" ,
409+ "we_na" ,
410+ "we_mask" ,
411+ "we_gap_falling_edge" ,
360412 "ae_clean" ,
361413 "we_t" ,
414+ "we_t_filter" ,
362415 "ae_t" ,
363416 "we_baseline" ,
364417 "baseline_t" ,
@@ -368,19 +421,15 @@ def alphasense_als(dataframe, **kwargs):
368421 "conc" # Reserved for metric name
369422 ]
370423
371- result = df [cols ].copy ()
424+ # Return those columns that are present in the df
425+ result = df [df .columns .intersection (cols )].copy ()
426+ # result = df[cols].copy()
427+
428+ rename_cols = {}
429+ for col in cols :
430+ if col == 'conc' : continue
431+ rename_cols [col ] = f'als_{ col } '
372432
373- rename_cols = {
374- "we_clean" : f"als_we_clean" ,
375- "ae_clean" : f"als_ae_clean" ,
376- "we_t" : f"als_we_t" ,
377- "ae_t" : f"als_ae_t" ,
378- "we_baseline" : f"als_we_baseline" ,
379- "baseline_t" : f"als_baseline_t" ,
380- "baseline_t_mean" : f"als_baseline_t_mean" ,
381- "ae_cor" : f"als_ae_cor" ,
382- "we_c" : f"als_we_c"
383- }
384433 result .rename (columns = rename_cols , inplace = True )
385434
386435 else :
0 commit comments