Skip to content

Commit ab6c2be

Browse files
committed
Add gap detection on als algorithm
1 parent f04bb92 commit ab6c2be

1 file changed

Lines changed: 93 additions & 44 deletions

File tree

scdata/device/process/alphasense.py

Lines changed: 93 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from scdata.device.process import clean_ts, baseline_als
77
from scipy.stats import linregress
88
import matplotlib.pyplot as plt
9-
from pandas import date_range, DataFrame, Series, isnull
9+
from pandas import date_range, DataFrame, Series, isnull, Timedelta
1010
from scdata.device.process.error_codes import StatusCode, ProcessResult
1111

1212
def alphasense_803_04(dataframe, **kwargs):
@@ -27,8 +27,8 @@ def alphasense_803_04(dataframe, **kwargs):
2727
use_alternative: boolean
2828
Default false
2929
Use alternative algorithm as shown in the AAN
30-
resample_frequency: string
31-
Resample we and ae electrodes with certain frequency
30+
rolling_frequency: string
31+
Rolling average frequency for we and ae electrodes
3232
return_all_cols: bool
3333
Return all columns intermediate to the calculation
3434
Returns
@@ -74,7 +74,7 @@ def comp_t(x, comp_lut):
7474
logger.error(f"Sensor {kwargs['alphasense_id']} not in calibration data")
7575
return ProcessResult(None, StatusCode.ERROR_CALIBRATION_NOT_FOUND)
7676

77-
resample_frequency = kwargs.get('resample_frequency', None)
77+
rolling_frequency = kwargs.get('rolling_frequency', None)
7878
return_all_cols = kwargs.get('return_all_cols', False)
7979

8080
# Make copy
@@ -121,7 +121,7 @@ def comp_t(x, comp_lut):
121121
for electrode in ['we', 'ae']:
122122
subkwargs = {'name': kwargs[electrode],
123123
'limits': (0, 5), # In V
124-
'window_size': None
124+
'window': None
125125
}
126126

127127
cleaning_result = clean_ts(df, **subkwargs)
@@ -137,10 +137,9 @@ def comp_t(x, comp_lut):
137137
df['t'] = df[kwargs['t']]
138138

139139
# Apply rolling average if requested
140-
if resample_frequency is not None:
141-
df = df[~df.index.duplicated(keep='first')]
142-
df['we_t'] = df['we_t'].rolling(resample_frequency).mean()
143-
df['ae_t'] = df['ae_t'].rolling(resample_frequency).mean()
140+
if rolling_frequency is not None:
141+
df['we_t'] = df['we_t'].rolling(rolling_frequency).mean()
142+
df['ae_t'] = df['ae_t'].rolling(rolling_frequency).mean()
144143

145144
# Temperature compensation - done line by line as it has special conditions
146145
df[comp_type] = df.apply(lambda x: comp_t(x, comp_lut), axis = 1) # temperature correction factor
@@ -182,13 +181,11 @@ def comp_t(x, comp_lut):
182181

183182
result = df[cols].copy()
184183

185-
rename_cols = {
186-
"we_clean": f"803_we_clean",
187-
"ae_clean": f"803_ae_clean",
188-
"we_t": f"803_we_t",
189-
"ae_t": f"803_ae_t",
190-
"we_c": f"803_we_c"
191-
}
184+
rename_cols = {}
185+
for col in cols:
186+
if col == 'conc': continue
187+
rename_cols[col] = f'803_{col}'
188+
192189
result.rename(columns=rename_cols, inplace=True)
193190
else:
194191
result = df['conc'].copy()
@@ -217,7 +214,14 @@ def alphasense_als(dataframe, **kwargs):
217214
background_conc: bool
218215
Offset a background concentration on resulting value
219216
resample_frequency: string
220-
Resample we and ae electrodes with certain frequency
217+
Resample we for gap detection with certain frequency
218+
n_gaps: int
219+
Minimum time between consecutive readings, in minutes, to be considered a gap
219+
Default: None (gap detection disabled)
221+
gap_window_removal_h: int
222+
Number of hours of data to discard after a gap is found (excluded from the baseline calculation and from the corrected we signal). Default: 6
223+
rolling_frequency: string
224+
Rolling average window for the we and ae electrodes
221225
return_all_cols: bool
222226
Return all columns intermediate to the calculation
223227
Returns
@@ -246,19 +250,26 @@ def alphasense_als(dataframe, **kwargs):
246250
return ProcessResult(None, StatusCode.ERROR_CALIBRATION_NOT_FOUND)
247251

248252
# Parse options
249-
clip_negative_conc = kwargs.get('clip_negative_conc', _default_clip_negative_conc)
250-
offset_negative_conc = kwargs.get('offset_negative_conc', _default_offset_negative_conc)
253+
clip_negative_conc = kwargs.get('clip_negative_conc', default_clip_negative_conc)
254+
offset_negative_conc = kwargs.get('offset_negative_conc', default_offset_negative_conc)
251255
background_conc = kwargs.get('background_conc', 0)
252256

253257
lam = kwargs.get('lam', None)
254258
p = kwargs.get('p', None)
255259

256-
resample_frequency = kwargs.get('resample_frequency', None)
257260
return_all_cols = kwargs.get('return_all_cols', False)
258261

259262
# Algorithm selection
260263
algorithm = kwargs.get('algorithm', 1)
261264

265+
# Gaps cleaning
266+
n_gaps = kwargs.get('n_gaps', None)
267+
resample_frequency = kwargs.get('resample_frequency', None)
268+
gap_window_removal_h = kwargs.get('gap_window_removal_h', 6)
269+
270+
# Rolling
271+
rolling_frequency = kwargs.get('rolling_frequency', None)
272+
262273
# Make copy
263274
df = dataframe.copy()
264275

@@ -278,12 +289,12 @@ def alphasense_als(dataframe, **kwargs):
278289

279290
# Remove spurious voltages (0V < electrode < 5V)
280291
for electrode in ['we', 'ae']:
281-
subkwargs = {'name': kwargs[electrode],
292+
clean_ts_kwargs = {'name': kwargs[electrode],
282293
'limits': (0, 5), # In V
283-
'window_size': None
294+
'window': None
284295
}
285296

286-
cleaning_result = clean_ts(df, **subkwargs)
297+
cleaning_result = clean_ts(df, **clean_ts_kwargs)
287298
if 'SUCCESS' in cleaning_result.status_code.name:
288299
df[f'{electrode}_clean'] = cleaning_result.data
289300
else:
@@ -293,21 +304,55 @@ def alphasense_als(dataframe, **kwargs):
293304
df['we_t'] = df['we_clean'] - (cal_data['we_electronic_zero_mv'] / 1000) # in V
294305
df['ae_t'] = df['ae_clean'] - (cal_data['ae_electronic_zero_mv'] / 1000) # in V
295306

296-
# Resample if requested
297-
if resample_frequency is not None:
298-
df = df[~df.index.duplicated(keep='first')]
299-
df['we_t'] = df['we_t'].rolling(resample_frequency).mean()
300-
df['ae_t'] = df['ae_t'].rolling(resample_frequency).mean()
307+
# Roll if requested
308+
if rolling_frequency is not None:
309+
logger.info(f'Rolling on sensor data {rolling_frequency}')
310+
df['we_t'] = df['we_t'].rolling(rolling_frequency).mean()
311+
df['ae_t'] = df['ae_t'].rolling(rolling_frequency).mean()
312+
else:
313+
logger.warning('Not doing rolling on sensor data')
314+
315+
# Find gaps
316+
found_gaps = False
317+
baseline_als_kwargs = {'name': 'we_t'}
318+
if n_gaps and resample_frequency and gap_window_removal_h is not None:
319+
logger.info(f'Searching gaps of at least {n_gaps}')
320+
# logger.info(f'Resampling at {resample_frequency}')
321+
# df = df.resample(resample_frequency).mean() # This is important, otherwise we can't find gaps
322+
# df['we_na'] = df['we_t'].isna()
323+
# for i in range(n_gaps):
324+
# df[f'we_na_shift_{i+1}'] = df['we_na'].shift(i+1)
325+
326+
# # We consider a gap only when there is at least "n_gaps" consecutive gaps
327+
# cols = [c for c in df.columns if c.startswith("we_na_shift_")]
328+
# # Get a falling edge after an n_gaps-long gap
329+
# df['we_gap_falling_edge'] = df[cols].all(axis=1) & (~df["we_na"])
330+
331+
df['time'] = df.index
332+
df['time_lag'] = df['time'].shift(1)
333+
df['time_delta'] = df['time'] - df['time_lag']
334+
df['gap'] = df['time_delta'] > Timedelta(minutes=n_gaps)
335+
df['we_mask'] = False
336+
# true_idx = df.index[df["we_gap_falling_edge"]]
337+
true_idx = df.index[df["gap"]]
338+
339+
for t in true_idx:
340+
end = t + Timedelta(hours=gap_window_removal_h)
341+
df.loc[t:end, "we_mask"] = True
342+
343+
df['we_t_filter'] = df['we_t'][~df['we_mask']]
344+
# Replace baseline kwargs
345+
baseline_als_kwargs = {'name': 'we_t_filter'}
346+
found_gaps = True
301347

302348
# Get requested temperature
303349
df['t'] = df[kwargs['t']]
304350

305351
# Calculate WE baseline
306-
subkwargs = {'name': 'we_t'}
307-
if lam is not None: subkwargs['lam'] = lam
308-
if p is not None: subkwargs['p'] = p
352+
if lam is not None: baseline_als_kwargs['lam'] = lam
353+
if p is not None: baseline_als_kwargs['p'] = p
309354

310-
baseline_result = baseline_als(df, **subkwargs)
355+
baseline_result = baseline_als(df, **baseline_als_kwargs)
311356
if 'SUCCESS' in baseline_result.status_code.name:
312357
df[f'we_baseline'] = baseline_result.data
313358
else:
@@ -323,6 +368,8 @@ def alphasense_als(dataframe, **kwargs):
323368
# Correct Auxiliary electrode based on mean factor
324369
df['ae_cor'] = df['baseline_t_mean'] * df['ae_t']
325370
df['we_c'] = df['we_t'] - df['ae_cor']
371+
if found_gaps:
372+
df['we_c'] = df['we_c'][~df['we_mask']]
326373
elif algorithm == 2: # +/-
327374
# Calculate baseline factor with the baseline mean
328375
mean_we = df['we_baseline'].quantile(0.05)
@@ -333,6 +380,8 @@ def alphasense_als(dataframe, **kwargs):
333380
# Correct Auxiliary electrode based on mean factor
334381
df['ae_cor'] = df['ae_t'] + mean_we
335382
df['we_c'] = df['we_t'] - df['ae_cor']
383+
if found_gaps:
384+
df['we_c'] = df['we_c'][~df['we_mask']]
336385

337386
# Verify if it has NO2 cross-sensitivity (in V)
338387
if cal_data['we_cross_sensitivity_no2_mv_ppb'] != float (0) and 'NO2' not in as_type:
@@ -357,8 +406,12 @@ def alphasense_als(dataframe, **kwargs):
357406
if return_all_cols:
358407
cols = [
359408
"we_clean",
409+
"we_na",
410+
"we_mask",
411+
"we_gap_falling_edge",
360412
"ae_clean",
361413
"we_t",
414+
"we_t_filter",
362415
"ae_t",
363416
"we_baseline",
364417
"baseline_t",
@@ -368,19 +421,15 @@ def alphasense_als(dataframe, **kwargs):
368421
"conc" # Reserved for metric name
369422
]
370423

371-
result = df[cols].copy()
424+
# Return those columns that are present in the df
425+
result = df[df.columns.intersection(cols)].copy()
426+
# result = df[cols].copy()
427+
428+
rename_cols = {}
429+
for col in cols:
430+
if col == 'conc': continue
431+
rename_cols[col] = f'als_{col}'
372432

373-
rename_cols = {
374-
"we_clean": f"als_we_clean",
375-
"ae_clean": f"als_ae_clean",
376-
"we_t": f"als_we_t",
377-
"ae_t": f"als_ae_t",
378-
"we_baseline": f"als_we_baseline",
379-
"baseline_t": f"als_baseline_t",
380-
"baseline_t_mean": f"als_baseline_t_mean",
381-
"ae_cor": f"als_ae_cor",
382-
"we_c": f"als_we_c"
383-
}
384433
result.rename(columns=rename_cols, inplace=True)
385434

386435
else:

0 commit comments

Comments
 (0)