Skip to content

Commit 2332594

Browse files
committed
rename and refactor get_resampled_values in response to review
1 parent 0de304e commit 2332594

1 file changed

Lines changed: 27 additions & 33 deletions

File tree

aodntools/timeseries_products/velocity_hourly_timeseries.py

Lines changed: 27 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -17,20 +17,24 @@
1717

1818
TEMPLATE_JSON = resource_filename(__name__, 'velocity_hourly_timeseries_template.json')
1919
QC_FLAG_MAX = 2
20+
TIME_UNITS = "days since 1950-01-01 00:00:00 UTC"
21+
TIME_CALENDAR = "gregorian"
22+
TIME_EPOCH = np.datetime64("1950-01-01T00:00:00")
23+
ONE_DAY = np.timedelta64(1, 'D')
2024

21-
def cell_velocity_resample(df, binning_function, is_WCUR):
25+
26+
def cell_velocity_resample(df, binning_function):
2227
"""
2328
Resample a dataset to a specific time_interval.
2429
if WCUR not present, returns nan
2530
:param df: grouped dataframe
2631
:param binning_function: name of standard numpy function used for binning
27-
:param is_WCUR: True if WCUR is present in nc, False otherwise
2832
:return: binned U, v, W CUR according to the binning function
2933
"""
3034
df_binned = df.apply(binning_function)
3135
UCUR = np.array(df_binned['UCUR'])
3236
VCUR = np.array(df_binned['VCUR'])
33-
if is_WCUR:
37+
if 'WCUR' in df_binned:
3438
WCUR = np.array(df_binned['WCUR'])
3539
else:
3640
WCUR = np.full(len(df), np.nan)
@@ -39,42 +43,40 @@ def cell_velocity_resample(df, binning_function, is_WCUR):
3943
return UCUR, VCUR, WCUR, DEPTH
4044

4145

42-
def get_resampled_values(nc_cell, ds, slice_start, varlist, binning_function, epoch, one_day, is_WCUR):
46+
def append_resampled_values(nc_cell, ds, slice_start, binning_functions):
4347
"""
44-
get U, V, W current values resampled
45-
:param nc_cell: xarray DATASET
46-
:param ds: netcdf4 dataset
48+
Resample U, V, W current and depth values from a single ADCP cell into hourly bins, and
49+
append the mean values to the corresponding variables in the output dataset (starting at
50+
index slice_start), along with additional statistical variables specified by binning_functions.
51+
:param nc_cell: input xarray Dataset representing a single ADCP cell (or point time series)
52+
:param ds: output netcdf4 Dataset to update with resampled values
4753
:param slice_start: start index of the slice
48-
:param varlist: list of variable names to subset the dataset
49-
:param binning_function: list of numpy function names for binning
50-
:param one_day: timedelta one day
51-
:param epoch: base epoch
52-
:param is_WCUR: flag indicating if WCUR is present
54+
:param binning_functions: list of numpy function names for binning
5355
:return: end index of the slice
5456
"""
55-
df_cell = nc_cell[varlist].squeeze().to_dataframe()
56-
## back the index 30min
57+
df_cell = nc_cell.squeeze().to_dataframe()
58+
# shift the index forward 30min to centre the bins on the hour
5759
df_cell.index = df_cell.index + pd.Timedelta(minutes=30)
5860
# TODO: shift timestamps to centre of sampling interval
5961

6062
df_cell_1H = df_cell.resample('1H')
6163
slice_end = len(df_cell_1H) + slice_start
6264

63-
## move time it forward and get it
64-
time_slice = (np.fromiter(df_cell_1H.groups.keys(), dtype='M8[ns]') - epoch) / one_day
65+
# set binned timestamps
66+
time_slice = (np.fromiter(df_cell_1H.groups.keys(), dtype='M8[ns]') - TIME_EPOCH) / ONE_DAY
6567
ds['TIME'][slice_start:slice_end] = time_slice
6668

6769
# take the mean of the variables
6870
ds['UCUR'][slice_start:slice_end], \
6971
ds['VCUR'][slice_start:slice_end], \
7072
ds['WCUR'][slice_start:slice_end], \
71-
ds['DEPTH'][slice_start:slice_end] = cell_velocity_resample(df_cell_1H, 'mean', is_WCUR)
73+
ds['DEPTH'][slice_start:slice_end] = cell_velocity_resample(df_cell_1H, 'mean')
7274

73-
for method in binning_function:
75+
for method in binning_functions:
7476
ds['UCUR_' + method][slice_start:slice_end], \
7577
ds['VCUR_' + method][slice_start:slice_end], \
7678
ds['WCUR_' + method][slice_start:slice_end], \
77-
ds['DEPTH_' + method][slice_start:slice_end] = cell_velocity_resample(df_cell_1H, method, is_WCUR)
79+
ds['DEPTH_' + method][slice_start:slice_end] = cell_velocity_resample(df_cell_1H, method)
7880

7981
return slice_end
8082

@@ -98,11 +100,6 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
98100

99101
varlist = ['UCUR', 'VCUR', 'WCUR', 'DEPTH']
100102
binning_fun = ['max', 'min', 'std', 'count']
101-
102-
time_units="days since 1950-01-01 00:00:00 UTC"
103-
time_calendar="gregorian"
104-
epoch = np.datetime64("1950-01-01T00:00:00")
105-
one_day = np.timedelta64(1, 'D')
106103

107104
bad_files = {}
108105

@@ -188,7 +185,6 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
188185
with xr.open_dataset(os.path.join(input_dir, file)) as nc:
189186

190187
is_2D = 'HEIGHT_ABOVE_SENSOR' in list(nc.variables)
191-
is_WCUR = 'WCUR' in list(nc.data_vars)
192188

193189
## mask values with QC flag>2
194190
for var in varlist:
@@ -213,14 +209,12 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
213209
nc_cell = nc_chunk.sel(HEIGHT_ABOVE_SENSOR=cell_height)
214210
## convert to absolute DEPTH
215211
nc_cell['DEPTH'] = nc_cell['DEPTH'] - cell_height
216-
slice_end = get_resampled_values(nc_cell, ds, slice_start, varlist, binning_fun,
217-
epoch, one_day, is_WCUR)
212+
slice_end = append_resampled_values(nc_cell[varlist], ds, slice_start, binning_fun)
218213
CELL_INDEX[slice_start:slice_end] = np.full(slice_end - slice_start, cell_idx, dtype=np.uint32)
219214

220215
slice_start = slice_end
221216
else:
222-
slice_end = get_resampled_values(nc_chunk, ds, slice_start, varlist, binning_fun,
223-
epoch, one_day, is_WCUR)
217+
slice_end = append_resampled_values(nc_chunk[varlist], ds, slice_start, binning_fun)
224218
CELL_INDEX[slice_start:slice_end] = np.full(slice_end - slice_start, 0, dtype=np.uint32)
225219

226220
slice_start = slice_end
@@ -260,10 +254,10 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
260254
timeformat = '%Y-%m-%dT%H:%M:%SZ'
261255
file_timeformat = '%Y%m%d'
262256

263-
time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(timeformat)
264-
time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(timeformat)
265-
time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(file_timeformat)
266-
time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(file_timeformat)
257+
time_start = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(timeformat)
258+
time_end = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(timeformat)
259+
time_start_filename = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(file_timeformat)
260+
time_end_filename = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(file_timeformat)
267261

268262

269263
contributor_name, contributor_email, contributor_role = utils.get_contributors(files_to_agg=files_to_agg, input_dir=input_dir)

0 commit comments

Comments
 (0)