# Path to the JSON template describing the aggregated product's structure
TEMPLATE_JSON = resource_filename(__name__, 'velocity_hourly_timeseries_template.json')

# Values with a QC flag greater than this are masked out before aggregation
QC_FLAG_MAX = 2

# Encoding of the output TIME coordinate (CF-style reference epoch)
TIME_UNITS = "days since 1950-01-01 00:00:00 UTC"
TIME_CALENDAR = "gregorian"
TIME_EPOCH = np.datetime64("1950-01-01T00:00:00")
ONE_DAY = np.timedelta64(1, 'D')
def cell_velocity_resample(df, binning_function):
    """
    Resample a time-grouped velocity dataset with a given binning function.

    If the input has no WCUR column, the returned WCUR array is all-NaN
    (one value per bin), so callers can always unpack four arrays.

    :param df: grouped/resampled pandas DataFrame of current velocities,
        with (at least) UCUR, VCUR and DEPTH columns
    :param binning_function: name of the reduction applied to each bin
        (e.g. 'mean', 'max', 'min', 'std', 'count')
    :return: tuple (UCUR, VCUR, WCUR, DEPTH) of binned numpy arrays
    """
    df_binned = df.apply(binning_function)
    UCUR = np.array(df_binned['UCUR'])
    VCUR = np.array(df_binned['VCUR'])
    if 'WCUR' in df_binned:
        WCUR = np.array(df_binned['WCUR'])
    else:
        # WCUR absent from the source file: fill with NaN, one value per bin
        WCUR = np.full(len(df), np.nan)
    DEPTH = np.array(df_binned['DEPTH'])
    return UCUR, VCUR, WCUR, DEPTH
def append_resampled_values(nc_cell, ds, slice_start, binning_functions):
    """
    Resample U, V, W current and depth values from a single ADCP cell into hourly
    bins and append them to the output dataset.

    The hourly mean of each variable is written to the base variables (UCUR, VCUR,
    WCUR, DEPTH) starting at index slice_start, and every extra statistic listed in
    binning_functions is written to the matching suffixed variables (e.g. UCUR_max).

    :param nc_cell: input xarray Dataset representing a single ADCP cell
        (or point time series)
    :param ds: output netcdf4 Dataset to update with resampled values
    :param slice_start: start index of the slice
    :param binning_functions: list of numpy function names for binning
    :return: end index of the slice
    """
    df_cell = nc_cell.squeeze().to_dataframe()
    # shift the index forward 30min so bins are centred on the hour
    df_cell.index = df_cell.index + pd.Timedelta(minutes=30)
    # TODO: shift timestamps to centre of sampling interval

    df_cell_1H = df_cell.resample('1H')
    slice_end = slice_start + len(df_cell_1H)

    # binned timestamps, converted to days since the reference epoch
    bin_times = np.fromiter(df_cell_1H.groups.keys(), dtype='M8[ns]')
    ds['TIME'][slice_start:slice_end] = (bin_times - TIME_EPOCH) / ONE_DAY

    variables = ('UCUR', 'VCUR', 'WCUR', 'DEPTH')

    # hourly means go into the base variables...
    for name, values in zip(variables, cell_velocity_resample(df_cell_1H, 'mean')):
        ds[name][slice_start:slice_end] = values

    # ...and each additional statistic into the suffixed variables
    for method in binning_functions:
        binned = cell_velocity_resample(df_cell_1H, method)
        for name, values in zip(variables, binned):
            ds[name + '_' + method][slice_start:slice_end] = values

    return slice_end
@@ -98,11 +100,6 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
98100
99101 varlist = ['UCUR' , 'VCUR' , 'WCUR' , 'DEPTH' ]
100102 binning_fun = ['max' , 'min' , 'std' , 'count' ]
101-
102- time_units = "days since 1950-01-01 00:00:00 UTC"
103- time_calendar = "gregorian"
104- epoch = np .datetime64 ("1950-01-01T00:00:00" )
105- one_day = np .timedelta64 (1 , 'D' )
106103
107104 bad_files = {}
108105
@@ -188,7 +185,6 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
188185 with xr .open_dataset (os .path .join (input_dir , file )) as nc :
189186
190187 is_2D = 'HEIGHT_ABOVE_SENSOR' in list (nc .variables )
191- is_WCUR = 'WCUR' in list (nc .data_vars )
192188
193189 ## mask values with QC flag>2
194190 for var in varlist :
@@ -213,14 +209,12 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
213209 nc_cell = nc_chunk .sel (HEIGHT_ABOVE_SENSOR = cell_height )
214210 ## convert to absolute DEPTH
215211 nc_cell ['DEPTH' ] = nc_cell ['DEPTH' ] - cell_height
216- slice_end = get_resampled_values (nc_cell , ds , slice_start , varlist , binning_fun ,
217- epoch , one_day , is_WCUR )
212+ slice_end = append_resampled_values (nc_cell [varlist ], ds , slice_start , binning_fun )
218213 CELL_INDEX [slice_start :slice_end ] = np .full (slice_end - slice_start , cell_idx , dtype = np .uint32 )
219214
220215 slice_start = slice_end
221216 else :
222- slice_end = get_resampled_values (nc_chunk , ds , slice_start , varlist , binning_fun ,
223- epoch , one_day , is_WCUR )
217+ slice_end = append_resampled_values (nc_chunk [varlist ], ds , slice_start , binning_fun )
224218 CELL_INDEX [slice_start :slice_end ] = np .full (slice_end - slice_start , 0 , dtype = np .uint32 )
225219
226220 slice_start = slice_end
@@ -260,10 +254,10 @@ def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir
260254 timeformat = '%Y-%m-%dT%H:%M:%SZ'
261255 file_timeformat = '%Y%m%d'
262256
263- time_start = num2date (np .min (TIME [:]), time_units , time_calendar ).strftime (timeformat )
264- time_end = num2date (np .max (TIME [:]), time_units , time_calendar ).strftime (timeformat )
265- time_start_filename = num2date (np .min (TIME [:]), time_units , time_calendar ).strftime (file_timeformat )
266- time_end_filename = num2date (np .max (TIME [:]), time_units , time_calendar ).strftime (file_timeformat )
257+ time_start = num2date (np .min (TIME [:]), TIME_UNITS , TIME_CALENDAR ).strftime (timeformat )
258+ time_end = num2date (np .max (TIME [:]), TIME_UNITS , TIME_CALENDAR ).strftime (timeformat )
259+ time_start_filename = num2date (np .min (TIME [:]), TIME_UNITS , TIME_CALENDAR ).strftime (file_timeformat )
260+ time_end_filename = num2date (np .max (TIME [:]), TIME_UNITS , TIME_CALENDAR ).strftime (file_timeformat )
267261
268262
269263 contributor_name , contributor_email , contributor_role = utils .get_contributors (files_to_agg = files_to_agg , input_dir = input_dir )
0 commit comments