Skip to content

Commit 35e0167

Browse files
Merge pull request #164 from aodn/version_1.5
Fix several open issues and improve unit tests
2 parents af80337 + 87e8f72 commit 35e0167

20 files changed

Lines changed: 172 additions & 129 deletions

aodntools/timeseries_products/aggregated_timeseries.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
import os
66
import shutil
77
import tempfile
8-
from datetime import datetime
98

109
import numpy as np
1110
import xarray as xr
1211
from netCDF4 import Dataset, num2date, stringtochar
1312
from pkg_resources import resource_filename
1413

1514
from aodntools import __version__
16-
from aodntools.timeseries_products.common import NoInputFilesError, check_file, in_water
15+
from aodntools.timeseries_products.common import (NoInputFilesError, check_file, in_water, current_utc_timestamp,
16+
TIMESTAMP_FORMAT, DATESTAMP_FORMAT)
1717

1818
TEMPLATE_JSON = resource_filename(__name__, 'aggregated_timeseries_template.json')
1919

@@ -39,7 +39,7 @@ def get_variable_values(nc, variable):
3939
Get values of the variable and its QC flags.
4040
If variable is not present, nan returned, its QC flags set to 9
4141
If variable present but not its QC flags, QC set to 0
42-
:param nc: dataset
42+
:param nc: xarray dataset
4343
:param variable: name of the variable to get
4444
:return: variable values and variable qc flags
4545
"""
@@ -48,6 +48,8 @@ def get_variable_values(nc, variable):
4848

4949
if variable in file_variables:
5050
variable_values = nc[variable].values
51+
if any(np.isnan(variable_values)):
52+
variable_values = np.ma.masked_array(variable_values, mask=np.isnan(variable_values))
5153
if variable+'_quality_control' in file_variables:
5254
variableQC_values = nc[variable+'_quality_control'].values
5355
else:
@@ -303,28 +305,25 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di
303305
ds['source_file'].setncatts(source_file_attributes(download_url_prefix, opendap_url_prefix))
304306

305307
## set global attrs
306-
timeformat = '%Y-%m-%dT%H:%M:%SZ'
307-
file_timeformat = '%Y%m%d'
308-
309-
time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(timeformat)
310-
time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(timeformat)
311-
time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(file_timeformat)
312-
time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(file_timeformat)
308+
time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT)
309+
time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT)
310+
time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT)
311+
time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT)
313312

314313
add_attribute = {
315314
'title': ("Long Timeseries Velocity Aggregated product: " + var_to_agg + " at " +
316315
site_code + " between " + time_start + " and " + time_end),
317316
'site_code': site_code,
318317
'time_coverage_start': time_start,
319318
'time_coverage_end': time_end,
320-
'geospatial_vertical_min': np.min(ds['DEPTH']),
321-
'geospatial_vertical_max': np.max(ds['DEPTH']),
322-
'geospatial_lat_min': np.min(ds['LATITUDE']),
323-
'geospatial_lat_max': np.max(ds['LATITUDE']),
324-
'geospatial_lon_min': np.min(ds['LONGITUDE']),
325-
'geospatial_lon_max': np.max(ds['LONGITUDE']),
326-
'date_created': datetime.utcnow().strftime(timeformat),
327-
'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.',
319+
'geospatial_vertical_min': np.min(ds['DEPTH'][:]),
320+
'geospatial_vertical_max': np.max(ds['DEPTH'][:]),
321+
'geospatial_lat_min': np.min(ds['LATITUDE'][:]),
322+
'geospatial_lat_max': np.max(ds['LATITUDE'][:]),
323+
'geospatial_lon_min': np.min(ds['LONGITUDE'][:]),
324+
'geospatial_lon_max': np.max(ds['LONGITUDE'][:]),
325+
'date_created': current_utc_timestamp(),
326+
'history': current_utc_timestamp() + ': Aggregated file created.',
328327
'keywords': ', '.join([var_to_agg, 'AGGREGATED']),
329328
'rejected_files': "\n".join(rejected_files),
330329
'generating_code_version': __version__}
@@ -346,7 +345,7 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di
346345
file_version = 1
347346
output_name = '_'.join(['IMOS', facility_code, data_code, time_start_filename, site_code, ('FV0'+str(file_version)),
348347
(var_to_agg + "-" + product_type),
349-
('END-'+ time_end_filename), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc'
348+
('END-'+ time_end_filename), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc'
350349
ncout_path = os.path.join(output_dir, output_name)
351350
shutil.move(temp_outfile, os.path.join(output_dir, ncout_path))
352351

aodntools/timeseries_products/aggregated_timeseries_template.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@
289289
"_global":{
290290
"abstract": "Aggregated Time-series Product: This file contains all measurements of the selected variable from all instruments deployed at the selected site. Timestamps are chronologically ordered, but may not be at uniform intervals. Instrument details are stored as a variable in order to keep a record of the origin of each measurement. The quality control flags of the variable of interest and DEPTH are preserved. Out-of-water measurements have been excluded, but no other filtering has been applied to the input data.",
291291
"acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).",
292-
"author": "Klein, Eduardo",
293-
"author_email": "eduardo.kleinsalas@utas.edu.au",
292+
"author": "Australian Ocean Data Network (AODN)",
293+
"author_email": "info@aodn.org.au",
294294
"citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".",
295295
"Conventions": "CF-1.6,IMOS-1.4",
296296
"data_centre": "Australian Ocean Data Network (AODN)",

aodntools/timeseries_products/common.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
"""Code shared by all timeseries product generating code"""
2+
from datetime import datetime, timezone
3+
24
import numpy as np
35

6+
# Common date/time format strings
7+
TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
8+
DATESTAMP_FORMAT = '%Y%m%d'
9+
410

511
class NoInputFilesError(Exception):
612
"""Exception raised if there are no valid input files to aggregate"""
@@ -183,4 +189,8 @@ def in_water(nc):
183189
:param nc: xarray dataset
184190
:return: xarray dataset
185191
"""
186-
return nc.where(in_water_index(nc), drop=True)
192+
return nc.where(in_water_index(nc), drop=True)
193+
194+
195+
def current_utc_timestamp(format=TIMESTAMP_FORMAT):
196+
return datetime.now(timezone.utc).strftime(format)

aodntools/timeseries_products/gridded_timeseries.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33
import argparse
44
import os.path
55
import json
6-
from datetime import datetime
6+
from datetime import datetime, timezone
77

88
import xarray as xr
99
import pandas as pd
1010

1111
from pkg_resources import resource_filename
1212

1313
from aodntools import __version__
14+
from aodntools.timeseries_products.common import current_utc_timestamp, TIMESTAMP_FORMAT, DATESTAMP_FORMAT
1415
import aodntools.timeseries_products.aggregated_timeseries as TStools
1516

1617

@@ -122,14 +123,12 @@ def generate_netcdf_output_filename(nc, facility_code, data_code, VoI, site_code
122123
:return: name of the output file
123124
"""
124125

125-
file_timeformat = '%Y%m%d'
126-
127126
if '_' in VoI:
128127
VoI = VoI.replace('_', '-')
129-
t_start = pd.to_datetime(nc.TIME.min().values).strftime(file_timeformat)
130-
t_end = pd.to_datetime(nc.TIME.max().values).strftime(file_timeformat)
128+
t_start = pd.to_datetime(nc.TIME.min().values).strftime(DATESTAMP_FORMAT)
129+
t_end = pd.to_datetime(nc.TIME.max().values).strftime(DATESTAMP_FORMAT)
131130

132-
output_name = '_'.join(['IMOS', facility_code, data_code, t_start, site_code, ('FV0'+str(file_version)), (VoI+"-"+product_type), ('END-'+ t_end), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc'
131+
output_name = '_'.join(['IMOS', facility_code, data_code, t_start, site_code, ('FV0'+str(file_version)), (VoI+"-"+product_type), ('END-'+ t_end), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc'
133132

134133
return output_name
135134

@@ -250,10 +249,9 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin
250249
for attr in ('geospatial_lat_min', 'geospatial_lat_max', 'geospatial_lon_min', 'geospatial_lon_max', 'site_code',
251250
'included_values_flagged_as', 'contributor_name', 'contributor_role', 'contributor_email'):
252251
VoI_interpolated.attrs[attr] = input_global_attributes[attr]
253-
timeformat = '%Y-%m-%dT%H:%M:%SZ'
254-
date_start = pd.to_datetime(VoI_interpolated.TIME.values.min()).strftime(timeformat)
255-
date_end = pd.to_datetime(VoI_interpolated.TIME.values.max()).strftime(timeformat)
256-
date_created = datetime.utcnow().strftime(timeformat)
252+
date_start = pd.to_datetime(VoI_interpolated.TIME.values.min()).strftime(TIMESTAMP_FORMAT)
253+
date_end = pd.to_datetime(VoI_interpolated.TIME.values.max()).strftime(TIMESTAMP_FORMAT)
254+
date_created = current_utc_timestamp()
257255
VoI_interpolated.attrs.update(global_attribute_dictionary)
258256
VoI_interpolated.attrs.update({
259257
'source_file': input_file,

aodntools/timeseries_products/gridded_timeseries_template.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@
7777
"title": "Gridded Time Series Product: {VoI} interpolated at {site_code} to fixed target depths at 1-hour time intervals, between {time_min} and {time_max} and {depth_min} and {depth_max} meters.",
7878
"abstract": "Gridded Time Series Product: This file contains {VoI} readings from all instruments deployed at the {site_code} mooring site. The source of the values is the Hourly Time Series Product where TIME is fixed to 1-hour interval. The variable values are interpolated to a fixed target depths using a linear interpolation between consecutive existing depths. Only values flagged as 1 or 2 are used in the interpolation.",
7979
"acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).",
80-
"author": "Klein, Eduardo",
81-
"author_email": "eduardo.kleinsalas@utas.edu.au",
80+
"author": "Australian Ocean Data Network (AODN)",
81+
"author_email": "info@aodn.org.au",
8282
"citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".",
8383
"Conventions": "CF-1.6,IMOS-1.4",
8484
"data_centre": "Australian Ocean Data Network (AODN)",

aodntools/timeseries_products/hourly_timeseries.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import json
55
import os.path
66
from collections import OrderedDict
7-
from datetime import datetime
87

98
import numpy as np
109
import pandas as pd
@@ -14,7 +13,8 @@
1413

1514
from aodntools import __version__
1615
from aodntools.timeseries_products import aggregated_timeseries as utils
17-
from aodntools.timeseries_products.common import NoInputFilesError, check_file, get_qc_variable_names, in_water
16+
from aodntools.timeseries_products.common import (NoInputFilesError, check_file, get_qc_variable_names, in_water,
17+
current_utc_timestamp, TIMESTAMP_FORMAT, DATESTAMP_FORMAT)
1818

1919
TEMPLATE_JSON = resource_filename(__name__, 'hourly_timeseries_template.json')
2020
BINNING_METHOD_JSON = resource_filename(__name__, 'binning_method.json')
@@ -180,8 +180,8 @@ def set_globalattr(nc_aggregated, templatefile, site_code, add_attribute, parame
180180
'geospatial_lat_max': nc_aggregated.LATITUDE.values.max(),
181181
'geospatial_lon_min': nc_aggregated.LONGITUDE.values.min(),
182182
'geospatial_lon_max': nc_aggregated.LONGITUDE.values.max(),
183-
'date_created': datetime.utcnow().strftime(timeformat),
184-
'history': datetime.utcnow().strftime(timeformat) + ': Hourly aggregated file created.',
183+
'date_created': current_utc_timestamp(),
184+
'history': current_utc_timestamp() + ': Hourly aggregated file created.',
185185
'keywords': ', '.join(parameter_names + ['HOURLY', 'AGGREGATED'])}
186186
global_metadata.update(agg_attr)
187187
global_metadata.update(add_attribute)
@@ -259,14 +259,12 @@ def generate_netcdf_output_filename(nc, facility_code, data_code, site_code, pro
259259
:return: name of the output file
260260
"""
261261

262-
file_timeformat = '%Y%m%d'
263-
264-
t_start = pd.to_datetime(nc.TIME.min().values).strftime(file_timeformat)
265-
t_end = pd.to_datetime(nc.TIME.max().values).strftime(file_timeformat)
262+
t_start = pd.to_datetime(nc.TIME.min().values).strftime(DATESTAMP_FORMAT)
263+
t_end = pd.to_datetime(nc.TIME.max().values).strftime(DATESTAMP_FORMAT)
266264

267265
output_name = '_'.join(
268266
['IMOS', facility_code, data_code, t_start, site_code, ('FV0' + str(file_version)), product_type,
269-
('END-' + t_end), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc'
267+
('END-' + t_end), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc'
270268

271269
return output_name
272270

@@ -567,4 +565,4 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp
567565
qcflags = [int(i) for i in args.qcflags]
568566

569567
hourly_aggregator(files_to_aggregate=files_to_aggregate, site_code=args.site_code, qcflags=qcflags,
570-
input_dir=args.input_dir, output_dir=args.output_path)
568+
input_dir=args.input_dir, output_dir=args.output_dir)

aodntools/timeseries_products/hourly_timeseries_template.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,8 @@
368368
"_global":{
369369
"abstract": "Hourly Time Series Product: This file contains selected variables from all instruments deployed at the {site_code} mooring site. The values are binned to a fixed 1-hour interval. Instrument details are stored as variables in order to keep a record of the origin of each measurement. Out-of-water measurements have been excluded. Only values flagged as {flags} are retained in the aggregation.",
370370
"acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).",
371-
"author": "Klein, Eduardo",
372-
"author_email": "eduardo.kleinsalas@utas.edu.au",
371+
"author": "Australian Ocean Data Network (AODN)",
372+
"author_email": "info@aodn.org.au",
373373
"citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".",
374374
"Conventions": "CF-1.6,IMOS-1.4",
375375
"data_centre": "Australian Ocean Data Network (AODN)",

aodntools/timeseries_products/velocity_aggregated_timeseries.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
from netCDF4 import Dataset, num2date, stringtochar
55
import numpy as np
66
import json
7-
from datetime import datetime
87
import argparse
98
from pkg_resources import resource_filename
109
from aodntools import __version__
1110

1211
import xarray as xr
1312

1413
from aodntools.timeseries_products import aggregated_timeseries as utils
15-
from aodntools.timeseries_products.common import NoInputFilesError, check_velocity_file
14+
from aodntools.timeseries_products.common import (NoInputFilesError, check_velocity_file, current_utc_timestamp,
15+
TIMESTAMP_FORMAT, DATESTAMP_FORMAT)
1616

1717
TEMPLATE_JSON = resource_filename(__name__, 'velocity_aggregated_timeseries_template.json')
1818

@@ -144,7 +144,7 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./',
144144
WCUR[start:end] = flat_variable(nc, 'WCUR')
145145
WCURqc[start:end] = flat_variable(nc, 'WCUR_quality_control')
146146
else:
147-
WCUR[start:end] = np.full(n_obs, np.nan)
147+
WCUR[start:end] = np.ma.masked
148148
WCURqc[start:end] = np.full(n_obs, 9)
149149

150150
##calculate depth and add CELL_INDEX
@@ -188,13 +188,10 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./',
188188
ds['source_file'].setncatts(utils.source_file_attributes(download_url_prefix, opendap_url_prefix))
189189

190190
## set global attrs
191-
timeformat = '%Y-%m-%dT%H:%M:%SZ'
192-
file_timeformat = '%Y%m%d'
193-
194-
time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(timeformat)
195-
time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(timeformat)
196-
time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(file_timeformat)
197-
time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(file_timeformat)
191+
time_start = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT)
192+
time_end = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(TIMESTAMP_FORMAT)
193+
time_start_filename = num2date(np.min(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT)
194+
time_end_filename = num2date(np.max(TIME[:]), time_units, time_calendar).strftime(DATESTAMP_FORMAT)
198195

199196
add_attribute = {
200197
'title': ("Long Timeseries Velocity Aggregated product: " + ', '.join(varlist) + " at " +
@@ -208,8 +205,8 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./',
208205
'geospatial_lat_max': np.max(ds['LATITUDE']),
209206
'geospatial_lon_min': np.min(ds['LONGITUDE']),
210207
'geospatial_lon_max': np.max(ds['LONGITUDE']),
211-
'date_created': datetime.utcnow().strftime(timeformat),
212-
'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.',
208+
'date_created': current_utc_timestamp(),
209+
'history': current_utc_timestamp() + ': Aggregated file created.',
213210
'keywords': ', '.join(varlist + ['AGGREGATED']),
214211
'rejected_files': "\n".join(bad_files.keys()),
215212
'generating_code_version': __version__
@@ -235,7 +232,7 @@ def velocity_aggregated(files_to_agg, site_code, input_dir='', output_dir='./',
235232
file_version = 1
236233
output_name = '_'.join(['IMOS', facility_code, data_code, time_start_filename, site_code, ('FV0'+str(file_version)),
237234
("velocity-"+product_type),
238-
('END-'+ time_end_filename), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc'
235+
('END-'+ time_end_filename), 'C-' + current_utc_timestamp(DATESTAMP_FORMAT)]) + '.nc'
239236
ncout_path = os.path.join(output_dir, output_name)
240237
shutil.move(temp_outfile, ncout_path)
241238

0 commit comments

Comments (0)