Skip to content

Commit e713ce5

Browse files
committed
fix encoding of fill values in gridded_timeseries
(+ a bit of refactoring on the way)
1 parent 261129a commit e713ce5

1 file changed

Lines changed: 24 additions & 32 deletions

File tree

aodntools/timeseries_products/gridded_timeseries.py

Lines changed: 24 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os.path
55
import json
66
from datetime import datetime, timezone
7+
from collections import defaultdict
78

89
import xarray as xr
910
import pandas as pd
@@ -91,24 +92,28 @@ def write_netCDF_aggfile(agg_dataset, output_path, encoding):
9192
return output_path
9293

9394

94-
def set_variableattr(varlist, variable_attribute_dictionary, add_variable_attribute):
95+
def set_variableattr(varlist, variable_attribute_dictionary):
9596
"""
96-
set variables variables atributes
97+
Set variable attributes, separating out the attributes that should be passed to xarray as encoding
98+
parameters
9799
98-
:param varlist: list of variable names
100+
:param varlist: list of variable names to pick out
99101
:param variable_attribute_dictionary: dictionary of the variable attributes
100-
:param add_variable_attribute: additional attributes to add
101-
:return: dictionary of attributes
102+
:return: tuple (dictionary of attributes, dictionary of encoding attributes)
102103
"""
103104

104-
# with open(templatefile) as json_file:
105-
# variable_metadata = json.load(json_file)['_variables']
106-
variable_attributes = {key: variable_attribute_dictionary[key] for key in varlist}
107-
if len(add_variable_attribute)>0:
108-
for key in add_variable_attribute.keys():
109-
variable_attributes[key].update(add_variable_attribute[key])
105+
encoding_attributes = {'_FillValue'}
106+
time_encoding_attributes = {'units', 'calendar'}
107+
variable_attributes = defaultdict(dict)
108+
variable_encodings = defaultdict(dict)
109+
for var in varlist:
110+
for name, value in variable_attribute_dictionary[var].items():
111+
if name in encoding_attributes or (var == 'TIME' and name in time_encoding_attributes):
112+
variable_encodings[var][name] = value
113+
else:
114+
variable_attributes[var][name] = value
110115

111-
return variable_attributes
116+
return variable_attributes, variable_encodings
112117

113118
def generate_netcdf_output_filename(nc, facility_code, data_code, VoI, site_code, product_type, file_version):
114119
"""
@@ -237,10 +242,7 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin
237242

238243
## set variable attributes
239244
varlist = list(VoI_interpolated.variables)
240-
add_variable_attribute = {}
241-
variable_attributes = set_variableattr(varlist, variable_attribute_dictionary, add_variable_attribute)
242-
time_units = variable_attributes['TIME'].pop('units')
243-
time_calendar = variable_attributes['TIME'].pop('calendar')
245+
variable_attributes, encoding = set_variableattr(varlist, variable_attribute_dictionary)
244246
for variable in varlist:
245247
VoI_interpolated[variable].attrs = variable_attributes[variable]
246248

@@ -293,22 +295,12 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin
293295
file_version=file_version)
294296
ncout_path = os.path.join(output_dir, ncout_filename)
295297

296-
encoding = {'TIME': {'_FillValue': None,
297-
'units': time_units,
298-
'calendar': time_calendar,
299-
'zlib': True,
300-
'complevel': 5},
301-
VoI: {'zlib': True,
302-
'complevel': 5,
303-
'dtype': np.dtype('float32')},
304-
VoI+'_count': {'dtype': np.dtype('int16'),
305-
'zlib': True,
306-
'complevel': 5},
307-
'DEPTH': {'dtype': np.dtype('float32'),
308-
'zlib': True,
309-
'complevel': 5},
310-
'LONGITUDE': {'_FillValue': False},
311-
'LATITUDE': {'_FillValue': False}}
298+
# data types and compression for encoding
299+
for var in {'TIME', VoI, VoI+'_count', 'DEPTH'}:
300+
encoding[var].update({'zlib': True, 'complevel': 5})
301+
encoding[VoI].update({'dtype': np.dtype('float32')})
302+
encoding[VoI+'_count'].update({'dtype': np.dtype('int16')})
303+
encoding['DEPTH'].update({'dtype': np.dtype('float32')})
312304

313305
write_netCDF_aggfile(VoI_interpolated, ncout_path, encoding)
314306

0 commit comments

Comments
 (0)