|
4 | 4 | import os.path |
5 | 5 | import json |
6 | 6 | from datetime import datetime, timezone |
| 7 | +from collections import defaultdict |
7 | 8 |
|
8 | 9 | import xarray as xr |
9 | 10 | import pandas as pd |
@@ -91,24 +92,28 @@ def write_netCDF_aggfile(agg_dataset, output_path, encoding): |
91 | 92 | return output_path |
92 | 93 |
|
93 | 94 |
|
94 | | -def set_variableattr(varlist, variable_attribute_dictionary, add_variable_attribute): |
| 95 | +def set_variableattr(varlist, variable_attribute_dictionary): |
95 | 96 | """ |
96 | | - set variables variables atributes |
 | 97 | + Set variable attributes, separating out those that should be passed to xarray as encoding |
| 98 | + parameters |
97 | 99 |
|
98 | | - :param varlist: list of variable names |
| 100 | + :param varlist: list of variable names to pick out |
99 | 101 | :param variable_attribute_dictionary: dictionary of the variable attributes |
100 | | - :param add_variable_attribute: additional attributes to add |
101 | | - :return: dictionary of attributes |
| 102 | + :return: tuple (dictionary of attributes, dictionary of encoding attributes) |
102 | 103 | """ |
103 | 104 |
|
104 | | - # with open(templatefile) as json_file: |
105 | | - # variable_metadata = json.load(json_file)['_variables'] |
106 | | - variable_attributes = {key: variable_attribute_dictionary[key] for key in varlist} |
107 | | - if len(add_variable_attribute)>0: |
108 | | - for key in add_variable_attribute.keys(): |
109 | | - variable_attributes[key].update(add_variable_attribute[key]) |
| 105 | + encoding_attributes = {'_FillValue'} |
| 106 | + time_encoding_attributes = {'units', 'calendar'} |
| 107 | + variable_attributes = defaultdict(dict) |
| 108 | + variable_encodings = defaultdict(dict) |
| 109 | + for var in varlist: |
| 110 | + for name, value in variable_attribute_dictionary[var].items(): |
| 111 | + if name in encoding_attributes or (var == 'TIME' and name in time_encoding_attributes): |
| 112 | + variable_encodings[var][name] = value |
| 113 | + else: |
| 114 | + variable_attributes[var][name] = value |
110 | 115 |
|
111 | | - return variable_attributes |
| 116 | + return variable_attributes, variable_encodings |
112 | 117 |
|
113 | 118 | def generate_netcdf_output_filename(nc, facility_code, data_code, VoI, site_code, product_type, file_version): |
114 | 119 | """ |
@@ -237,10 +242,7 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin |
237 | 242 |
|
238 | 243 | ## set variable attributes |
239 | 244 | varlist = list(VoI_interpolated.variables) |
240 | | - add_variable_attribute = {} |
241 | | - variable_attributes = set_variableattr(varlist, variable_attribute_dictionary, add_variable_attribute) |
242 | | - time_units = variable_attributes['TIME'].pop('units') |
243 | | - time_calendar = variable_attributes['TIME'].pop('calendar') |
| 245 | + variable_attributes, encoding = set_variableattr(varlist, variable_attribute_dictionary) |
244 | 246 | for variable in varlist: |
245 | 247 | VoI_interpolated[variable].attrs = variable_attributes[variable] |
246 | 248 |
|
@@ -293,22 +295,12 @@ def grid_variable(input_file, VoI, depth_bins=None, max_separation=50, depth_bin |
293 | 295 | file_version=file_version) |
294 | 296 | ncout_path = os.path.join(output_dir, ncout_filename) |
295 | 297 |
|
296 | | - encoding = {'TIME': {'_FillValue': None, |
297 | | - 'units': time_units, |
298 | | - 'calendar': time_calendar, |
299 | | - 'zlib': True, |
300 | | - 'complevel': 5}, |
301 | | - VoI: {'zlib': True, |
302 | | - 'complevel': 5, |
303 | | - 'dtype': np.dtype('float32')}, |
304 | | - VoI+'_count': {'dtype': np.dtype('int16'), |
305 | | - 'zlib': True, |
306 | | - 'complevel': 5}, |
307 | | - 'DEPTH': {'dtype': np.dtype('float32'), |
308 | | - 'zlib': True, |
309 | | - 'complevel': 5}, |
310 | | - 'LONGITUDE': {'_FillValue': False}, |
311 | | - 'LATITUDE': {'_FillValue': False}} |
| 298 | + # data types and compression for encoding |
| 299 | + for var in {'TIME', VoI, VoI+'_count', 'DEPTH'}: |
| 300 | + encoding[var].update({'zlib': True, 'complevel': 5}) |
| 301 | + encoding[VoI].update({'dtype': np.dtype('float32')}) |
| 302 | + encoding[VoI+'_count'].update({'dtype': np.dtype('int16')}) |
| 303 | + encoding['DEPTH'].update({'dtype': np.dtype('float32')}) |
312 | 304 |
|
313 | 305 | write_netCDF_aggfile(VoI_interpolated, ncout_path, encoding) |
314 | 306 |
|
|
0 commit comments