Skip to content

Commit a79b7ab

Browse files
authored
Merge pull request #7 from aodn/fill_values
Fix up creation of variables with fill values
2 parents d8c8838 + 139a731 commit a79b7ab

4 files changed

Lines changed: 77 additions & 17 deletions

File tree

ncwriter/schema.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,8 @@
3838
"items": {"type": "string", "pattern": NAME_PATTERN}
3939
},
4040
"type": {"type": "string"},
41-
"attributes": ATTRIBUTES_SCHEMA
41+
"attributes": ATTRIBUTES_SCHEMA,
42+
"data": {"type": ["null", "array"]}
4243
},
4344
"additionalProperties": False
4445
}

ncwriter/template.py

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
import netCDF4
2121

22-
from ncwriter.schema import validate_dimensions, validate_variables, validate_attributes
22+
from .schema import validate_dimensions, validate_variables, validate_attributes
2323

2424

2525
class NetCDFGroupDict(object):
@@ -323,13 +323,13 @@ def _create_var_opts(self, vdict):
323323
inside = inside.union(aliases)
324324
return list(inside)
325325

326-
def update_dimensinos(self):
326+
def update_dimensions(self):
327327
"""Update the sizes of dimensions to be consistent with the arrays set as variable values, if possible.
328328
Otherwise raise ValueError. Also raise ValueError if a dimension that already has a non-zero size is not
329329
consistent with variable array sizes.
330330
"""
331331
for name, var in self.variables.items():
332-
values = var.get('values')
332+
values = var.get('data')
333333
if values is None:
334334
continue
335335

@@ -375,10 +375,10 @@ def createVariables(self, **kwargs):
375375
if dimensions is None: # no kwargs in createVariable
376376
ncvar = self.ncobj.createVariable(varname, datatype)
377377
else:
378-
var_c_keys = list(self._create_var_opts(var))
378+
var_attr = var.get('attributes', {})
379+
var_c_keys = list(self._create_var_opts(var_attr))
379380

380-
var_c_opts = dict(
381-
(x, var[x]) for x in var_c_keys)
381+
var_c_opts = {x: var_attr[x] for x in var_c_keys}
382382

383383
ureq_fillvalue = [
384384
x for x in cwargs.keys() if x in self.fill_aliases
@@ -391,11 +391,11 @@ def createVariables(self, **kwargs):
391391
var_c_opts.update(cwargs)
392392

393393
# user precedence
394-
if (ureq_fillvalue and vreq_fillvalue):
394+
if ureq_fillvalue and vreq_fillvalue:
395395
[var_c_opts.pop(x) for x in vreq_fillvalue]
396396
fv_val = [var_c_opts.pop(x) for x in ureq_fillvalue]
397397
var_c_opts['fill_value'] = fv_val[-1]
398-
elif (ureq_fillvalue and not vreq_fillvalue):
398+
elif ureq_fillvalue and not vreq_fillvalue:
399399
fv_val = [var_c_opts.pop(x) for x in ureq_fillvalue]
400400
var_c_opts['fill_value'] = fv_val[-1]
401401
else:
@@ -407,7 +407,10 @@ def createVariables(self, **kwargs):
407407
varname, datatype, dimensions=dimensions, **var_c_opts)
408408

409409
# add variable values
410-
ncvar[:] = var['values']
410+
if 'data' not in var:
411+
raise ValueError('No data specified for variable {varname}'.format(varname=varname))
412+
if var['data'] is not None:
413+
ncvar[:] = var['data']
411414

412415
# add variable attributes
413416
if var.get('attributes'):
@@ -434,7 +437,7 @@ def to_netcdf(self, outfile, var_args={}, **kwargs):
434437
self.outfile = outfile
435438
self.ncobj = netCDF4.Dataset(self.outfile, mode='w', **kwargs)
436439

437-
self.update_dimensinos()
440+
self.update_dimensions()
438441
self.createDimensions()
439442
self.createVariables(**var_args)
440443
self.createGlobalAttrs()

test_ncwriter/test_schema.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,12 +32,14 @@ def test_validate_variables(self):
3232
validate_variables({'X': {'type': 'float32'}})
3333
validate_variables({'X': {'dimensions': []}})
3434
validate_variables({'X': {'attributes': {'name': 'X'}}})
35+
validate_variables({'X': {'data': None}})
3536
validate_variables({'X': {'dimensions': ['X'], 'type': 'float32'}})
3637
validate_variables({
3738
'X': {
3839
'dimensions': ['X'],
3940
'type': 'float32',
40-
'attributes': {'name': 'X', 'count': 1}
41+
'attributes': {'name': 'X', 'count': 1},
42+
'data': [42]
4143
}
4244
})
4345

@@ -51,6 +53,8 @@ def test_validate_variables(self):
5153
validate_variables({'X': {'type': 'float32', 'something': 'else'}})
5254
with self.assertRaises(ValidationError):
5355
validate_variables({'X': {'type': 'float32', 'attributes': 'none'}})
56+
with self.assertRaises(ValidationError):
57+
validate_variables({'X': {'data': 100}})
5458

5559
def test_validate_attributes(self):
5660
validate_attributes({})

test_ncwriter/test_template.py

Lines changed: 57 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -132,19 +132,34 @@ def test_add_variable_attributes(self):
132132

133133
def test_set_variable_values(self):
134134
template = DatasetTemplate.from_json(TEMPLATE_JSON)
135-
template.variables['TEMP']['values'] = self.values10
136-
self.assertTrue(all(template.variables['TEMP']['values'] == self.values10))
135+
template.variables['TEMP']['data'] = self.values10
136+
self.assertTrue(all(template.variables['TEMP']['data'] == self.values10))
137137

138138
def test_create_empty_file(self):
139139
template = DatasetTemplate()
140140
template.to_netcdf(self.temp_nc_file)
141141
dataset = Dataset(self.temp_nc_file)
142142

143+
def test_create_empty_variable(self):
144+
template = DatasetTemplate(dimensions={'X': 10})
145+
template.variables['X'] = {'dimensions': ['X'], 'type': 'float32'}
146+
self.assertRaises(ValueError, template.to_netcdf, self.temp_nc_file) # not providing 'data' is an error
147+
148+
del self._temp_nc_file # Get a new temp file
149+
template.variables['X']['data'] = None # This is ok, it's a shortcut for all fill values
150+
template.to_netcdf(self.temp_nc_file)
151+
152+
dataset = Dataset(self.temp_nc_file)
153+
dataset.set_auto_mask(True)
154+
dsx = dataset.variables['X']
155+
self.assertIsInstance(dsx[:], np.ma.MaskedArray)
156+
self.assertTrue(dsx[:].mask.all())
157+
143158
def test_create_file(self):
144159
template = DatasetTemplate.from_json(TEMPLATE_JSON)
145-
template.variables['TIME']['values'] = self.values10
146-
template.variables['DEPTH']['values'] = self.values1
147-
template.variables['TEMP']['values'] = self.values10.reshape((10, 1))
160+
template.variables['TIME']['data'] = self.values10
161+
template.variables['DEPTH']['data'] = self.values1
162+
template.variables['TEMP']['data'] = self.values10.reshape((10, 1))
148163
template.to_netcdf(self.temp_nc_file)
149164

150165
dataset = Dataset(self.temp_nc_file)
@@ -173,6 +188,43 @@ def test_create_file(self):
173188
ds_global_attributes = OrderedDict((k, dataset.getncattr(k)) for k in dataset.ncattrs())
174189
self.assertEqual(self.global_attributes, ds_global_attributes)
175190

191+
def test_fill_values(self):
192+
template = DatasetTemplate(dimensions={'X': 10})
193+
template.variables['X'] = {'dimensions': ['X'],
194+
'type': 'float32',
195+
'attributes': {'_FillValue': -999.}
196+
}
197+
x = np.array([-999., -999., -999., -999., -999., 1., 2., 3., 4., 5])
198+
template.variables['X']['data'] = x
199+
template.to_netcdf(self.temp_nc_file)
200+
201+
dataset = Dataset(self.temp_nc_file)
202+
dataset.set_auto_mask(True)
203+
dsx = dataset.variables['X']
204+
self.assertEqual(-999., dsx._FillValue)
205+
self.assertIsInstance(dsx[:], np.ma.MaskedArray)
206+
self.assertTrue(dsx[:5].mask.all())
207+
self.assertTrue((dsx[5:] == x[5:]).all())
208+
209+
def test_fill_values_from_masked_array(self):
210+
template = DatasetTemplate(dimensions={'X': 10})
211+
template.variables['X'] = {'dimensions': ['X'],
212+
'type': 'float32',
213+
'attributes': {'_FillValue': -999.}
214+
}
215+
x = np.array([-4, -3, -2, -1, 0, 1., 2., 3., 4., 5])
216+
template.variables['X']['data'] = np.ma.masked_array(x, mask=[True, True, True, True, True,
217+
False, False, False, False, False]
218+
)
219+
template.to_netcdf(self.temp_nc_file)
220+
221+
dataset = Dataset(self.temp_nc_file)
222+
dsx = dataset.variables['X']
223+
self.assertEqual(-999., dsx._FillValue)
224+
self.assertIsInstance(dsx[:], np.ma.MaskedArray)
225+
self.assertTrue(dsx[:5].mask.all())
226+
self.assertTrue((dsx[5:] == x[5:]).all())
227+
176228
# TODO: add data from multiple numpy arrays
177229
# e.g. template.add_data(TIME=time_values, TEMP=temp_values, PRES=pres_values)
178230
# TODO: add data from Pandas dataframe (later...)

0 commit comments

Comments (0)