Skip to content

Commit 999585b

Browse files
committed
More complete validation using jsonschema
Also renamed variable properties "dims"->"dimensions" and "attr"->"attributes" for consistency with dataset-level properties.
1 parent fce6f72 commit 999585b

7 files changed

Lines changed: 201 additions & 69 deletions

File tree

ncwriter/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from .template import DatasetTemplate
2+
from .schema import ValidationError
23

34
__all__ = [
4-
'DatasetTemplate'
5+
'DatasetTemplate',
6+
'ValidationError'
57
]

ncwriter/schema.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""This module holds schema definitions for validating the various :py:class:`dict` objects that make up parts of a
2+
template, and also the helper functions necessary to validate an object against its respective schema.
3+
"""
4+
5+
from jsonschema import validate, ValidationError
6+
7+
8+
# Valid names (for dimensions, variables and attributes) start with an ASCII
# letter and continue with ASCII letters, digits or underscores.
# The pattern is anchored with ``\Z`` rather than ``$``: jsonschema evaluates
# patterns with Python's ``re.search``, where ``$`` also matches just before a
# trailing newline and would therefore accept a name such as "TIME\n".
NAME_PATTERN = r"^[A-Za-z][A-Za-z0-9_]*\Z"


# Mapping of dimension name -> size. Each size must be a non-negative integer
# or null (``None``); keys that do not match NAME_PATTERN are rejected.
DIMENSIONS_SCHEMA = {
    "type": "object",
    "patternProperties": {
        NAME_PATTERN: {
            "type": ["integer", "null"],
            "minimum": 0
        }
    },
    "additionalProperties": False
}

# Mapping of attribute name -> value. Values may be strings, numbers or
# arrays; keys that do not match NAME_PATTERN are rejected.
ATTRIBUTES_SCHEMA = {
    "type": "object",
    "patternProperties": {
        NAME_PATTERN: {
            "type": ["string", "number", "array"]
        }
    },
    "additionalProperties": False
}


# Definition of a single variable. All three properties are optional, but no
# other property is allowed.
VARIABLE_DEFINITION_SCHEMA = {
    "type": "object",
    "properties": {
        # Ordered list of dimension names.
        "dimensions": {
            "type": "array",
            "items": {"type": "string", "pattern": NAME_PATTERN}
        },
        "type": {"type": "string"},
        # Variable-level attributes follow the same rules as any attributes dict.
        "attributes": ATTRIBUTES_SCHEMA
    },
    "additionalProperties": False
}


# Mapping of variable name -> variable definition; keys that do not match
# NAME_PATTERN are rejected.
VARIABLES_SCHEMA = {
    "type": "object",
    "patternProperties": {
        NAME_PATTERN: VARIABLE_DEFINITION_SCHEMA
    },
    "additionalProperties": False
}
54+
55+
56+
def validate_dimensions(d):
    """Check a dimensions dictionary against ``DIMENSIONS_SCHEMA``.

    :param d: The object to validate.
    :return: None
    :raises ValidationError: if `d` does not conform to the schema.
    """
    validate(instance=d, schema=DIMENSIONS_SCHEMA)
58+
59+
60+
def validate_variables(v):
    """Check a variables dictionary against ``VARIABLES_SCHEMA``.

    :param v: The object to validate.
    :return: None
    :raises ValidationError: if `v` does not conform to the schema.
    """
    validate(instance=v, schema=VARIABLES_SCHEMA)
62+
63+
64+
def validate_attributes(a):
    """Check an attributes dictionary against ``ATTRIBUTES_SCHEMA``.

    :param a: The object to validate.
    :return: None
    :raises ValidationError: if `a` does not conform to the schema.
    """
    validate(instance=a, schema=ATTRIBUTES_SCHEMA)

ncwriter/template.py

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,8 @@
1818
from copy import deepcopy
1919

2020
import netCDF4
21-
import numpy as np
2221

23-
24-
def validate_dict(o, name):
25-
"""
26-
Check that the given object is a dictionary. If not, raise TypeError.
27-
28-
:param o: The object to validate.
29-
:param str name: The name of the object to use in the error message.
30-
:return: None
31-
"""
32-
33-
if not isinstance(o, dict):
34-
raise TypeError("{name} should be a dictionary (got {o})".format(name=name, o=repr(o)))
22+
from schema import validate_dimensions, validate_variables, validate_attributes
3523

3624

3725
class NetCDFGroupDict(object):
@@ -54,11 +42,11 @@ def __init__(self,
5442
Example:
5543
dmn = {'lon':360,'lat':210}
5644
var = {}
57-
var['water'] = {'type':'double','dims':['lat','lon']}
45+
var['water'] = {'type':'double','dimensions':['lat','lon']}
5846
w1 = NetCDFGroupDict(dimensions=dmn,variables=var)
5947
dmn2 = {'time':300,'lon':720,'lat':330}
6048
var2 = {}
61-
var2['temp'] = {'type':'double','dims':['time','lat','lon']}
49+
var2['temp'] = {'type':'double','dimensions':['time','lat','lon']}
6250
w2 = NetCDFGroupDict(dimensions=dmn2,variables=var2)
6351
w3 = w1+w2
6452
#w3.variables.keys() = ['water','temp']
@@ -106,7 +94,7 @@ def dimensions(self):
10694

10795
@dimensions.setter
10896
def dimensions(self, value):
109-
validate_dict(value, 'dimensions')
97+
validate_dimensions(value)
11098
self._dimensions = value
11199

112100
@property
@@ -115,7 +103,7 @@ def variables(self):
115103

116104
@variables.setter
117105
def variables(self, value):
118-
validate_dict(value, 'variables')
106+
validate_variables(value)
119107
self._variables = value
120108

121109
@property
@@ -124,7 +112,7 @@ def global_attributes(self):
124112

125113
@global_attributes.setter
126114
def global_attributes(self, value):
127-
validate_dict(value, 'global_attributes')
115+
validate_attributes(value)
128116
self._global_attributes = value
129117

130118
def is_dim_consistent(self):
@@ -133,26 +121,26 @@ def is_dim_consistent(self):
133121
checkdims = set()
134122
for k in self.variables.keys():
135123
try:
136-
for d in self.variables[k]['dims']:
124+
for d in self.variables[k]['dimensions']:
137125
checkdims.add(d)
138126
except KeyError:
139127
print("Variable %s missing dimension information `dims`" % k)
140128

141129
except TypeError:
142-
if self.variables[k]['dims'] is None:
130+
if self.variables[k]['dimensions'] is None:
143131
continue
144132

145-
missing = ['dims']
133+
missing = ['dimensions']
146134

147135
try:
148136
self.variables['k']['vtype']
149137
except KeyError:
150138
missing += ['type']
151139

152140
try:
153-
self.variables['k']['attr']
141+
self.variables['k']['attributes']
154142
except KeyError:
155-
missing += ['attr']
143+
missing += ['attributes']
156144

157145
errstr = "Variable %s is missing information for: "
158146
for _ in missing:
@@ -171,7 +159,7 @@ def search_time_in_vars(self):
171159
tvars = set()
172160
for v in self.variables:
173161
try:
174-
tvars.add(self.variables[v]['attr']['time']['value'])
162+
tvars.add(self.variables[v]['attributes']['time']['value'])
175163
except KeyError:
176164
None
177165

@@ -217,11 +205,11 @@ def change_time(self, var, timevar):
217205
if not v_included:
218206
for k in self.variables.keys():
219207
if v in k:
220-
self.variables[k]['dims'][0] = t
221-
self.variables[k]['attr']['time']['value'] = t
208+
self.variables[k]['dimensions'][0] = t
209+
self.variables[k]['attributes']['time']['value'] = t
222210
else:
223-
self.variables[v]['dims'][0] = t
224-
self.variables[v]['attr']['time']['value'] = t
211+
self.variables[v]['dimensions'][0] = t
212+
self.variables[v]['attributes']['time']['value'] = t
225213

226214
@classmethod
227215
def check_dims(self, dimdict):
@@ -239,9 +227,9 @@ def check_var(self, vardict, name=None):
239227
name = 'input'
240228

241229
vkeys = vardict.keys()
242-
have_dims = 'dims' in vkeys
230+
have_dims = 'dimensions' in vkeys
243231
have_type = 'type' in vkeys
244-
have_att = 'attr' in vkeys
232+
have_att = 'attributes' in vkeys
245233
have_one = have_dims | have_type | have_att
246234
have_none = not have_one
247235

@@ -250,14 +238,14 @@ def check_var(self, vardict, name=None):
250238
self.check_var(vardict[k], name=k)
251239

252240
if have_dims:
253-
notnone = vardict['dims'] is not None
254-
notlist = vardict['dims'] is not list
241+
notnone = vardict['dimensions'] is not None
242+
notlist = vardict['dimensions'] is not list
255243
if notnone and notlist:
256244
ValueError(
257245
"Dim for %s should be a None or a list object" % name)
258246

259247
if have_att:
260-
notdict = vardict['attr'] is not dict
248+
notdict = vardict['attributes'] is not dict
261249
if notdict:
262250
ValueError("Attr for %s should be a dictionary object" % name)
263251
if have_type:
@@ -283,7 +271,7 @@ def check_consistency(self, dimdict, vdict):
283271
alldims = dimdict.keys()
284272
allvars = vdict.keys()
285273
for k in allvars:
286-
vardims = vdict[k].get('dims')
274+
vardims = vdict[k].get('dimensions')
287275
if vardims is None:
288276
continue
289277
else:
@@ -346,7 +334,7 @@ def update_dimensinos(self):
346334
continue
347335

348336
var_shape = values.shape
349-
var_dims = var.get('dims', [])
337+
var_dims = var.get('dimensions', [])
350338
if len(var_shape) != len(var_dims):
351339
raise ValueError(
352340
"Variable '{name}' has {ndim} dimensions, but value array has {nshape} dimensions.".format(
@@ -382,7 +370,7 @@ def createVariables(self, **kwargs):
382370
"""
383371
for varname, var in self.variables.iteritems():
384372
datatype = var['type']
385-
dimensions = var['dims']
373+
dimensions = var['dimensions']
386374
cwargs = kwargs.copy()
387375
if dimensions is None: # no kwargs in createVariable
388376
ncvar = self.ncobj.createVariable(varname, datatype)
@@ -422,8 +410,8 @@ def createVariables(self, **kwargs):
422410
ncvar[:] = var['values']
423411

424412
# add variable attributes
425-
if var.get('attr'):
426-
attrs = var['attr'].copy()
413+
if var.get('attributes'):
414+
attrs = var['attributes'].copy()
427415
for not_attr in self._create_var_opts(attrs):
428416
attrs.pop(not_attr)
429417
ncvar.setncatts(attrs)

test_ncwriter/template1.json

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,25 @@
55
},
66
"variables": {
77
"TIME": {
8-
"dims": [
8+
"dimensions": [
99
"TIME"
1010
],
11-
"type": "float64",
12-
"attr": null
11+
"type": "float64"
1312
},
1413
"DEPTH": {
15-
"dims": [
14+
"dimensions": [
1615
"DEPTH"
1716
],
1817
"type": "float32",
19-
"attr": null
18+
"attributes": {}
2019
},
2120
"TEMP": {
22-
"dims": [
21+
"dimensions": [
2322
"TIME",
2423
"DEPTH"
2524
],
2625
"type": "float32",
27-
"attr": {
26+
"attributes": {
2827
"standard_name": "sea_water_temperature",
2928
"units": "degC",
3029
"valid_min": 0.0,

test_ncwriter/template_partial.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"variables": {
33
"TEMP": {
4-
"attr": {
4+
"attributes": {
55
"standard_name": "sea_water_temperature",
66
"units": "degC"
77
}

test_ncwriter/test_schema.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import unittest
2+
3+
from ncwriter.schema import validate_dimensions, validate_variables, validate_attributes, ValidationError
4+
5+
6+
class TestSchema(unittest.TestCase):
    """Feed each schema validator inputs it must accept and inputs it must reject."""

    def _expect_rejected(self, validator, candidates):
        """Assert that `validator` raises ValidationError for every candidate, in order."""
        for candidate in candidates:
            with self.assertRaises(ValidationError):
                validator(candidate)

    def test_validate_dimensions(self):
        # Accepted: an empty mapping and plain name -> size mappings.
        for accepted in ({}, {'X': 1, 'Y': 2}):
            validate_dimensions(accepted)

        # Rejected: non-dict inputs first, then bad names / bad sizes.
        self._expect_rejected(validate_dimensions, [
            None,
            'X',
            ['X'],
            10,
            {'123': 123},
            {'X': 'one'},
            {'X': -1},
            {'X': 1.5},
        ])

    def test_validate_variables(self):
        # Accepted: every property of a variable definition is optional.
        accepted_inputs = [
            {},
            {'X': {}},
            {'X': {'type': 'float32'}},
            {'X': {'dimensions': []}},
            {'X': {'attributes': {'name': 'X'}}},
            {'X': {'dimensions': ['X'], 'type': 'float32'}},
            {
                'X': {
                    'dimensions': ['X'],
                    'type': 'float32',
                    'attributes': {'name': 'X', 'count': 1}
                }
            },
        ]
        for accepted in accepted_inputs:
            validate_variables(accepted)

        # Rejected: non-dict inputs, a bad variable name, an unknown property,
        # and attributes that are not a dict.
        self._expect_rejected(validate_variables, [
            None,
            'VAR',
            {'__X': {}},
            {'X': {'type': 'float32', 'something': 'else'}},
            {'X': {'type': 'float32', 'attributes': 'none'}},
        ])

    def test_validate_attributes(self):
        # Accepted: string, number and array values.
        accepted_inputs = [
            {},
            {'name': 'test'},
            {'name': 1.5},
            {'name': [1, 2, 3]},
        ]
        for accepted in accepted_inputs:
            validate_attributes(accepted)

        # Rejected: non-dict inputs, a bad attribute name, and value types
        # outside string/number/array.
        self._expect_rejected(validate_attributes, [
            None,
            'X',
            [],
            {'_badname': 1},
            {'null': None},
            {'bool': True},
            {'object': {}},
        ])
75+
76+
77+
if __name__ == '__main__':
78+
unittest.main()

0 commit comments

Comments
 (0)