Skip to content

Commit 8d6ccd9

Browse files
author
Tom Reitz
committed
validate fixes and uniqueness in array elements
1 parent 496d9a1 commit 8d6ccd9

2 files changed

Lines changed: 60 additions & 29 deletions

File tree

lightbeam/util.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ def get_swagger_ref_for_endpoint(namespace, swagger, endpoint):
9090
def resolve_swagger_ref(swagger, ref):
9191
if "definitions" in swagger.keys():
9292
definition = ref.replace("#/definitions/", "")
93-
return swagger["definitions"][definition]
93+
return swagger["definitions"].get(definition, None)
9494
elif "components" in swagger.keys() and "schemas" in swagger["components"].keys():
9595
definition = ref.replace("#/components/schemas/", "")
96-
return swagger["components"]["schemas"][definition]
96+
return swagger["components"]["schemas"].get(definition, None)

lightbeam/validate.py

Lines changed: 58 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,24 @@ async def validate_endpoint(self, endpoint):
196196
"records_skipped": 0,
197197
"records_failed": 0
198198
})
199+
# structures to support testing uniqueness across payloads:
200+
definition = self.get_swagger_definition_for_endpoint(endpoint)
201+
if "Descriptor" in endpoint:
202+
swagger = self.lightbeam.api.descriptors_swagger
203+
else:
204+
swagger = self.lightbeam.api.resources_swagger
205+
206+
if "definitions" in swagger.keys():
207+
resource_schema = swagger["definitions"][definition]
208+
elif "components" in swagger.keys() and "schemas" in swagger["components"].keys():
209+
resource_schema = swagger["components"]["schemas"][definition]
210+
else:
211+
self.logger.critical(f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid.")
212+
self.uniqueness_hashes = { endpoint: [] }
213+
self.identity_params_structures = {}
214+
self.schema_resolver = RefResolver("test", swagger, swagger)
215+
self.schema_validator = Draft4Validator(resource_schema, resolver=self.schema_resolver)
216+
199217
for file_name in data_files:
200218
self.logger.info(f"validating {file_name} against {definition} schema...")
201219
with open(file_name) as file:
@@ -231,28 +249,17 @@ async def validate_endpoint(self, endpoint):
231249
if self.lightbeam.num_errors > self.MAX_VALIDATION_ERRORS_TO_DISPLAY:
232250
self.logger.warn(f"... and {num_others} others!")
233251
self.logger.warn(f"... VALIDATION ERRORS on {self.lightbeam.num_errors} of {line_counter} lines in {file_name}; see details above.")
252+
253+
# free up some memory
254+
self.uniqueness_hashes = {}
255+
self.identity_params_structures = {}
256+
self.schema_resolver = None
257+
self.schema_validator = None
234258

235259

236260
async def do_validate_payload(self, endpoint, file_name, data, line_counter):
237261
if self.fail_fast_threshold is not None and self.lightbeam.num_errors >= self.fail_fast_threshold: return
238-
definition = self.get_swagger_definition_for_endpoint(endpoint)
239-
if "Descriptor" in endpoint:
240-
swagger = self.lightbeam.api.descriptors_swagger
241-
else:
242-
swagger = self.lightbeam.api.resources_swagger
243-
244-
if "definitions" in swagger.keys():
245-
resource_schema = swagger["definitions"][definition]
246-
elif "components" in swagger.keys() and "schemas" in swagger["components"].keys():
247-
resource_schema = swagger["components"]["schemas"][definition]
248-
else:
249-
self.logger.critical(f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid.")
250262

251-
resolver = RefResolver("test", swagger, swagger)
252-
validator = Draft4Validator(resource_schema, resolver=resolver)
253-
identity_params_structure = self.lightbeam.api.get_params_for_endpoint(endpoint, type='identity')
254-
distinct_params = []
255-
256263
# check payload is valid JSON
257264
try:
258265
payload = json.loads(data)
@@ -263,7 +270,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
263270
# check payload obeys Swagger schema
264271
if "schema" in self.validation_methods:
265272
try:
266-
validator.validate(payload)
273+
self.schema_validator.validate(payload)
267274
except Exception as e:
268275
e_path = [str(x) for x in list(e.path)]
269276
context = ""
@@ -279,14 +286,13 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
279286
return
280287

281288
# check natural keys are unique
289+
if not self.identity_params_structures.get(endpoint, False):
290+
self.identity_params_structures[endpoint] = self.lightbeam.api.get_params_for_endpoint(endpoint, type='identity')
282291
if "uniqueness" in self.validation_methods:
283-
params = json.dumps(util.interpolate_params(identity_params_structure, payload))
284-
params_hash = hashlog.get_hash(params)
285-
if params_hash in distinct_params:
286-
self.log_validation_error(endpoint, file_name, line_counter, "uniqueness", "duplicate value(s) for natural key(s): {params}")
287-
return
288-
else: distinct_params.append(params_hash)
289-
292+
error_message = self.violates_uniqueness(endpoint, payload, path="")
293+
if error_message != "":
294+
self.log_validation_error(endpoint, file_name, line_counter, "uniqueness", error_message)
295+
290296
# check references values are valid
291297
if "references" in self.validation_methods and "Descriptor" not in endpoint: # Descriptors have no references
292298
self.lightbeam.api.do_oauth()
@@ -297,7 +303,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
297303

298304
def log_validation_error(self, endpoint, file_name, line_number, method, message):
299305
if self.lightbeam.num_errors < self.MAX_VALIDATION_ERRORS_TO_DISPLAY:
300-
self.logger.warning(f"... VALIDATION ERROR (line {line_number}): {message}")
306+
self.logger.warning(f"... VALIDATION ERROR ({method} at line {line_number}): {message}")
301307
self.lightbeam.num_errors += 1
302308

303309
# update run metadata...
@@ -319,6 +325,31 @@ def log_validation_error(self, endpoint, file_name, line_number, method, message
319325
failures.append(failure)
320326
self.lightbeam.metadata["resources"][endpoint]["failures"] = failures
321327

328+
def violates_uniqueness(self, endpoint, payload, path=""):
329+
params = json.dumps(util.interpolate_params(self.identity_params_structures[endpoint], payload))
330+
params_hash = hashlog.get_hash(params)
331+
if params_hash in self.uniqueness_hashes[endpoint]:
332+
return f"duplicate value(s) for identity key(s): " + ("(at "+path+"): " if path!="" else ": ") + f"{params}"
333+
else:
334+
self.uniqueness_hashes[endpoint].append(params_hash)
335+
# (recursively) check uniqueness of items in arrays
336+
swagger = self.lightbeam.api.resources_swagger
337+
endpoint_def = util.get_swagger_ref_for_endpoint(self.lightbeam.config.get('namespace', ''), swagger, endpoint)
338+
for k in payload.keys():
339+
if isinstance(payload[k], list):
340+
subarray_definition = util.resolve_swagger_ref(swagger, endpoint_def)
341+
if subarray_definition:
342+
subarray_ref = subarray_definition['properties'][k].get('items',{}).get('$ref','')
343+
if not self.identity_params_structures.get(subarray_ref, False):
344+
self.identity_params_structures[subarray_ref] = self.lightbeam.api.get_identity_params_from_swagger(swagger, subarray_ref)
345+
if subarray_ref not in self.uniqueness_hashes.keys():
346+
self.uniqueness_hashes[subarray_ref] = []
347+
for i in range(0, len(payload[k])):
348+
value = self.violates_uniqueness(subarray_ref, payload[k][i], path+("." if path!="" else "") + f"{k}[{i}]")
349+
if value!="": return value
350+
return ""
351+
352+
322353
def load_local_descriptors(self):
323354
local_descriptors = []
324355
all_endpoints = self.lightbeam.api.get_sorted_endpoints()
@@ -340,7 +371,7 @@ def has_invalid_descriptor_values(self, payload, path=""):
340371
if value!="": return value
341372
elif isinstance(payload[k], list):
342373
for i in range(0, len(payload[k])):
343-
value = self.has_invalid_descriptor_values(payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]")
374+
value = self.has_invalid_descriptor_values(payload[k][i], path+("." if path!="" else "") + f"{k}[{i}]")
344375
if value!="": return value
345376
elif isinstance(payload[k], str) and k.endswith("Descriptor"):
346377
if "#" not in payload[k]:

0 commit comments

Comments
 (0)