@@ -197,6 +197,24 @@ async def validate_endpoint(self, endpoint):
197197 "records_skipped" : 0 ,
198198 "records_failed" : 0
199199 })
200+ # structures to support testing uniqueness across payloads:
201+ definition = self .get_swagger_definition_for_endpoint (endpoint )
202+ if "Descriptor" in endpoint :
203+ swagger = self .lightbeam .api .descriptors_swagger
204+ else :
205+ swagger = self .lightbeam .api .resources_swagger
206+
207+ if "definitions" in swagger .keys ():
208+ resource_schema = swagger ["definitions" ][definition ]
209+ elif "components" in swagger .keys () and "schemas" in swagger ["components" ].keys ():
210+ resource_schema = swagger ["components" ]["schemas" ][definition ]
211+ else :
212+ self .logger .critical (f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid." )
213+ self .uniqueness_hashes = { endpoint : [] }
214+ self .identity_params_structures = {}
215+ self .schema_resolver = RefResolver ("test" , swagger , swagger )
216+ self .schema_validator = Draft4Validator (resource_schema , resolver = self .schema_resolver )
217+
200218 for file_name in data_files :
201219 self .logger .info (f"validating { file_name } against { definition } schema..." )
202220 with open (file_name ) as file :
@@ -237,29 +255,18 @@ async def validate_endpoint(self, endpoint):
237255 num_others = self .lightbeam .num_errors - self .MAX_VALIDATION_ERRORS_TO_DISPLAY
238256 if self .lightbeam .num_errors > self .MAX_VALIDATION_ERRORS_TO_DISPLAY :
239257 self .logger .warn (f"... and { num_others } others!" )
240- self .logger .warn (f"... VALIDATION ERRORS on { self .lightbeam .num_errors } of { line_number } lines in { file_name } ; see details above." )
258+ self .logger .warn (f"... VALIDATION ERRORS on { self .lightbeam .num_errors } of { line_counter } lines in { file_name } ; see details above." )
259+
260+ # free up some memory
261+ self .uniqueness_hashes = {}
262+ self .identity_params_structures = {}
263+ self .schema_resolver = None
264+ self .schema_validator = None
241265
242266
243267 async def do_validate_payload (self , endpoint , file_name , data , line_number ):
244268 if self .fail_fast_threshold is not None and self .lightbeam .num_errors >= self .fail_fast_threshold : return
245- definition = self .get_swagger_definition_for_endpoint (endpoint )
246- if "Descriptor" in endpoint :
247- swagger = self .lightbeam .api .descriptors_swagger
248- else :
249- swagger = self .lightbeam .api .resources_swagger
250-
251- if "definitions" in swagger .keys ():
252- resource_schema = swagger ["definitions" ][definition ]
253- elif "components" in swagger .keys () and "schemas" in swagger ["components" ].keys ():
254- resource_schema = swagger ["components" ]["schemas" ][definition ]
255- else :
256- self .logger .critical (f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid." )
257269
258- resolver = RefResolver ("test" , swagger , swagger )
259- validator = Draft4Validator (resource_schema , resolver = resolver )
260- identity_params_structure = self .lightbeam .api .get_params_for_endpoint (endpoint , type = 'identity' )
261- distinct_params = []
262-
263270 # check payload is valid JSON
264271 try :
265272 payload = json .loads (data )
@@ -270,7 +277,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_number):
270277 # check payload obeys Swagger schema
271278 if "schema" in self .validation_methods :
272279 try :
273- validator .validate (payload )
280+ self . schema_validator .validate (payload )
274281 except Exception as e :
275282 e_path = [str (x ) for x in list (e .path )]
276283 context = ""
@@ -286,14 +293,13 @@ async def do_validate_payload(self, endpoint, file_name, data, line_number):
286293 return
287294
288295 # check natural keys are unique
296+ if not self .identity_params_structures .get (endpoint , False ):
297+ self .identity_params_structures [endpoint ] = self .lightbeam .api .get_params_for_endpoint (endpoint , type = 'identity' )
289298 if "uniqueness" in self .validation_methods :
290- params = json .dumps (util .interpolate_params (identity_params_structure , payload ))
291- params_hash = hashlog .get_hash (params )
292- if params_hash in distinct_params :
293- self .log_validation_error (endpoint , file_name , line_number , "uniqueness" , "duplicate value(s) for natural key(s): {params}" )
294- return
295- else : distinct_params .append (params_hash )
296-
299+ error_message = self .violates_uniqueness (endpoint , payload , path = "" )
300+ if error_message != "" :
301+ self .log_validation_error (endpoint , file_name , line_number , "uniqueness" , error_message )
302+
297303 # check references values are valid
298304 if "references" in self .validation_methods and "Descriptor" not in endpoint : # Descriptors have no references
299305 self .lightbeam .api .do_oauth ()
@@ -304,7 +310,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_number):
304310
305311 def log_validation_error (self , endpoint , file_name , line_number , method , message ):
306312 if self .lightbeam .num_errors < self .MAX_VALIDATION_ERRORS_TO_DISPLAY :
307- self .logger .warning (f"... VALIDATION ERROR (line { line_number } ): { message } " )
313+ self .logger .warning (f"... VALIDATION ERROR ({ method } at line { line_number } ): { message } " )
308314 self .lightbeam .num_errors += 1
309315
310316 # update run metadata...
@@ -326,6 +332,31 @@ def log_validation_error(self, endpoint, file_name, line_number, method, message
326332 failures .append (failure )
327333 self .lightbeam .metadata ["resources" ][endpoint ]["failures" ] = failures
328334
335+ def violates_uniqueness (self , endpoint , payload , path = "" ):
336+ params = json .dumps (util .interpolate_params (self .identity_params_structures [endpoint ], payload ))
337+ params_hash = hashlog .get_hash (params )
338+ if params_hash in self .uniqueness_hashes [endpoint ]:
339+ return f"duplicate value(s) for identity key(s)" + (" (at " + path + ")" if path != "" else "" ) + f": { params } "
340+ else :
341+ self .uniqueness_hashes [endpoint ].append (params_hash )
342+ # (recursively) check uniqueness of items in arrays
343+ swagger = self .lightbeam .api .resources_swagger
344+ endpoint_def = util .get_swagger_ref_for_endpoint (self .lightbeam .config .get ('namespace' , '' ), swagger , endpoint )
345+ for k in payload .keys ():
346+ if isinstance (payload [k ], list ):
347+ subarray_definition = util .resolve_swagger_ref (swagger , endpoint_def )
348+ if subarray_definition :
349+ subarray_ref = subarray_definition ['properties' ][k ].get ('items' ,{}).get ('$ref' ,'' )
350+ if not self .identity_params_structures .get (subarray_ref , False ):
351+ self .identity_params_structures [subarray_ref ] = self .lightbeam .api .get_identity_params_from_swagger (swagger , subarray_ref )
352+ if subarray_ref not in self .uniqueness_hashes .keys ():
353+ self .uniqueness_hashes [subarray_ref ] = []
354+ for i in range (0 , len (payload [k ])):
355+ value = self .violates_uniqueness (subarray_ref , payload [k ][i ], path + ("." if path != "" else "" ) + f"{ k } [{ i } ]" )
356+ if value != "" : return value
357+ return ""
358+
359+
329360 def load_local_descriptors (self ):
330361 local_descriptors = []
331362 all_endpoints = self .lightbeam .api .get_sorted_endpoints ()
@@ -347,7 +378,7 @@ def has_invalid_descriptor_values(self, payload, path=""):
347378 if value != "" : return value
348379 elif isinstance (payload [k ], list ):
349380 for i in range (0 , len (payload [k ])):
350- value = self .has_invalid_descriptor_values (payload [k ][i ], path + ("." if path != "" else "" )+ k + "[" + str ( i ) + " ]" )
381+ value = self .has_invalid_descriptor_values (payload [k ][i ], path + ("." if path != "" else "" ) + f" { k } [ { i } ]" )
351382 if value != "" : return value
352383 elif isinstance (payload [k ], str ) and k .endswith ("Descriptor" ):
353384 if "#" not in payload [k ]:
0 commit comments