@@ -196,6 +196,24 @@ async def validate_endpoint(self, endpoint):
196196 "records_skipped" : 0 ,
197197 "records_failed" : 0
198198 })
199+ # structures to support testing uniqueness across payloads:
200+ definition = self .get_swagger_definition_for_endpoint (endpoint )
201+ if "Descriptor" in endpoint :
202+ swagger = self .lightbeam .api .descriptors_swagger
203+ else :
204+ swagger = self .lightbeam .api .resources_swagger
205+
206+ if "definitions" in swagger .keys ():
207+ resource_schema = swagger ["definitions" ][definition ]
208+ elif "components" in swagger .keys () and "schemas" in swagger ["components" ].keys ():
209+ resource_schema = swagger ["components" ]["schemas" ][definition ]
210+ else :
211+ self .logger .critical (f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid." )
212+ self .uniqueness_hashes = { endpoint : [] }
213+ self .identity_params_structures = {}
214+ self .schema_resolver = RefResolver ("test" , swagger , swagger )
215+ self .schema_validator = Draft4Validator (resource_schema , resolver = self .schema_resolver )
216+
199217 for file_name in data_files :
200218 self .logger .info (f"validating { file_name } against { definition } schema..." )
201219 with open (file_name ) as file :
@@ -231,28 +249,17 @@ async def validate_endpoint(self, endpoint):
231249 if self .lightbeam .num_errors > self .MAX_VALIDATION_ERRORS_TO_DISPLAY :
232250 self .logger .warn (f"... and { num_others } others!" )
233251 self .logger .warn (f"... VALIDATION ERRORS on { self .lightbeam .num_errors } of { line_counter } lines in { file_name } ; see details above." )
252+
253+ # free up some memory
254+ self .uniqueness_hashes = {}
255+ self .identity_params_structures = {}
256+ self .schema_resolver = None
257+ self .schema_validator = None
234258
235259
236260 async def do_validate_payload (self , endpoint , file_name , data , line_counter ):
237261 if self .fail_fast_threshold is not None and self .lightbeam .num_errors >= self .fail_fast_threshold : return
238- definition = self .get_swagger_definition_for_endpoint (endpoint )
239- if "Descriptor" in endpoint :
240- swagger = self .lightbeam .api .descriptors_swagger
241- else :
242- swagger = self .lightbeam .api .resources_swagger
243-
244- if "definitions" in swagger .keys ():
245- resource_schema = swagger ["definitions" ][definition ]
246- elif "components" in swagger .keys () and "schemas" in swagger ["components" ].keys ():
247- resource_schema = swagger ["components" ]["schemas" ][definition ]
248- else :
249- self .logger .critical (f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid." )
250262
251- resolver = RefResolver ("test" , swagger , swagger )
252- validator = Draft4Validator (resource_schema , resolver = resolver )
253- identity_params_structure = self .lightbeam .api .get_params_for_endpoint (endpoint , type = 'identity' )
254- distinct_params = []
255-
256263 # check payload is valid JSON
257264 try :
258265 payload = json .loads (data )
@@ -263,7 +270,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
263270 # check payload obeys Swagger schema
264271 if "schema" in self .validation_methods :
265272 try :
266- validator .validate (payload )
273+ self . schema_validator .validate (payload )
267274 except Exception as e :
268275 e_path = [str (x ) for x in list (e .path )]
269276 context = ""
@@ -279,14 +286,13 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
279286 return
280287
281288 # check natural keys are unique
289+ if not self .identity_params_structures .get (endpoint , False ):
290+ self .identity_params_structures [endpoint ] = self .lightbeam .api .get_params_for_endpoint (endpoint , type = 'identity' )
282291 if "uniqueness" in self .validation_methods :
283- params = json .dumps (util .interpolate_params (identity_params_structure , payload ))
284- params_hash = hashlog .get_hash (params )
285- if params_hash in distinct_params :
286- self .log_validation_error (endpoint , file_name , line_counter , "uniqueness" , "duplicate value(s) for natural key(s): {params}" )
287- return
288- else : distinct_params .append (params_hash )
289-
292+ error_message = self .violates_uniqueness (endpoint , payload , path = "" )
293+ if error_message != "" :
294+ self .log_validation_error (endpoint , file_name , line_counter , "uniqueness" , error_message )
295+
290296 # check references values are valid
291297 if "references" in self .validation_methods and "Descriptor" not in endpoint : # Descriptors have no references
292298 self .lightbeam .api .do_oauth ()
@@ -297,7 +303,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
297303
298304 def log_validation_error (self , endpoint , file_name , line_number , method , message ):
299305 if self .lightbeam .num_errors < self .MAX_VALIDATION_ERRORS_TO_DISPLAY :
300- self .logger .warning (f"... VALIDATION ERROR (line { line_number } ): { message } " )
306+ self .logger .warning (f"... VALIDATION ERROR ({ method } at line { line_number } ): { message } " )
301307 self .lightbeam .num_errors += 1
302308
303309 # update run metadata...
@@ -319,6 +325,31 @@ def log_validation_error(self, endpoint, file_name, line_number, method, message
319325 failures .append (failure )
320326 self .lightbeam .metadata ["resources" ][endpoint ]["failures" ] = failures
321327
328+ def violates_uniqueness (self , endpoint , payload , path = "" ):
329+ params = json .dumps (util .interpolate_params (self .identity_params_structures [endpoint ], payload ))
330+ params_hash = hashlog .get_hash (params )
331+ if params_hash in self .uniqueness_hashes [endpoint ]:
332+ return f"duplicate value(s) for identity key(s): " + ("(at " + path + "): " if path != "" else ": " ) + f"{ params } "
333+ else :
334+ self .uniqueness_hashes [endpoint ].append (params_hash )
335+ # (recursively) check uniqueness of items in arrays
336+ swagger = self .lightbeam .api .resources_swagger
337+ endpoint_def = util .get_swagger_ref_for_endpoint (self .lightbeam .config .get ('namespace' , '' ), swagger , endpoint )
338+ for k in payload .keys ():
339+ if isinstance (payload [k ], list ):
340+ subarray_definition = util .resolve_swagger_ref (swagger , endpoint_def )
341+ if subarray_definition :
342+ subarray_ref = subarray_definition ['properties' ][k ].get ('items' ,{}).get ('$ref' ,'' )
343+ if not self .identity_params_structures .get (subarray_ref , False ):
344+ self .identity_params_structures [subarray_ref ] = self .lightbeam .api .get_identity_params_from_swagger (swagger , subarray_ref )
345+ if subarray_ref not in self .uniqueness_hashes .keys ():
346+ self .uniqueness_hashes [subarray_ref ] = []
347+ for i in range (0 , len (payload [k ])):
348+ value = self .violates_uniqueness (subarray_ref , payload [k ][i ], path + ("." if path != "" else "" ) + f"{ k } [{ i } ]" )
349+ if value != "" : return value
350+ return ""
351+
352+
322353 def load_local_descriptors (self ):
323354 local_descriptors = []
324355 all_endpoints = self .lightbeam .api .get_sorted_endpoints ()
@@ -340,7 +371,7 @@ def has_invalid_descriptor_values(self, payload, path=""):
340371 if value != "" : return value
341372 elif isinstance (payload [k ], list ):
342373 for i in range (0 , len (payload [k ])):
343- value = self .has_invalid_descriptor_values (payload [k ][i ], path + ("." if path != "" else "" )+ k + "[" + str ( i ) + " ]" )
374+ value = self .has_invalid_descriptor_values (payload [k ][i ], path + ("." if path != "" else "" ) + f" { k } [ { i } ]" )
344375 if value != "" : return value
345376 elif isinstance (payload [k ], str ) and k .endswith ("Descriptor" ):
346377 if "#" not in payload [k ]:
0 commit comments