Skip to content

Commit 5bf02dc

Browse files
author
Tom Reitz
committed
implementing validate references selector, behavior and remote switch, plus update docs
1 parent 496d9a1 commit 5bf02dc

2 files changed

Lines changed: 41 additions & 17 deletions

File tree

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,12 @@ validate:
147147
- references # checks that references resolve, either locally or in the remote API
148148
# or
149149
# methods: "*"
150+
references:
151+
selector:
152+
- studentAssessments.studentReference
153+
- studentSchoolAssociations.schoolReference
154+
behavior: exclude # or `include`
155+
remote: False # default=True
150156
```
151157
Default `validate`.`methods` are `["schema", "descriptors", "uniqueness"]` (not `references`; see below). In addition to the above methods, `lightbeam validate` will also (first) check that each payload is valid JSON.
152158

@@ -167,6 +173,8 @@ This is optional; if absent, references in every payload are checked, no matter
167173
* `fetch`ed data becoming stale over time
168174
* needing to track which data is your own vs. was `fetch`ed (all the data must coexist in the `config.data_dir` to be discoverable by `lightbeam validate`)
169175

176+
You may specify a `selector` list of the form `someEndpoint.path.to.someReference` to include or exclude (according to `behavior`) specific references from reference validation. You may also specify `remote: False` to validate references only against local data in your JSONL files.
177+
170178

171179
## `send`
172180
```bash

lightbeam/validate.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,17 @@ def validate(self):
3939
if type(self.validation_methods)==str and (self.validation_methods=="*" or self.validation_methods.lower()=='all'):
4040
self.validation_methods = self.DEFAULT_VALIDATION_METHODS
4141
self.validation_methods.append("references")
42-
42+
self.validation_references_selector = self.lightbeam.config.get("validate",{}).get("references",{}).get("selector", [])
43+
for selector in self.validation_references_selector:
44+
if "." not in selector:
45+
self.logger.error(f"`config.validate.references.selector` {selector} is incorrectly formatted (should be `someEndpoint.someReference`, such as `studentSchoolAssociation.schoolReference`)")
46+
self.validation_references_behavior = self.lightbeam.config.get("validate",{}).get("references",{}).get("behavior", "exclude")
47+
if self.validation_references_behavior not in ["exclude", "include"]:
48+
self.logger.error(f"`config.validate.references.behavior` must be either `exclude` (default) or `include`)")
49+
self.validation_references_remote = self.lightbeam.config.get("validate",{}).get("references",{}).get("remote", True)
50+
if "references" in self.validation_methods and not self.validation_references_remote:
51+
self.logger.info(f"(references will only be validated against local data, since `config.validate.references.remote: False`)")
52+
4353
self.lightbeam.api.load_swagger_docs()
4454
self.logger.info(f"validating by methods {self.validation_methods}...")
4555
if "descriptors" in self.validation_methods:
@@ -290,7 +300,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
290300
# check references values are valid
291301
if "references" in self.validation_methods and "Descriptor" not in endpoint: # Descriptors have no references
292302
self.lightbeam.api.do_oauth()
293-
error_message = self.has_invalid_references(payload, path="")
303+
error_message = self.has_invalid_references(endpoint, payload, path="")
294304
if error_message != "":
295305
self.log_validation_error(endpoint, file_name, line_counter, "references", error_message)
296306

@@ -360,40 +370,46 @@ def has_invalid_descriptor_values(self, payload, path=""):
360370
return ""
361371

362372
# Validates references for a single payload (returns an error message or empty string)
363-
def has_invalid_references(self, payload, path=""):
373+
def has_invalid_references(self, endpoint, payload, path=""):
364374
for k in payload.keys():
365375
if isinstance(payload[k], dict) and not k.endswith("Reference"):
366-
value = self.has_invalid_references(payload[k], path+("." if path!="" else "")+k)
376+
value = self.has_invalid_references(endpoint, payload[k], path+("." if path!="" else "")+k)
367377
if value!="": return value
368378
elif isinstance(payload[k], list):
369379
for i in range(0, len(payload[k])):
370-
value = self.has_invalid_references(payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]")
380+
value = self.has_invalid_references(endpoint, payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]")
371381
if value!="": return value
372382
elif isinstance(payload[k], dict) and k.endswith("Reference"):
383+
check_this_reference = (
384+
(f"{endpoint}.{path}{k}" in self.validation_references_selector and self.validation_references_behavior=="include")
385+
or (f"{endpoint}.{path}{k}" not in self.validation_references_selector and self.validation_references_behavior=="exclude")
386+
)
387+
if not check_this_reference: continue
373388
is_valid_reference = False
374389
original_endpoint = util.pluralize_endpoint(k.replace("Reference",""))
375390

391+
params = payload[k].copy()
392+
if "link" in params.keys(): del params["link"]
393+
376394
# this deals with the fact that an educationOrganizationReference may be to a school, LEA, etc.:
377395
endpoints_to_check = self.EDFI_GENERICS_TO_RESOURCES_MAPPING.get(original_endpoint, [original_endpoint])
378-
for endpoint in endpoints_to_check:
396+
for endpt in endpoints_to_check:
379397
# check if it's a local reference:
380-
if endpoint not in self.local_reference_cache.keys(): break
398+
if endpt not in self.local_reference_cache.keys(): break
381399
# construct cache_key for reference
382-
cache_key = self.get_cache_key(payload[k])
383-
if cache_key in self.local_reference_cache[endpoint]:
400+
cache_key = self.get_cache_key(params)
401+
if cache_key in self.local_reference_cache[endpt]:
384402
is_valid_reference = True
385403
break
386-
if not is_valid_reference: # not found in local data...
387-
for endpoint in endpoints_to_check:
404+
if not is_valid_reference and self.validation_references_remote: # not found in local data...
405+
for endpt in endpoints_to_check:
388406
# check if it's a remote reference:
389-
params = payload[k].copy()
390-
if "link" in params.keys(): del params["link"]
391-
value = self.remote_reference_exists(endpoint, params)
407+
value = self.remote_reference_exists(endpt, params)
392408
if value:
393409
is_valid_reference = True
394410
break
395-
if not is_valid_reference:
396-
return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params)
411+
if not is_valid_reference:
412+
return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params)
397413
return ""
398414

399415
# Tells you if a specified descriptor value is valid or not
@@ -443,7 +459,7 @@ def remote_reference_exists(self, endpoint, params):
443459
else:
444460
pass # await asyncio.sleep(1)
445461
curr_token_version = int(str(self.lightbeam.token_version))
446-
elif status=='404':
462+
elif status=='404' or status=='400':
447463
return False
448464
elif status in ['200', '201']:
449465
# 200 response might still return zero matching records...

0 commit comments

Comments
 (0)