Skip to content

Commit 02cee3c

Browse files
authored
Merge pull request #62 from edanalytics/main
update `lightbeam create` branch to latest main
2 parents 02d17a2 + 496d9a1 commit 02cee3c

8 files changed

Lines changed: 74 additions & 44 deletions

File tree

CHANGELOG.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
### v0.1.6
2+
<details>
3+
<summary>Released 2024-11-15</summary>
4+
5+
* feature: [implement structured results file output for validate](https://github.com/edanalytics/lightbeam/pull/52)
6+
* bugfix: [`--results-file` required a directory prefix](https://github.com/edanalytics/lightbeam/pull/57)
7+
* bugfix: [`validate` of descriptor values without a hash char (`#`) failed](https://github.com/edanalytics/lightbeam/pull/53)
8+
* bugfix: [validate uniqueness by identity fields instead of required fields](https://github.com/edanalytics/lightbeam/pull/54)
9+
* bugfix: [allow deletion of students (if permitted by credential claimset and API auth strategy)](https://github.com/edanalytics/lightbeam/pull/55)
10+
* bugfix: [make `max_failures` truly optional](https://github.com/edanalytics/lightbeam/pull/58)
11+
</details>
12+
113
### v0.1.5
214
<details>
315
<summary>Released 2024-07-23</summary>
@@ -113,4 +125,4 @@
113125
<summary>Released 2022-09-22</summary>
114126

115127
* initial release
116-
</details>
128+
</details>

lightbeam/VERSION.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.1.5
1+
0.1.6

lightbeam/api.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ async def load_descriptors_values(self):
304304
for key in v.keys():
305305
if key.endswith("Id"): descriptor = key[0:-2]
306306
self.descriptor_values.append([descriptor, v["namespace"], v["codeValue"], v["shortDescription"], v.get("description", "")])
307-
307+
308308
# save
309309
if self.lightbeam.track_state:
310310
self.logger.debug(f"saving descriptor values to {cache_file}...")
@@ -331,33 +331,47 @@ async def load_descriptors_values(self):
331331
# }
332332
# (The first element is a required attribute of the assessmentItem; the other two are required elements
333333
# of the required nested assessmentReference.)
334-
def get_params_for_endpoint(self, endpoint):
334+
def get_params_for_endpoint(self, endpoint, type='required'):
335335
if "Descriptor" in endpoint: swagger = self.descriptors_swagger
336336
else: swagger = self.resources_swagger
337-
definition = util.camel_case(self.lightbeam.config["namespace"]) + "_" + util.singularize_endpoint(endpoint)
338-
return self.get_required_params_from_swagger(swagger, definition)
337+
definition = util.get_swagger_ref_for_endpoint(self.lightbeam.config["namespace"], swagger, endpoint)
338+
if type=='required':
339+
return self.get_required_params_from_swagger(swagger, definition)
340+
else:
341+
# descriptor endpoints all have the same structure and identity fields:
342+
if "Descriptor" in endpoint:
343+
return { 'namespace':'namespace', 'codeValue':'codeValue', 'shortDescription':'shortDescription'}
344+
else:
345+
return self.get_identity_params_from_swagger(swagger, definition)
339346

340347
def get_required_params_from_swagger(self, swagger, definition, prefix=""):
341348
params = {}
342-
use_definitions = False
343-
if "definitions" in swagger.keys():
344-
schema = swagger["definitions"][definition]
345-
use_definitions = True
346-
elif "components" in swagger.keys() and "schemas" in swagger["components"].keys():
347-
schema = swagger["components"]["schemas"][definition]
348-
else:
349+
schema = util.resolve_swagger_ref(swagger, definition)
350+
if not schema:
349351
self.logger.critical(f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid.")
350352

351-
for requiredProperty in schema["required"]:
352-
if "$ref" in schema["properties"][requiredProperty].keys():
353-
sub_definition = schema["properties"][requiredProperty]["$ref"]
354-
if use_definitions:
355-
sub_definition = sub_definition.replace("#/definitions/", "")
356-
else:
357-
sub_definition = sub_definition.replace("#/components/schemas/", "")
358-
sub_params = self.get_required_params_from_swagger(swagger, sub_definition, prefix=requiredProperty+".")
353+
for prop in schema["required"]:
354+
if "$ref" in schema["properties"][prop].keys():
355+
sub_definition = schema["properties"][prop]["$ref"]
356+
sub_params = self.get_required_params_from_swagger(swagger, sub_definition, prefix=prop+".")
357+
for k,v in sub_params.items():
358+
params[k] = v
359+
elif schema["properties"][prop]["type"]!="array":
360+
params[prop] = prefix + prop
361+
return params
362+
363+
def get_identity_params_from_swagger(self, swagger, definition, prefix=""):
364+
params = {}
365+
schema = util.resolve_swagger_ref(swagger, definition)
366+
if not schema:
367+
self.logger.critical(f"Swagger contains neither `definitions` nor `components.schemas` - check that the Swagger is valid.")
368+
369+
for prop in schema["properties"]:
370+
if prop.endswith("Reference") and "required" in schema.keys() and prop in schema['required'] and "$ref" in schema["properties"][prop].keys():
371+
sub_definition = schema["properties"][prop]["$ref"]
372+
sub_params = self.get_identity_params_from_swagger(swagger, sub_definition, prefix=prop+".")
359373
for k,v in sub_params.items():
360374
params[k] = v
361-
elif schema["properties"][requiredProperty]["type"]!="array":
362-
params[requiredProperty] = prefix + requiredProperty
375+
elif "type" in schema["properties"][prop].keys() and schema["properties"][prop]["type"]!="array" and "x-Ed-Fi-isIdentity" in schema["properties"][prop].keys():
376+
params[prop] = prefix + prop
363377
return params

lightbeam/delete.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,6 @@ def delete(self):
3636
self.lightbeam.confirm_delete(endpoints)
3737

3838
for endpoint in endpoints:
39-
# it doesn't seem possible to delete students once you've sent them
40-
# (I think because other entities may have referenced them in the meantime)
41-
if endpoint=='students':
42-
self.logger.warn("data for {0} endpoint cannot be deleted (this is an Ed-Fi limitation); skipping".format(endpoint))
43-
continue
44-
4539
asyncio.run(self.do_deletes(endpoint))
4640
self.logger.info("finished processing endpoint {0}!".format(endpoint))
4741
self.logger.info(" (final status counts: {0})".format(self.lightbeam.status_counts))

lightbeam/lightbeam.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def __init__(self, config_file, logger=None, selector="*", exclude="", keep_keys
8282
self.truncator = Truncator(self)
8383
self.api = EdFiAPI(self)
8484
self.token_version = 0
85-
self.results_file = results_file
85+
self.results_file = os.path.abspath(results_file) if results_file else None
8686
self.start_timestamp = datetime.now()
8787

8888
# load params and/or env vars for config YAML interpolation
@@ -168,7 +168,8 @@ def write_structured_output(self, command):
168168
# failures.line_numbers are split each on their own line; here we remove those line breaks
169169
content = re.sub(r'"line_numbers": \[(\d|,|\s|\n)*\]', self.replace_linebreaks, content)
170170
fp.write(content)
171-
self.logger.info(f"results written to {self.results_file}")
171+
172+
self.logger.info(f"results written to {self.results_file}")
172173

173174

174175
def load_config_file(self) -> dict:

lightbeam/truncate.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@ def truncate(self):
2828
self.lightbeam.confirm_truncate(endpoints)
2929

3030
for endpoint in endpoints:
31-
# it doesn't seem possible to delete students once you've sent them
32-
# (I think because other entities may have referenced them in the meantime)
33-
# if endpoint=='students':
34-
# self.logger.warn("data for {0} endpoint cannot be deleted (this is an Ed-Fi limitation); skipping".format(endpoint))
35-
# continue
36-
3731
asyncio.run(self.do_truncates(endpoint))
3832
self.logger.info("finished processing endpoint {0}!".format(endpoint))
3933
self.logger.info(" (final status counts: {0})".format(self.lightbeam.status_counts))

lightbeam/util.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,18 @@ def keys_match(key, wildcard_key):
7979
if key==wildcard_key: return True
8080
if wildcard_key.startswith("*") and key.endswith(wildcard_key.lstrip("*")): return True
8181
if wildcard_key.endswith("*") and key.startswith(wildcard_key.rstrip("*")): return True
82-
return False
82+
return False
83+
84+
def get_swagger_ref_for_endpoint(namespace, swagger, endpoint):
    """Build the `$ref` string pointing at an endpoint's schema in `swagger`.

    The ref prefix follows whichever schema container `swagger` has:
    `#/definitions/` when it has a top-level `definitions` key (checked
    first), otherwise `#/components/schemas/` when it has
    `components.schemas`. The schema name appended to the prefix is
    `camel_case(namespace) + "_" + singularize_endpoint(endpoint)`.

    Returns None when `swagger` has neither container.
    """
    if "definitions" in swagger:
        ref_prefix = "#/definitions/"
    elif "components" in swagger and "schemas" in swagger["components"]:
        ref_prefix = "#/components/schemas/"
    else:
        # no recognizable schema container in this Swagger document
        return None
    # the schema name is only built once a container has been found
    return ref_prefix + camel_case(namespace) + "_" + singularize_endpoint(endpoint)
89+
90+
def resolve_swagger_ref(swagger, ref):
    """Look up the schema that `ref` points to inside `swagger`.

    `ref` may be a full pointer (`#/definitions/Name` or
    `#/components/schemas/Name`) or a bare schema name. The container is
    chosen from `swagger` itself: a top-level `definitions` key is checked
    first, then `components.schemas`.

    Returns the stored schema, or None when `swagger` has neither
    container (callers test the result for falsiness).

    Raises KeyError when the container exists but has no entry for the
    resolved name.
    """
    if "definitions" in swagger:
        prefix = "#/definitions/"
        schemas = swagger["definitions"]
    elif "components" in swagger and "schemas" in swagger["components"]:
        prefix = "#/components/schemas/"
        schemas = swagger["components"]["schemas"]
    else:
        # neither layout present; explicit rather than falling off the end
        return None

    # Strip the pointer prefix only where it leads the ref; a blanket
    # str.replace() would also delete it from the middle of a schema name.
    name = ref[len(prefix):] if ref.startswith(prefix) else ref
    return schemas[name]

lightbeam/validate.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ class Validator:
1414
MAX_VALIDATION_ERRORS_TO_DISPLAY = 10
1515
MAX_VALIDATE_TASK_QUEUE_SIZE = 100
1616
DEFAULT_VALIDATION_METHODS = ["schema", "descriptors", "uniqueness"]
17-
DEFAULT_FAIL_FAST_THRESHOLD = 10
1817

1918
EDFI_GENERICS_TO_RESOURCES_MAPPING = {
2019
"educationOrganizations": ["localEducationAgencies", "stateEducationAgencies", "schools"],
@@ -35,7 +34,7 @@ def __init__(self, lightbeam=None):
3534
def validate(self):
3635

3736
# The below should go in __init__(), but it relies on lightbeam.config, which is not yet available there.
38-
self.fail_fast_threshold = self.lightbeam.config.get("validate",{}).get("references",{}).get("max_failures", self.DEFAULT_FAIL_FAST_THRESHOLD)
37+
self.fail_fast_threshold = self.lightbeam.config.get("validate",{}).get("references",{}).get("max_failures", None)
3938
self.validation_methods = self.lightbeam.config.get("validate",{}).get("methods",self.DEFAULT_VALIDATION_METHODS)
4039
if type(self.validation_methods)==str and (self.validation_methods=="*" or self.validation_methods.lower()=='all'):
4140
self.validation_methods = self.DEFAULT_VALIDATION_METHODS
@@ -219,7 +218,7 @@ async def validate_endpoint(self, endpoint):
219218
self.lightbeam.metadata["resources"][endpoint]["records_failed"] = self.lightbeam.num_errors
220219

221220
# implement "fail fast" feature:
222-
if self.lightbeam.num_errors >= self.fail_fast_threshold:
221+
if self.fail_fast_threshold is not None and self.lightbeam.num_errors >= self.fail_fast_threshold:
223222
self.lightbeam.shutdown("validate")
224223
self.logger.critical(f"... STOPPING; found {self.lightbeam.num_errors} >= validate.references.max_failures={self.fail_fast_threshold} VALIDATION ERRORS.")
225224
break
@@ -235,7 +234,7 @@ async def validate_endpoint(self, endpoint):
235234

236235

237236
async def do_validate_payload(self, endpoint, file_name, data, line_counter):
238-
if self.lightbeam.num_errors >= self.fail_fast_threshold: return
237+
if self.fail_fast_threshold is not None and self.lightbeam.num_errors >= self.fail_fast_threshold: return
239238
definition = self.get_swagger_definition_for_endpoint(endpoint)
240239
if "Descriptor" in endpoint:
241240
swagger = self.lightbeam.api.descriptors_swagger
@@ -251,7 +250,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
251250

252251
resolver = RefResolver("test", swagger, swagger)
253252
validator = Draft4Validator(resource_schema, resolver=resolver)
254-
params_structure = self.lightbeam.api.get_params_for_endpoint(endpoint)
253+
identity_params_structure = self.lightbeam.api.get_params_for_endpoint(endpoint, type='identity')
255254
distinct_params = []
256255

257256
# check payload is valid JSON
@@ -281,7 +280,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_counter):
281280

282281
# check natural keys are unique
283282
if "uniqueness" in self.validation_methods:
284-
params = json.dumps(util.interpolate_params(params_structure, payload))
283+
params = json.dumps(util.interpolate_params(identity_params_structure, payload))
285284
params_hash = hashlog.get_hash(params)
286285
if params_hash in distinct_params:
287286
self.log_validation_error(endpoint, file_name, line_counter, "uniqueness", "duplicate value(s) for natural key(s): {params}")
@@ -344,6 +343,8 @@ def has_invalid_descriptor_values(self, payload, path=""):
344343
value = self.has_invalid_descriptor_values(payload[k][i], path+("." if path!="" else "")+k+"["+str(i)+"]")
345344
if value!="": return value
346345
elif isinstance(payload[k], str) and k.endswith("Descriptor"):
346+
if "#" not in payload[k]:
347+
return payload[k] + f" is not a valid descriptor value for {k}" + (" (at " + path + ")" if path!="" else "") + "; format should be like `uri://namespace.org/SomeDescriptor#SomeValue`"
347348
namespace = payload[k].split("#")[0]
348349
codeValue = payload[k].split("#")[1]
349350
# check if it's a local descriptor:

0 commit comments

Comments
 (0)