Skip to content

Commit d0a6c44

Browse files
authored
Merge branch 'main' into fix/command_list
2 parents 02caae2 + 3246e38 commit d0a6c44

10 files changed

Lines changed: 142 additions & 77 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
<details>
33

44
* bugfix: [Ensure command list in help menu and log output is always consistent](https://github.com/edanalytics/lightbeam/pull/27)
5+
* bugfix: Fix how hashlog entries are removed during `lightbeam delete`
56
</details>
67

78
### v0.1.2

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ data_dir: ./
4343
namespace: ed-fi
4444
edfi_api:
4545
base_url: https://api.schooldistrict.org/v5.3/api
46+
oauth_url: https://api.schooldistrict.org/v5.3/api/oauth/token
47+
dependencies_url: https://api.schooldistrict.org/v5.3/api/metadata/data/v3/2024/dependencies
48+
descriptors_swagger_url: https://api.schooldistrict.org/v5.3/api/metadata/data/v3/2024/descriptors/swagger.json
49+
resources_swagger_url: https://api.schooldistrict.org/v5.3/api/metadata/data/v3/2024/resources/swagger.json
4650
version: 3
4751
mode: year_specific
4852
year: 2021
@@ -68,6 +72,11 @@ show_stacktrace: True
6872
* (optional) Specify the `namespace` to use when accessing the Ed-Fi API. The default is `ed-fi` but others include `tpdm` or custom values. To send data to multiple namespaces, you must use a YAML configuration file and `lightbeam send` for each.
6973
* Specify the details of the `edfi_api` to which to connect including
7074
* (optional) The `base_url` which serves a JSON object specifying the paths to data endpoints, Swagger, and dependencies. The default is `https://localhost/api` (the address of an Ed-Fi API [running locally in Docker](https://techdocs.ed-fi.org/display/EDFITOOLS/Docker+Deployment)), but the location varies depending on how Ed-Fi is deployed.
75+
* If the metadata for a particular API is not located in the "default" location (at the root of the `base_url`), then ALL of the following URLs should be explicitly specified. These can normally be left blank unless you encounter errors indicating that the metadata files cannot be found (such as "Could not parse response from [base_url]").
76+
* (optional) `oauth_url` (usually [base_url]/oauth/token)
77+
* (optional) `dependencies_url` (usually [base_url]/metadata/data/v3/dependencies)
78+
* (optional) `descriptors_swagger_url` (usually [base_url]/metadata/data/v3/descriptors/swagger.json)
79+
* (optional) `resources_swagger_url` (usually [base_url]/metadata/data/v3/resources/swagger.json)
7180
* The `version` as one of `3` or `2` (`2` is currently unsupported).
7281
* (optional) The `mode` as one of `shared_instance`, `sandbox`, `district_specific`, `year_specific`, or `instance_year_specific`.
7382
* (required if `mode` is `year_specific` or `instance_year_specific`) The `year` used to build the resource URL. The default is the current year.

lightbeam/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def main(argv=None):
148148
logger=logger,
149149
selector=args.selector or "*",
150150
exclude=args.exclude or "",
151-
keep_keys=args.keep_keys or "",
151+
keep_keys=args.keep_keys or "*",
152152
drop_keys=args.drop_keys or "",
153153
query=args.query or "{}",
154154
params=args.params,

lightbeam/api.py

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,22 @@ def prepare(self):
3535
except Exception as e:
3636
self.logger.critical("could not connect to {0} ({1})".format(self.config["base_url"], str(e)))
3737

38-
try:
39-
api_base = api_base.json()
40-
except Exception as e:
41-
self.logger.critical("could not parse response from {0} ({1})".format(self.config["base_url"], str(e)))
42-
43-
self.config["oauth_url"] = api_base["urls"]["oauth"]
44-
self.config["dependencies_url"] = api_base["urls"]["dependencies"]
38+
# Data URL doesn't rely on metadata connection
4539
self.config["data_url"] = self.get_data_url()
46-
self.config["open_api_metadata_url"] = api_base["urls"]["openApiMetadata"]
40+
41+
# If ALL urls are set in config (probably from source/destination file),
42+
# then they don't need to be pulled from api metadata, so this section can be skipped.
43+
# This is most common if the api metadata json files are not in the "default" location.
44+
# Otherwise, pull urls from api metadata.
45+
if self.config["oauth_url"] == "" or self.config["dependencies_url"]== "":
46+
try:
47+
api_base = api_base.json()
48+
except Exception as e:
49+
self.logger.critical("could not parse response from {0} ({1})".format(self.config["base_url"], str(e)))
50+
51+
self.config["oauth_url"] = api_base["urls"]["oauth"]
52+
self.config["dependencies_url"] = api_base["urls"]["dependencies"]
53+
self.config["open_api_metadata_url"] = api_base["urls"]["openApiMetadata"]
4754

4855
# load all endpoints in dependency-order
4956
all_endpoints = self.get_sorted_endpoints()
@@ -160,17 +167,33 @@ def get_sorted_endpoints(self):
160167

161168
# Loads the Swagger JSON from the Ed-Fi API
162169
def load_swagger_docs(self):
163-
# grab Descriptors and Resources swagger URLs
164-
try:
165-
self.logger.debug("fetching swagger docs...")
166-
response = requests.get(self.config["open_api_metadata_url"],
167-
verify=self.lightbeam.config["connection"]["verify_ssl"])
168-
if not response.ok:
169-
raise Exception("OpenAPI metadata URL returned status {0} ({1})".format(response.status_code, (response.content[:75] + "...") if len(response.content)>75 else response.content))
170-
openapi = response.json()
171170

172-
except Exception as e:
173-
self.logger.critical("Unable to load Swagger docs from API... terminating. Check API connectivity.")
171+
# If the metadata URL is set (pulled from root metadata file earlier), then pull endpoint urls from metadata
172+
if "open_api_metadata_url" in self.config.keys() and self.config["open_api_metadata_url"] != "":
173+
# grab Descriptors and Resources swagger URLs
174+
try:
175+
self.logger.debug("fetching swagger docs...")
176+
response = requests.get(self.config["open_api_metadata_url"],
177+
verify=self.lightbeam.config["connection"]["verify_ssl"])
178+
if not response.ok:
179+
raise Exception("OpenAPI metadata URL returned status {0} ({1})".format(response.status_code, (response.content[:75] + "...") if len(response.content)>75 else response.content))
180+
openapi = response.json()
181+
182+
except Exception as e:
183+
self.logger.critical("Unable to load Swagger docs from API... terminating. Check API connectivity.")
184+
185+
# If metadata URL is not found, set endpoint URLs from config file
186+
else:
187+
openapi = [
188+
{
189+
"name": "descriptors",
190+
"endpointUri": self.config["descriptors_swagger_url"],
191+
},
192+
{
193+
"name": "resources",
194+
"endpointUri": self.config["resources_swagger_url"],
195+
}
196+
]
174197

175198
# load (or re-use cached) Descriptors and Resources swagger
176199
self.descriptors_swagger = None
@@ -187,8 +210,8 @@ def load_swagger_docs(self):
187210
if endpoint_type=="descriptors" or endpoint_type=="resources":
188211
swagger_url = endpoint["endpointUri"]
189212
if self.lightbeam.track_state:
190-
hash = hashlog.get_hash_string(swagger_url)
191-
file = os.path.join(cache_dir, f"swagger-{endpoint_type}-{hash}.json")
213+
url_hash = hashlog.get_hash_string(swagger_url)
214+
file = os.path.join(cache_dir, f"swagger-{endpoint_type}-{url_hash}.json")
192215
if (
193216
self.lightbeam.track_state # we have a state_dir in which to store
194217
and not self.lightbeam.wipe # we aren't clearing the cache
@@ -234,8 +257,8 @@ async def load_descriptors_values(self):
234257
os.mkdir(cache_dir)
235258

236259
# check for cached descriptor values
237-
hash = hashlog.get_hash_string(self.config["base_url"])
238-
cache_file = os.path.join(cache_dir, f"descriptor-values-{hash}.csv")
260+
url_hash = hashlog.get_hash_string(self.config["base_url"])
261+
cache_file = os.path.join(cache_dir, f"descriptor-values-{url_hash}.csv")
239262

240263
self.lightbeam.reset_counters()
241264
if (
@@ -256,8 +279,12 @@ async def load_descriptors_values(self):
256279
# load descriptor values from API
257280
selector_backup = self.lightbeam.selector
258281
exclude_backup = self.lightbeam.exclude
282+
keep_keys_backup = self.lightbeam.keep_keys
283+
drop_keys_backup = self.lightbeam.drop_keys
259284
self.lightbeam.selector = "*Descriptors"
260285
self.lightbeam.exclude = ""
286+
self.lightbeam.keep_keys = "*"
287+
self.lightbeam.drop_keys = ""
261288
self.logger.debug(f"fetching descriptor values...")
262289
all_endpoints = self.get_sorted_endpoints()
263290
self.lightbeam.endpoints = self.apply_filters(all_endpoints)
@@ -281,6 +308,8 @@ async def load_descriptors_values(self):
281308
self.lightbeam.results = []
282309
self.lightbeam.selector = selector_backup
283310
self.lightbeam.exclude = exclude_backup
311+
self.lightbeam.keep_keys = keep_keys_backup
312+
self.lightbeam.drop_keys = drop_keys_backup
284313
self.prepare()
285314

286315

lightbeam/delete.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,13 @@ async def do_deletes(self, endpoint):
9191
params = util.interpolate_params(params_structure, data)
9292

9393
# check if we've posted this data before
94-
hash = hashlog.get_hash(data)
95-
if self.lightbeam.track_state and hash in self.hashlog_data.keys():
94+
data_hash = hashlog.get_hash(data)
95+
if self.lightbeam.track_state and data_hash in self.hashlog_data.keys():
9696
# check if the last post meets criteria for a delete
97-
if self.lightbeam.meets_process_criteria(self.hashlog_data[hash]):
97+
if self.lightbeam.meets_process_criteria(self.hashlog_data[data_hash]):
9898
# yes, we need to delete it; append to task queue
9999
tasks.append(asyncio.create_task(
100-
self.do_delete(endpoint, file_name, params, counter)))
101-
102-
# remove the payload from the hashlog
103-
del self.hashlog_data[hash]
100+
self.do_delete(endpoint, file_name, params, counter, data_hash)))
104101
else:
105102
# no, do not delete
106103
self.lightbeam.num_skipped += 1
@@ -128,7 +125,7 @@ async def do_deletes(self, endpoint):
128125
hashlog.save(hashlog_file, self.hashlog_data)
129126

130127
# Deletes a single payload for a single endpoint
131-
async def do_delete(self, endpoint, file_name, params, line):
128+
async def do_delete(self, endpoint, file_name, params, line, data_hash=None):
132129
curr_token_version = int(str(self.lightbeam.token_version))
133130
while True: # this is not great practice, but an effective way (along with the `break` below) to achieve a do:while loop
134131
try:
@@ -163,7 +160,7 @@ async def do_delete(self, endpoint, file_name, params, line):
163160
if type(j)==list and len(j)==1:
164161
the_id = j[0]['id']
165162
# now we can delete by `id`
166-
await self.do_delete_id(endpoint, the_id, file_name, line)
163+
await self.do_delete_id(endpoint, the_id, file_name, line, data_hash)
167164
break
168165

169166
elif type(j)==list and len(j)==0: skip_reason = "payload not found in API"
@@ -195,7 +192,7 @@ async def do_delete(self, endpoint, file_name, params, line):
195192
self.logger.error(" (at line {0} of {1}; ID: {2} )".format(line, file_name, id))
196193
break
197194

198-
async def do_delete_id(self, endpoint, id, file_name=None, line=None):
195+
async def do_delete_id(self, endpoint, id, file_name=None, line=None, data_hash=None):
199196
curr_token_version = int(str(self.lightbeam.token_version))
200197
while True: # this is not great practice, but an effective way (along with the `break` below) to achieve a do:while loop
201198
try:
@@ -209,12 +206,15 @@ async def do_delete_id(self, endpoint, id, file_name=None, line=None):
209206
if status!=401:
210207
self.lightbeam.num_finished += 1
211208
self.lightbeam.increment_status_counts(status)
212-
if self.lightbeam.track_state:
213-
del self.hashlog_data[hash]
214209
if status not in [ 204 ]:
215210
message = str(status) + ": " + util.linearize(body)
216211
self.lightbeam.increment_status_reason(message)
217212
self.lightbeam.num_errors += 1
213+
else:
214+
if self.lightbeam.track_state and data_hash is not None:
215+
# if we're certain delete was successful, remove this
216+
# line of data from internal tracking
217+
del self.hashlog_data[data_hash]
218218
break # (out of while loop)
219219
else:
220220
# this could be broken out to a separate function call,
@@ -233,4 +233,4 @@ async def do_delete_id(self, endpoint, id, file_name=None, line=None):
233233
self.logger.exception(e, exc_info=self.lightbeam.config["show_stacktrace"])
234234
if line and file_name:
235235
self.logger.error(" (at line {0} of {1}; ID: {2} )".format(line, file_name, id))
236-
break
236+
break

lightbeam/fetch.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ async def get_endpoint_records(self, endpoint, limit, offset, file_handle=None):
8989
payload_keys = list(values[0].keys())
9090
final_keys = util.apply_selections(payload_keys, self.lightbeam.keep_keys, self.lightbeam.drop_keys)
9191
do_key_filtering = len(payload_keys) != len(final_keys)
92+
9293
for v in values:
93-
if do_key_filtering: row = {k: v[k] for k in final_keys}
94+
if do_key_filtering: row = {k: v.get(k, None) for k in final_keys} #v.get() to account for missing keys
9495
else: row = v
9596
if file_handle: file_handle.write(json.dumps(row)+"\n")
9697
else: self.lightbeam.results.append(row)

lightbeam/lightbeam.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ class Lightbeam:
2424
"namespace": "ed-fi",
2525
"edfi_api": {
2626
"base_url": "",
27+
"oauth_url": "",
28+
"dependencies_url": "",
29+
"descriptors_swagger_url": "",
30+
"resources_swagger_url": "",
2731
"version": 3,
2832
"mode": "year_specific",
2933
"year": datetime.today().year,
@@ -50,7 +54,7 @@ class Lightbeam:
5054
MAX_STATUS_REASONS_TO_DISPLAY = 10
5155
DATA_FILE_EXTENSIONS = ['json', 'jsonl', 'ndjson']
5256

53-
def __init__(self, config_file, logger=None, selector="*", exclude="", keep_keys="", drop_keys="", query="{}", params="", wipe=False, force=False, older_than="", newer_than="", resend_status_codes="", results_file=""):
57+
def __init__(self, config_file, logger=None, selector="*", exclude="", keep_keys="*", drop_keys="", query="{}", params="", wipe=False, force=False, older_than="", newer_than="", resend_status_codes="", results_file=""):
5458
self.config_file = config_file
5559
self.logger = logger
5660
self.errors = 0

0 commit comments

Comments
 (0)