Skip to content

Commit 9f1a7a3

Browse files
authored
Merge pull request #77 from edanalytics/fix/files_discovered_multiple_times
fix duplicate file discovery
2 parents ce30c37 + b9c7e19 commit 9f1a7a3

1 file changed

Lines changed: 8 additions & 0 deletions

File tree

lightbeam/lightbeam.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,13 +264,21 @@ def get_data_files_for_endpoint(self, endpoint):
264264
camelcase_endpoint = endpoint[0].upper() + endpoint[1:]
265265
for cased_endpoint in [endpoint, camelcase_endpoint, endpoint.lower(), endpoint.upper()]:
266266
possible_file = os.path.join(self.config["data_dir"], cased_endpoint + "." + ext)
267+
file_added = False
267268
if os.path.isfile(possible_file):
268269
file_list.append(possible_file)
270+
file_added = True
269271
possible_dir = os.path.join(self.config["data_dir"] + cased_endpoint)
270272
if os.path.isdir(possible_dir):
271273
for file in os.listdir(possible_dir):
272274
if file.endswith("." + ext):
273275
file_list.append(os.path.join(self.config["data_dir"], cased_endpoint, file))
276+
file_added = True
277+
# On case-insensitive filesystems, `os.path.isfile()` returns `True`
278+
# for both `Students.jsonl` and `students.jsonl`... this ensures only one is added
279+
# to `file_list`, to prevent duplicates.
280+
if file_added: break
281+
274282
return file_list
275283

276284
# Prunes the list of endpoints down to those for which .jsonl files exist in the config.data_dir

0 commit comments

Comments
 (0)