Skip to content

Commit b9c7e19

Browse files
author
Tom Reitz
committed
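PR 31 introduced a bug: on case-insensitive filesystems, several casings of the same file name could be discovered, so a single file would be processed several times. This commit fixes that by stopping at the first casing that adds files to the list.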
1 parent ce30c37 commit b9c7e19

1 file changed

Lines changed: 8 additions & 0 deletions

File tree

lightbeam/lightbeam.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,13 +264,21 @@ def get_data_files_for_endpoint(self, endpoint):
264264
camelcase_endpoint = endpoint[0].upper() + endpoint[1:]
265265
for cased_endpoint in [endpoint, camelcase_endpoint, endpoint.lower(), endpoint.upper()]:
266266
possible_file = os.path.join(self.config["data_dir"], cased_endpoint + "." + ext)
267+
file_added = False
267268
if os.path.isfile(possible_file):
268269
file_list.append(possible_file)
270+
file_added = True
269271
possible_dir = os.path.join(self.config["data_dir"] + cased_endpoint)
270272
if os.path.isdir(possible_dir):
271273
for file in os.listdir(possible_dir):
272274
if file.endswith("." + ext):
273275
file_list.append(os.path.join(self.config["data_dir"], cased_endpoint, file))
276+
file_added = True
277+
# On case-insensitive filesystems, `os.path.isfile()` returns `True`
278+
# for both `Students.jsonl` and `students.jsonl`, so we stop at the first casing that
279+
# adds anything to `file_list`, to prevent duplicates.
280+
if file_added: break
281+
274282
return file_list
275283

276284
# Prunes the list of endpoints down to those for which .jsonl files exist in the config.data_dir

0 commit comments

Comments
 (0)