File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -264,13 +264,21 @@ def get_data_files_for_endpoint(self, endpoint):
264264 camelcase_endpoint = endpoint [0 ].upper () + endpoint [1 :]
265265 for cased_endpoint in [endpoint , camelcase_endpoint , endpoint .lower (), endpoint .upper ()]:
266266 possible_file = os .path .join (self .config ["data_dir" ], cased_endpoint + "." + ext )
267+ file_added = False
267268 if os .path .isfile (possible_file ):
268269 file_list .append (possible_file )
270+ file_added = True
269271 possible_dir = os .path .join (self .config ["data_dir" ] + cased_endpoint )
270272 if os .path .isdir (possible_dir ):
271273 for file in os .listdir (possible_dir ):
272274 if file .endswith ("." + ext ):
273275 file_list .append (os .path .join (self .config ["data_dir" ], cased_endpoint , file ))
276+ file_added = True
277+ # On case-insensitive filesystems, `os.path.isfile()` and `os.path.isdir()` succeed for
278+ # every casing variant of the same name (e.g. both `Students.jsonl` and `students.jsonl`),
279+ # so stop at the first variant that yields files to avoid duplicates in `file_list`.
280+ if file_added : break
281+
274282 return file_list
275283
276284 # Prunes the list of endpoints down to those for which .jsonl files exist in the config.data_dir
You can’t perform that action at this time.
0 commit comments