diff --git a/src/evaluate/loading.py b/src/evaluate/loading.py
index 505015b1..8fca3258 100644
--- a/src/evaluate/loading.py
+++ b/src/evaluate/loading.py
@@ -330,7 +330,10 @@ def _copy_script_and_other_resources_in_importable_dir(
             shutil.copyfile(original_local_path, importable_local_file)
 
         # Record metadata associating original dataset path with local unique folder
-        meta_path = importable_local_file.split(".py")[0] + ".json"
+        # Strip only the trailing extension. `split(".py")[0]` cut the path at the
+        # first ".py" found anywhere in it, so a cache under a dot-prefixed "py"
+        # directory (~/.pyenv, ~/.pytest_cache, ...) resolved to "<parent>/.json".
+        meta_path = os.path.splitext(importable_local_file)[0] + ".json"
         if not os.path.exists(meta_path):
             meta = {"original file path": original_local_path, "local file path": importable_local_file}
             # the filename is *.py in our case, so better rename to filenam.json instead of filename.py.json
diff --git a/tests/test_load.py b/tests/test_load.py
index e20ea671..a1e0f003 100644
--- a/tests/test_load.py
+++ b/tests/test_load.py
@@ -138,3 +138,51 @@ def test_cache_with_remote_community_module(self):
                 evaluation_module_factory(
                     metric, download_config=self.download_config, dynamic_modules_path=self.dynamic_modules_path
                 )
+
+
+def test_copy_script_metadata_path_when_ancestor_dir_contains_py():
+    """Regression: the metadata file must be written next to the copied script.
+
+    The previous implementation used `importable_local_file.split(".py")[0]`,
+    which keeps only the text before the *first* ".py" in the whole path, so a
+    cache path under ~/.pyenv/... wrote the metadata to ~/.json — outside the
+    cache tree — instead of next to the copied script.
+    """
+    import json
+
+    from evaluate.loading import _copy_script_and_other_resources_in_importable_dir
+
+    with tempfile.TemporaryDirectory() as root:
+        # Simulate a pyenv-style cache path: ancestor directory contains ".py".
+        importable_directory_path = os.path.join(root, ".pyenv", "evaluate_modules")
+        # The FileLock acquired inside the function needs the parent dir to exist.
+        os.makedirs(os.path.dirname(importable_directory_path), exist_ok=True)
+        subdirectory_name = "abcd1234"
+        name = "accuracy"
+
+        original_script_path = os.path.join(root, "src_accuracy.py")
+        with open(original_script_path, "w", encoding="utf-8") as f:
+            f.write("# dummy metric script\n")
+
+        _copy_script_and_other_resources_in_importable_dir(
+            name=name,
+            importable_directory_path=importable_directory_path,
+            subdirectory_name=subdirectory_name,
+            original_local_path=original_script_path,
+            local_imports=[],
+            additional_files=[],
+            download_mode=None,
+        )
+
+        copied_script = os.path.join(importable_directory_path, subdirectory_name, name + ".py")
+        expected_meta = os.path.join(importable_directory_path, subdirectory_name, name + ".json")
+        # Where the old split(".py")[0] logic would have written the file.
+        leaked_meta = os.path.join(root, ".json")
+
+        assert os.path.isfile(copied_script), f"script was not copied to {copied_script}"
+        assert os.path.isfile(expected_meta), f"meta file missing at {expected_meta}"
+        assert not os.path.exists(leaked_meta), f"meta file leaked to {leaked_meta}"
+        with open(expected_meta, "r", encoding="utf-8") as f:
+            meta = json.load(f)
+        assert meta["original file path"] == original_script_path
+        assert meta["local file path"] == copied_script