From f05e9f61e2aae7842a165374f1b175414809988b Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 17:13:06 -0700 Subject: [PATCH 1/4] Prototype of package.site.toml files --- Lib/site.py | 168 ++++++++++++++++++++- Lib/test/test_site.py | 337 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 500 insertions(+), 5 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index 30015b3f26b4b3..dd5602188e3731 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -79,6 +79,10 @@ import stat import errno +lazy import importlib +lazy import tomllib +lazy import traceback + # Prefixes for site-packages; add additional prefixes like /usr/local here PREFIXES = [sys.prefix, sys.exec_prefix] # Enable per user site-packages directory @@ -163,6 +167,130 @@ def _init_pathinfo(): return d +class _SiteTOMLData: + """Parsed data from a single .site.toml file.""" + __slots__ = ('filename', 'sitedir', 'metadata', 'dirs', 'init') + + def __init__(self, filename, sitedir, metadata, dirs, init): + self.filename = filename # str: basename e.g. "foo.site.toml" + self.sitedir = sitedir # str: absolute path to site-packages dir + self.metadata = metadata # dict: raw [metadata] table (may be empty) + self.dirs = dirs # list[str]: validated [paths].dirs (may be empty) + self.init = init # list[str]: validated [entrypoints].init (may be empty) + + +def _read_site_toml(sitedir, name): + """Parse a .site.toml file and return a _SiteTOMLData, or None on error.""" + fullname = os.path.join(sitedir, name) + + # Check that name.site.toml file exists and is not hidden. 
+ try: + st = os.lstat(fullname) + except OSError: + return None + if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or + (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): + _trace(f"Skipping hidden .site.toml file: {fullname!r}") + return None + + _trace(f"Processing .site.toml file: {fullname!r}") + + try: + with io.open_code(fullname) as f: + raw = f.read() + except OSError: + return None + + try: + data = tomllib.loads(raw.decode("utf-8")) + except Exception as exc: + _trace(f"Error parsing {fullname!r}: {exc}") + return None + + metadata = data.get("metadata", []) + + # Validate [paths].dirs + dirs = [] + if (paths_table := data.get("paths")) is not None: + if (raw_dirs := paths_table.get("dirs")) is not None: + if (isinstance(raw_dirs, list) and + all(isinstance(d, str) for d in raw_dirs)): + dirs = raw_dirs + else: + _trace(f"Invalid 'dirs' in {fullname!r}: " + f"expected list of strings") + + # Validate [entrypoints].init + init = [] + if (ep_table := data.get("entrypoints")) is not None: + if (raw_init := ep_table.get("init")) is not None: + if (isinstance(raw_init, list) and + all(isinstance(e, str) for e in raw_init)): + init = raw_init + else: + _trace(f"Invalid 'init' in {fullname!r}: " + f"expected list of strings") + + return _SiteTOMLData(name, sitedir, metadata, dirs, init) + + +def _process_site_toml_paths(toml_data_list, known_paths): + """Process [paths] from all parsed .site.toml data.""" + for td in toml_data_list: + for dir_entry in td.dirs: + try: + # The {sitedir} placeholder expands to the site directory where the pkg.site.toml + # file was found. When placed at the beginning of the path, this is the explicit + # way to name directories relative to sitedir. + dir_entry = dir_entry.replace("{sitedir}", td.sitedir) + # For backward compatibility with .pth files, relative directories are implicitly + # anchored to sitedir. 
+ if not os.path.isabs(dir_entry): + dir_entry = os.path.join(td.sitedir, dir_entry) + dir, dircase = makepath(dir_entry) + if dircase not in known_paths and os.path.exists(dir): + sys.path.append(dir) + known_paths.add(dircase) + except Exception as exc: + fullname = os.path.join(td.sitedir, td.filename) + print(f"Error processing path {dir_entry!r} " + f"from {fullname}:", + file=sys.stderr) + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' ' + line, file=sys.stderr) + + +def _process_site_toml_entrypoints(toml_data_list): + """Execute [entrypoints] from all parsed .site.toml data.""" + for td in toml_data_list: + for entry in td.init: + try: + # Parse "package.module:callable" format. When the optional :callable is not given, + # the entire string will end up in the last item, so swap things around. + modname, colon, funcname = entry.rpartition(':') + if colon != ':': + modname = funcname + funcname = None + + _trace(f"Executing entrypoint: {entry!r} " + f"from {td.filename!r}") + + mod = importlib.import_module(modname) + + # Call the callable if given. 
+ if funcname is not None: + func = getattr(mod, funcname) + func() + except Exception as exc: + fullname = os.path.join(td.sitedir, td.filename) + print(f"Error in entrypoint {entry!r} from {fullname}:", + file=sys.stderr) + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' ' + line, file=sys.stderr) + + def addpackage(sitedir, name, known_paths): """Process a .pth file within the site-packages directory: For each line in the file, either combine it with sitedir to a path @@ -230,8 +358,8 @@ def addpackage(sitedir, name, known_paths): def addsitedir(sitedir, known_paths=None): - """Add 'sitedir' argument to sys.path if missing and handle .pth files in - 'sitedir'""" + """Add 'sitedir' argument to sys.path if missing and handle .site.toml + and .pth files in 'sitedir'""" _trace(f"Adding directory: {sitedir!r}") if known_paths is None: known_paths = _init_pathinfo() @@ -246,10 +374,40 @@ def addsitedir(sitedir, known_paths=None): names = os.listdir(sitedir) except OSError: return - names = [name for name in names - if name.endswith(".pth") and not name.startswith(".")] - for name in sorted(names): + + # Phase 1: Discover and parse .site.toml files, sorted alphabetically. 
+ toml_names = sorted( + name for name in names + if name.endswith(".site.toml") and not name.startswith(".") + ) + + toml_data_list = [] + superseded_pth = set() + + for name in toml_names: + # "foo.site.toml" supersedes "foo.pth" + base = name.removesuffix(".site.toml") + pth_name = base + ".pth" + if pth_name in names: + superseded_pth.add(pth_name) + td = _read_site_toml(sitedir, name) + if td is not None: + toml_data_list.append(td) + + # Phase 2: Process all .site.toml data (paths first, then entrypoints) + if toml_data_list: + _process_site_toml_paths(toml_data_list, known_paths) + _process_site_toml_entrypoints(toml_data_list) + + # Phase 3: Process remaining .pth files + pth_names = sorted( + name for name in names + if name.endswith(".pth") and not name.startswith(".") + and name not in superseded_pth + ) + for name in pth_names: addpackage(sitedir, name, known_paths) + if reset: known_paths = None return known_paths diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index e7dc5e2611c2de..3ecab6f6f30174 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -908,5 +908,342 @@ def test_both_args(self): self.assertEqual(output, excepted_output) +class SiteTomlTests(unittest.TestCase): + """Tests for .site.toml file processing.""" + + def setUp(self): + self.sys_path = sys.path[:] + self.tmpdir = self.sitedir = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.tmpdir) + + def tearDown(self): + sys.path[:] = self.sys_path + + def _make_site_toml(self, content, name='testpkg'): + """Write a .site.toml and return its name.""" + basename = name + '.site.toml' + filepath = os.path.join(self.tmpdir, basename) + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + return basename + + def _make_pth(self, content, name='testpkg'): + """Write a .pth file and return its name.""" + basename = name + '.pth' + filepath = os.path.join(self.tmpdir, basename) + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + 
return basename + + # --- _read_site_toml tests --- + + def test_read_site_toml_basic(self): + # Valid .site.toml with all sections. + subdir = os.path.join(self.tmpdir, 'subdir') + os.mkdir(subdir) + name = self._make_site_toml("""\ +[metadata] +schema_version = 1 +package = "testpkg" + +[paths] +dirs = ["subdir"] + +[entrypoints] +init = ["os"] +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.filename, name) + self.assertEqual(tomldata.sitedir, self.sitedir) + self.assertEqual(tomldata.metadata, { + 'schema_version': 1, 'package': 'testpkg'}) + self.assertEqual(tomldata.dirs, ['subdir']) + self.assertEqual(tomldata.init, ['os']) + + def test_read_site_toml_parse_error(self): + # Invalid pkg.site.toml content is skipped. + name = self._make_site_toml("not valid [[[toml") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNone(tomldata) + + def test_read_site_toml_invalid_dirs_type(self): + # dirs must be a list of strings. + name = self._make_site_toml("""\ +[paths] +dirs = "not_a_list" +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertEqual(tomldata.dirs, []) + + def test_read_site_toml_invalid_init_type(self): + # init must be a list of strings + name = self._make_site_toml("""\ +[paths] +dirs = ["subdir"] + +[entrypoints] +init = 42 +""") + subdir = os.path.join(self.tmpdir, 'subdir') + os.mkdir(subdir) + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.dirs, ['subdir']) + self.assertEqual(tomldata.init, []) + + def test_read_site_toml_empty_file(self): + # Empty .site.toml is a no-op. + name = self._make_site_toml("") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertEqual(tomldata.metadata, []) + self.assertEqual(tomldata.dirs, []) + self.assertEqual(tomldata.init, []) + + def test_read_site_toml_unknown_tables_ignored(self): + # Unknown tables should not cause errors. 
+ name = self._make_site_toml("""\ +[metadata] +schema_version = 1 + +[unknown_section] +key = "value" + +[entrypoints] +init = ["os"] +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.metadata, {'schema_version': 1}) + self.assertEqual(tomldata.init, ['os']) + + def test_read_site_toml_nonexistent(self): + # Nonexistent file returns None. + tomldata = site._read_site_toml(self.tmpdir, 'nonexistent.site.toml') + self.assertIsNone(tomldata) + + # --- Path processing tests --- + + def test_process_paths_relative(self): + # Relative paths are joined with sitedir. + subdir = os.path.join(self.sitedir, 'mylib') + os.mkdir(subdir) + name = self._make_site_toml("""\ +[paths] +dirs = ["mylib"] +""") + known_paths = set() + tomldata = site._read_site_toml(self.sitedir, name) + site._process_site_toml_paths([tomldata], known_paths) + self.assertIn(subdir, sys.path) + + def test_process_paths_absolute(self): + # Absolute paths are preserved as-is. + absdir = os.path.join(self.sitedir, 'abslib') + os.mkdir(absdir) + name = self._make_site_toml( + f'[paths]\ndirs = ["{absdir}"]\n') + known_paths = set() + tomldata = site._read_site_toml(self.sitedir, name) + site._process_site_toml_paths([tomldata], known_paths) + self.assertIn(absdir, sys.path) + + def test_process_paths_sitedir_placeholder(self): + # The {sitedir} placeholder expands to the site-packages dir. + subdir = os.path.join(self.sitedir, 'extra') + os.mkdir(subdir) + name = self._make_site_toml("""\ +[paths] +dirs = ["{sitedir}/extra"] +""") + known_paths = set() + tomldata = site._read_site_toml(self.sitedir, name) + site._process_site_toml_paths([tomldata], known_paths) + self.assertIn(os.path.join(self.tmpdir, 'extra'), sys.path) + + def test_process_paths_deduplication(self): + # Same path from two different files are only added once. 
+ subdir = os.path.join(self.tmpdir, 'shared') + os.mkdir(subdir) + tomldata1 = site._SiteTOMLData( + 'a.site.toml', self.tmpdir, [], ['shared'], []) + tomldata2 = site._SiteTOMLData( + 'b.site.toml', self.tmpdir, [], ['shared'], []) + known_paths = set() + site._process_site_toml_paths([tomldata1, tomldata2], known_paths) + self.assertEqual(sys.path.count(subdir), 1) + + def test_process_paths_nonexistent(self): + # Nonexistent directories are not added. + tomldata = site._SiteTOMLData( + 'test.site.toml', self.tmpdir, [], ['nosuchdir'], []) + known_paths = set() + sys_path = sys.path[:] + site._process_site_toml_paths([tomldata], known_paths) + self.assertEqual(sys.path, sys_path) + + # --- Entrypoint tests --- + + def test_process_entrypoints_import_only(self): + # Import-only entrypoint (no callable). + mod_dir = os.path.join(self.sitedir, 'epmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write("""\ +called = False +def startup(): + global called + called = True +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'epmod', None) + tomldata = site._SiteTOMLData( + 'test.site.toml', self.sitedir, [], [self.sitedir], ['epmod']) + site._process_site_toml_entrypoints([tomldata]) + import epmod + self.assertFalse(epmod.called) + + def test_process_entrypoints_with_callable(self): + # Entrypoint with callable is invoked. + # + # Create a module with a function that sets a flag. 
+ mod_dir = os.path.join(self.sitedir, 'epmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write("""\ +called = False +def startup(): + global called + called = True +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'epmod', None) + tomldata = site._SiteTOMLData( + 'test.site.toml', self.sitedir, [], [self.sitedir], ['epmod:startup']) + site._process_site_toml_entrypoints([tomldata]) + import epmod + self.assertTrue(epmod.called) + + def test_process_entrypoints_import_error(self): + # Import error prints traceback but continues. + tomldata = site._SiteTOMLData( + 'test.site.toml', self.sitedir, [], self.sitedir, + ['nosuchmodule_xyz', 'os']) + with captured_stderr() as err: + site._process_site_toml_entrypoints([tomldata]) + self.assertIn('nosuchmodule_xyz', err.getvalue()) + self.assertIn('Traceback', err.getvalue()) + # 'os' should still have been processed (no exception for it) + + def test_process_entrypoints_callable_error(self): + # Callable that raises prints traceback but continues. + mod_dir = os.path.join(self.sitedir, 'badmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write(""" +def fail(): + raise RuntimeError("boom") +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'badmod') + tomldata = site._SiteTOMLData( + 'test.site.toml', self.tmpdir, None, None, + ['badmod:fail', 'os']) + with captured_stderr() as err: + site._process_site_toml_entrypoints([tomldata]) + self.assertIn('RuntimeError', err.getvalue()) + self.assertIn('boom', err.getvalue()) + + # --- addsitedir integration tests --- + + def test_addsitedir_toml_supersedes_pth(self): + # When both foo.site.toml and foo.pth exist, only .toml is used. + # + # Start by creating two directories which will be the paths that both the foo.site.toml and + # foo.site.pth files will try to add respectively. 
+ toml_dir = os.path.join(self.sitedir, 'tomlpath') + pth_dir = os.path.join(self.sitedir, 'pthpath') + os.mkdir(toml_dir) + os.mkdir(pth_dir) + + self._make_site_toml("""\ +[paths] +dirs = ["tomlpath"] +""", name='foo') + self._make_pth("pthpath\n", name='foo') + + site.addsitedir(self.sitedir, set()) + self.assertIn(toml_dir, sys.path) + self.assertNotIn(pth_dir, sys.path) + + def test_addsitedir_toml_and_pth_coexist(self): + # Different basenames: both .toml and .pth are processed. + toml_dir = os.path.join(self.sitedir, 'tomlpath') + pth_dir = os.path.join(self.sitedir, 'pthpath') + os.mkdir(toml_dir) + os.mkdir(pth_dir) + + self._make_site_toml("""\ +[paths] +dirs = ["tomlpath"] +""", name='foo') + self._make_pth("pthpath\n", name='bar') + + site.addsitedir(self.sitedir, set()) + self.assertIn(toml_dir, sys.path) + self.assertIn(pth_dir, sys.path) + + def test_addsitedir_paths_before_entrypoints(self): + # Paths from .site.toml are added before entrypoints execution. + # + # Create a module in a subdir that will only be importable if the path + # is added first. + mod_dir = os.path.join(self.sitedir, 'initlib') + os.mkdir(mod_dir) + mod_file = os.path.join(mod_dir, 'initmod.py') + with open(mod_file, 'w') as f: + f.write('loaded = True\n') + + self._make_site_toml("""\ +[paths] +dirs = ["initlib"] + +[entrypoints] +init = ["initmod"] +""") + + self.addCleanup(sys.modules.pop, 'initmod') + site.addsitedir(self.sitedir, set()) + import initmod + self.assertTrue(initmod.loaded) + + def test_addsitedir_alphabetical_order(self): + # Multiple .site.toml files are processed alphabetically. 
+ dir_a = os.path.join(self.tmpdir, 'aaa') + dir_b = os.path.join(self.tmpdir, 'bbb') + os.mkdir(dir_a) + os.mkdir(dir_b) + + # Create zzz.site.toml first, then aaa.site.toml + self._make_site_toml("""\ +[paths] +dirs = ['bbb'] +""", name='zzz') + self._make_site_toml("""\ +[paths] +dirs = ['aaa'] +""", name='aaa') + + site.addsitedir(self.sitedir, set()) + # Both should be in sys.path; aaa before bbb since aaa.site.toml is + # processed before zzz.site.toml + idx_a = sys.path.index(dir_a) + idx_b = sys.path.index(dir_b) + self.assertLess(idx_a, idx_b) + + if __name__ == "__main__": unittest.main() From 594f347f43dc4421ed1c04ded99b1bb0441f7200 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 17:29:12 -0700 Subject: [PATCH 2/4] Validate [metadata].schema_version It's okay to be missing, but if it's given it must be an expected version (i.e. currently, 1). --- Lib/site.py | 9 ++++++++- Lib/test/test_site.py | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index dd5602188e3731..326f04aecbc79b 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -207,7 +207,14 @@ def _read_site_toml(sitedir, name): _trace(f"Error parsing {fullname!r}: {exc}") return None - metadata = data.get("metadata", []) + metadata = data.get("metadata", {}) + # Validate the TOML schema version. PEP XXX defines schema_version == 1. Both the [metadata] + # section and [metadata].schema_version are optional, but if missing, future compatibility + # cannot be guaranteed. 
+ if (schema_version := metadata.get("schema_version")) is not None: + if schema_version != 1: + _trace(f"Unsupported [metadata].schema_version: {schema_version}") + return None # Validate [paths].dirs dirs = [] diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 3ecab6f6f30174..8c7c095113830f 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -961,6 +961,27 @@ def test_read_site_toml_basic(self): self.assertEqual(tomldata.dirs, ['subdir']) self.assertEqual(tomldata.init, ['os']) + def test_missing_schema_version_is_okay(self): + # It's okay for the schema_version to be missing, or even the [metadata] section entirely + # (which is tested below). A missing schema_version just means that no future compatibility + # can be guaranteed. + name = self._make_site_toml("""\ +[metadata] +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.metadata, {}) + + def test_unexpected_schema_version_is_not_okay(self): + # If [metadata].schema_version exists, but isn't a supported number, then the entire TOML + # file is invalid and ignored. + name = self._make_site_toml("""\ +[metadata] +schema_version = 801 +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNone(tomldata) + def test_read_site_toml_parse_error(self): # Invalid pkg.site.toml content is skipped. name = self._make_site_toml("not valid [[[toml") @@ -996,7 +1017,7 @@ def test_read_site_toml_empty_file(self): # Empty .site.toml is a no-op. 
name = self._make_site_toml("") tomldata = site._read_site_toml(self.sitedir, name) - self.assertEqual(tomldata.metadata, []) + self.assertEqual(tomldata.metadata, {}) self.assertEqual(tomldata.dirs, []) self.assertEqual(tomldata.init, []) From ca6e4edca34bdd708b253d82d3aba1aeb1713079 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 18:57:01 -0700 Subject: [PATCH 3/4] Added PEP 829 draft --- pep-0829.rst | 546 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 546 insertions(+) create mode 100644 pep-0829.rst diff --git a/pep-0829.rst b/pep-0829.rst new file mode 100644 index 00000000000000..5d14ae971c1a5d --- /dev/null +++ b/pep-0829.rst @@ -0,0 +1,546 @@ +PEP: 829 +Title: Structured Startup Configuration via .site.toml Files +Author: Barry Warsaw +Status: Draft +Type: Standards Track +Topic: Packaging +Created: 31-Mar-2026 +Python-Version: 3.15 +Post-History: + + +Abstract +======== + +This PEP proposes a TOML-based configuration file format to replace +the ``.pth`` file mechanism used by ``site.py`` during interpreter +startup. The new format, using files named ``.site.toml``, +provides structured configuration for extending ``sys.path`` and +executing package initialization code, replacing the current ad-hoc +``.pth`` format that conflates path configuration with arbitrary code +execution. + + +Motivation +========== + +Python's ``.pth`` files (processed by ``Lib/site.py`` at startup) +support two functions: + +#. **Extending** ``sys.path`` -- Lines in this file (excluding + comments and lines that start with ``import``) name directories to + be appended to ``sys.path``. Relative paths are implicitly + anchored at the site-packages directory. + +#. **Executing code** -- lines starting with ``import`` (or + ``import\\t``) are executed immediate by passing the source string + to ``exec()``. + +This design has several problems: + +#. Code execution is a side effect of the implementation. 
Lines that + start with ``import`` can be extended by separating multiple + statements with a semicolon. As long as all the code to be + executed appears on the same line, it all gets executed when the + ``.pth`` file is processed. + +#. ``.pth`` files are essentially unstructured, leading to contents + which are difficult to reason about or verify, and often even + difficult to read. The format mixes two potentially useful features with + different security constraints, and offers no way to separate out these + concerns. + +#. The lack of ``.pth`` file structure also means there's no way to + express metadata, no future-proofing of the format, and no defined + execution or processing order of the contents. + +#. Using ``exec()`` on the file contents during interpreter startup is + a broad attack surface. + +#. There is no explicit concept of an entry point, which is an + established pattern in Python packaging. Packages that require + code execution and initialization at startup abuse ``import`` lines + rather than explicitly declaring entry points. + + +Specification +============= + +This PEP defines a new file format called ``.site.toml`` +which addresses all of the stated problems with ``.pth`` files. Like +``.pth`` files, ``.site.toml`` files are processed at Python +startup time by the ``site.py`` module, which means that the ``-S`` +option, which disables ``site.py``, also disables +``.site.toml`` files. + +The standard library ``tomllib`` package is used to read and process +``.site.toml`` files. + +Any parsing errors cause the entire ``.site.toml`` file to be +ignored and not processed (but it still supersedes any parallel +``.pth`` file). Any errors that occur when importing entry +point modules or calling entry point functions are reported but do not +abort the Python executable.
+ + +File Naming and Discovery +------------------------- + +* Packages may optionally install a single + ``<package>.site.toml`` file, mirroring the current ``<package>.pth`` file + convention. + +* The naming convention is ``<package>.site.toml``. The ``.site`` + marker distinguishes these from other TOML files that might exist in + site-packages and describes the file's purpose (processed by + ``site.py``). + +* ``.site.toml`` files live in the same site-packages directories + where ``.pth`` files are found today. + +* The discovery rules for ``.site.toml`` files are the same as for + ``.pth`` files today. File names that start with a single ``.`` + (e.g. ``.site.toml``) and files with OS-level hidden attributes (``UF_HIDDEN``, + ``FILE_ATTRIBUTE_HIDDEN``) are excluded. + +* The processing order is alphabetical by filename, matching ``.pth`` + behavior. + +* If both ``<package>.site.toml`` and ``<package>.pth`` exist in the same + directory, only the ``<package>.site.toml`` file is processed. + + +Processing Model +---------------- + +All ``.site.toml`` files in a given site-packages directory +are read and parsed into an intermediate data structure before any +processing (i.e. path extension or entry point execution) occurs. +This two-phase approach (read then process) enables: + +* A future **policy mechanism** that can inspect and modify the collected data + before execution (e.g., disabling entry points for specific packages or + enforcing path restrictions). **NOTE**: Such a policy framework is + explicitly out-of-scope for this PEP. + +* Future finer-grained control over the processing of path extensions + and entry point execution. For example, one could imagine special + ``-X`` options, environment variables, or other types of + configuration that allow path extensions only, or can explicitly + manage allow or deny lists of entry points. **NOTE**: Such + configuration options are explicitly out-of-scope for this PEP. + +* Better error reporting.
All parsing, format, and data type errors + can be surfaced before any processing occurs. + +Within each site-packages directory, the processing order is: + +#. Discover and parse all ``.site.toml`` files (alphabetically). +#. Process all ``[paths]`` entries from the parsed data. +#. Execute all ``[entrypoints]`` entries from the parsed data. +#. Process any remaining ``.pth`` files that are not superseded by a + ``.site.toml`` file. + +This ensures that path extensions are in place before any entry point code +runs, and that ``.site.toml``-declared paths are available to both +entry point imports and ``.pth`` import lines. + + +<package>.site.toml file schema +------------------------------- + +A ``.site.toml`` file is defined to have three sections, all of which +are optional: + +.. code-block:: toml + + [metadata] + schema_version = 1 + package = "foo" + version = "2.3.4" + author = "A Person <aperson@example.com>" + + [paths] + dirs = ["../lib", "/opt/mylib", "{sitedir}/extra"] + + [entrypoints] + init = ["foo.startup:initialize", "foo.plugins"] + + +``[metadata]`` +'''''''''''''' + +This section contains package and/or file metadata. There are no required +keys, and no semantics are assigned to any keys in this section *except* for +the optional ``schema_version`` key (see below). Any additional keys are +permitted and preserved. + +Defined keys: + +``schema_version`` (integer, recommended) + The TOML file schema version number. Must be the integer ``1`` for this + specification. If present, Python guarantees forward-compatible handling: + future versions will either process the file according to the declared + schema or skip it with a clear diagnostic. It is an error if + ``schema_version`` is present but has an unsupported value; in that case the entire + file is skipped. If ``schema_version`` is omitted, the file is processed + on a best-effort basis with no forward-compatibility guarantees. + +Recommended keys: + +``package`` (string) + The package name.
+ +``version`` (string) + The package version. + +``author`` (string) + The package author. Should be + ``email.utils.parseaddr()``-compatible, e.g., + ``"A person <aperson@example.com>"`` or + ``"aperson@example.com"``. + + +``[paths]`` +''''''''''' + +Defined keys: + +``dirs`` + A list of strings specifying directories to append to ``sys.path``. + +Path entries use a hybrid resolution scheme: + +* **Relative paths** are anchored at the site-packages directory (sitedir), + matching current ``.pth`` behavior. For example, ``../lib`` in a file under + ``/usr/lib/python3.15/site-packages/`` resolves to + ``/usr/lib/python3.15/lib``. + +* **Absolute paths** are preserved as-is. For example, ``/opt/mylib`` is used + exactly as written. + +* **Placeholder variables** are supported using ``{name}`` syntax. The + placeholder ``{sitedir}`` expands to the site-packages directory where the + ``.site.toml`` file was found. Thus ``{sitedir}/relpath`` and + ``relpath`` resolve to the same path; the former is the explicit + spelling of the implicit relative form. + +While only ``{sitedir}`` is defined in this PEP, additional +placeholder variables (e.g., ``{prefix}``, ``{exec_prefix}``, +``{userbase}``) may be defined in future PEPs. + +If ``dirs`` is not a list of strings, a warning is emitted (visible +with ``-v``) and the section is skipped. + +Directories that do not exist on the filesystem are silently skipped, +matching ``.pth`` behavior. Duplicate paths are +de-duplicated, also matching ``.pth`` behavior. + + +``[entrypoints]`` +''''''''''''''''' + +``init`` -- a list of strings specifying entry point references to +execute at startup. Each item uses the standard Python entry point +syntax: ``package.module:callable``. + +* The ``:callable`` portion is optional. If omitted (e.g., + ``package.module``), the module is imported via + ``importlib.import_module()`` but nothing is called. This covers the common + ``.pth`` pattern of ``import foo`` for side effects.
+ +* Callables are invoked with no arguments. + +* Entries are executed in the listed order. + +* The ``[extras]`` syntax from the packaging entry point spec is not + supported; it is installer metadata and has no meaning at + interpreter startup. + + +General Schema Rules +'''''''''''''''''''' + +* All three sections are optional. An empty ``.site.toml`` + file is a valid no-op. + +* Unknown tables are silently ignored, providing forward compatibility for + future extensions. + +* ``[paths]`` is always processed before ``[entrypoints]``, regardless of the + order the sections appear in the TOML file. + + +Error Handling +-------------- + +Errors are handled differently depending on the phase: + +Phase 1: Reading and Parsing + If a ``.site.toml`` file cannot be opened, decoded, or parsed as + valid TOML, it is skipped and processing continues to the next file. + Errors are reported only when ``-v`` (verbose) is given. + +Phase 2: Execution + If a path entry or entry point raises an exception during processing, the + traceback is printed to ``sys.stderr``, the failing entry is skipped, and + processing continues with the remaining entries in that file and + subsequent files. + +This is a deliberate improvement over ``.pth`` behavior, which aborts +processing the remainder of a file on the first error. + + +Rationale +========= + +TOML as the configuration format + TOML is already used by ``pyproject.toml`` and is familiar to the Python + packaging ecosystem. It is an easily human-readable and writable format + that aids in validation and auditing. TOML files are structured and + typed, and can be easily reasoned about. TOML files allow for easy + future extensibility. The ``tomllib`` module has been available in the standard + library since Python 3.11. + +The ``.site.toml`` naming convention + A double extension clearly communicates purpose: the ``.site`` marker + indicates this is a site-startup configuration file, while ``.toml`` + indicates the format.
This avoids ambiguity with other TOML files that + might exist in site-packages now or in the future. The package name + prefix preserves the current ``.pth`` convention of a single + startup file per package. + +Hybrid path resolution + Implicit relative path joining (matching ``.pth`` behavior) + provides a smooth migration path, while ``{sitedir}`` and future + placeholder variables offer explicit, extensible alternatives. As with + ``.pth`` files, absolute paths are preserved and used verbatim. + +``importlib.import_module()`` instead of ``exec()`` + Using the standard import machinery is more predictable and auditable than + ``exec()``. It integrates with the import system's hooks and logging, and + the ``package.module:callable`` syntax is already well-established in the + Python packaging ecosystem (e.g., ``console_scripts``). Allowing for + optional ``:callable`` syntax preserves the import-side-effect + functionality of ``.pth`` files, making migration easier. + +Two-phase processing + Reading all configuration before executing any of it provides a natural + extension point for future policy mechanisms and makes error reporting + more predictable. + +Alphabetical ordering with no priority mechanism + Packages are installed independently, and there is no external arbiter of + priority. Alphabetical ordering matches ``.pth`` behavior and is + simple to reason about. Priority could be addressed by a future site-wide + policy configuration. + +``schema_version`` as recommended, not required + Requiring ``schema_version`` would make the simplest valid file more + verbose. Making it recommended strikes a balance: files that include it + get forward-compatibility guarantees, while simple files that omit it + still work on a best-effort basis. + +Continue on error rather than abort + The ``.pth`` behavior of aborting the rest of a file on the first + error is unnecessarily harsh. 
If a package declares three entry points + and one fails, the other two should still run. + + +Backwards Compatibility +======================= + +* ``.pth`` file processing is **not** removed. Both + ``.pth`` and ``.site.toml`` files are discovered + in parallel within each site-packages directory. This preserves + backward compatibility for all existing (pre-migration) packages. + Deprecation of ``.pth`` files is out of scope for this PEP. + +* When ``.site.toml`` exists alongside ``.pth``, the + ``.site.toml`` takes precedence and the ``.pth`` file is + skipped, providing for a natural migration path and easy compatibility with + older versions of Python that are unaware of ``.site.toml`` files. + +* Within a site-packages directory, all ``.site.toml`` files + are fully processed (paths and entry points) before any remaining + ``.pth`` files. + +* The ``site.addsitedir()`` public API retains its existing signature + and continues to accept ``known_paths``. + + +Security Implications +===================== + +This PEP improves the security posture of interpreter startup: + +* ``.site.toml`` files replace ``exec()`` with + ``importlib.import_module()`` and explicit ``getattr()`` calls, + which are more constrained and auditable. + +* ``io.open_code()`` is used to read ``.site.toml`` files, ensuring + that audit hooks (:pep:`578`) can monitor file access. + +* The two-phase processing model creates a natural point where a future policy + mechanism could inspect and restrict what gets executed. + +* The ``package.module:callable`` syntax limits execution to + importable modules and their attributes, unlike ``exec()``, which can + run arbitrary code. + +The overall attack surface is not eliminated -- a malicious +``.site.toml`` file can still cause arbitrary code execution via +``init`` entry points -- but the mechanism proposed in this PEP is more +structured, auditable, and amenable to policy controls.
+ + +How to Teach This +================= + +For package authors +------------------- + +If your package currently ships a ``.pth`` file, you can migrate to a +``.site.toml`` file. The equivalent of a ``.pth`` file +containing a directory name is: + +.. _code-block: toml + + [paths] + dirs = ["my_directory"] + +The equivalent of a ``.pth`` file containing ``import my_package`` +is: + +.. _code-block: toml + + [entrypoints] + init = ["my_package"] + +If your ``.pth`` file calls a specific function, use the +``module:callable`` syntax: + +.. _code-block: toml + + [entrypoints] + init = ["my_package.startup:initialize"] + +If your ``.pth`` file includes arbitrary code, put that code in a +start up function and use the ``module:callable`` syntax. + +Both ``.pth`` and ``.site.toml`` can coexist during +migration. If both exist for the same package, only the ``.site.toml`` is +processed. Thus, it is recommended that packages compatible with older +Pythons ship both files. + +For tool authors +---------------- + + Build backends and installers should generate ``.site.toml`` files + alongside or instead of ``.pth`` files, depending on the package's + Python support matrix. The TOML format is easy to handle programmatically: + use ``tomllib`` for reading and string formatting for writing (the + schema is simple enough that no TOML writer is needed). + + +Reference Implementation +========================= + +A reference implementation is provided as modifications to ``Lib/site.py``, +adding the following: + +* ``_SiteTOMLData`` -- a ``__slots__`` class holding parsed data from + a single ``.site.toml`` file (metadata, dirs, init). + +* ``_read_site_toml(sitedir, name)`` -- reads and parses a single + ``.site.toml`` file, validates types, and returns a + ``_SiteTOMLData`` instance or ``None`` on error. + +* ``_process_site_toml_paths(toml_data_list, known_paths)`` -- + processes ``[paths].dirs`` from all parsed files, expanding + placeholders and adding directories to ``sys.path`` as appropriate.
+ +* ``_process_site_toml_entrypoints(toml_data_list)`` -- executes + ``[entrypoints].init`` from all parsed files. + +* Modified ``addsitedir()`` -- orchestrates the three-phase flow: + discover and parse ``.site.toml`` files, process paths and + entry points, then process remaining ``.pth`` files. + +Tests are provided in ``Lib/test/test_site.py`` in the +``SiteTomlTests`` class. + + +Rejected Ideas +============== + +Single configuration file instead of per-package files + A single site-wide configuration file was considered but rejected + because it would require coordination between independently + installed packages and would not mirror the ``.pth`` + convention that tools already understand. + +JSON instead of TOML + JSON lacks comments and is less human-friendly. TOML is already + the standard configuration format in the Python ecosystem via + ``pyproject.toml``. + +YAML instead of TOML + YAML is not in the standard library and has well-documented + parsing pitfalls. + +Python instead of TOML + Python is imperative, whereas TOML is declarative. Thus, TOML files are + much more readily validated and reasoned about. + +``$schema`` URL reference + Unlike JSON, TOML has no standard ``$schema`` convention. A + simple integer ``schema_version`` is sufficient and + self-contained. + +Required ``schema_version`` + Requiring ``schema_version`` would make the simplest valid file + more verbose without significant benefit. The + recommended-but-optional approach balances simplicity with future-proofing. + +Separate ``load`` and ``execute`` keys in ``[entrypoints]`` + Splitting import-only and callable entry points into separate lists + was considered but rejected because it complicates execution + ordering. A single ``init`` list with both forms keeps ordering + explicit. + +Priority or weight field for processing order + Since packages are installed independently, there is no arbiter of + priority. Alphabetical ordering matches ``.pth`` + behavior.
Priority could be addressed by a future site-wide + policy configuration file, not per-package metadata. + +Passing arguments to callables + Callables are invoked with no arguments for simplicity and parity + with existing ``.pth`` import behavior. Future PEPs may + define an optional context argument (e.g., the parsed TOML data or + a site info object). + + +Open Issues +=========== + +* Should a warning be emitted when both ``.pth`` and + ``.site.toml`` coexist? + +* Should future ``-X`` options provide fine-grained control over + error reporting, unknown table warnings, and entry point execution? + +* Should callables receive context (e.g., the path to the + ``.site.toml`` file, the parsed TOML data, or a site info object)? + +* What additional placeholder variables should be supported beyond + ``{sitedir}``? Candidates include ``{prefix}``, ``{exec_prefix}``, and + ``{userbase}``. + + +Copyright +========= + +This document is placed in the public domain or under the +CC0-1.0-Universal license, whichever is more permissive. From 508f4936a0dce1a5ef15de1c31450867386ce984 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 19:12:27 -0700 Subject: [PATCH 4/4] Update and refine PEP 829 --- Lib/site.py | 2 +- pep-0829.rst | 30 +++++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index 326f04aecbc79b..ad46dc6943efd0 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -208,7 +208,7 @@ def _read_site_toml(sitedir, name): return None metadata = data.get("metadata", {}) - # Validate the TOML schema version. PEP XXX defines schema_version == 1. Both the [metadata] + # Validate the TOML schema version. PEP 829 defines schema_version == 1. Both the [metadata] # section and [metadata].schema_version are optional, but if missing, future compatibility # cannot be guaranteed. 
if (schema_version := metadata.get("schema_version")) is not None: diff --git a/pep-0829.rst b/pep-0829.rst index 5d14ae971c1a5d..d329f42a7f9171 100644 --- a/pep-0829.rst +++ b/pep-0829.rst @@ -33,7 +33,7 @@ support two functions: anchored at the site-packages directory. #. **Executing code** -- lines starting with ``import`` (or - ``import\\t``) are executed immediate by passing the source string + ``import\\t``) are executed immediately by passing the source string to ``exec()``. This design has several problems: @@ -107,7 +107,7 @@ File Naming and Discovery behavior. * If both ``.site.toml`` and ``.pth`` exist in the same - directory, only the ``.site.toml`` file is processed. + directory, only the ``.site.toml`` file is processed. Processing Model @@ -152,7 +152,7 @@ entry point imports and ``.pth`` import lines. A ``.site.toml`` file is defined to have three sections, all of which are optional: -.. _code-block: toml +.. code-block:: toml [metadata] schema_version = 1 @@ -181,9 +181,9 @@ Defined keys: The TOML file schema version number. Must be the integer ``1`` for this specification. If present, Python guarantees forward-compatible handling: future versions will either process the file according to the declared - schema or skip it with a clear diagnostic. It is an error if the - ``schema_version`` is present but has an unsupported value, the entire - file is skipped. If ``schema_version`` is omitted, the file is processed + schema or skip it with a clear diagnostic. If the + ``schema_version`` is present but has an unsupported value, the + entire file is skipped. If ``schema_version`` is omitted, the file is processed on a best-effort basis with no forward-compatibility guarantees. Recommended keys: @@ -279,7 +279,15 @@ Errors are handled differently depending on the phase: Phase 1: Reading and Parsing If a ``.site.toml`` file cannot be opened, decoded, or parsed as valid TOML, it is skipped and processing continues to the next file. 
- Errors are reported only when ``-v`` (verbose) is given. + Errors are reported only when ``-v`` (verbose) is given. Importantly, + a ``.site.toml`` file that fails to parse **still supersedes** + its corresponding ``.pth`` file. The existence of the + ``.site.toml`` file is sufficient to suppress + ``.pth`` processing, regardless of whether the TOML file + parses successfully. This prevents confusing dual-execution + scenarios and ensures that a broken ``.site.toml`` is + noticed rather than silently masked by fallback to the + ``.pth`` file. Phase 2: Execution If a path entry or entry point raises an exception during processing, the @@ -404,7 +412,7 @@ If your package currently ships a ``.pth`` file, you can migrate to a ``.site.toml`` file. The equivalent of a ``.pth`` file containing a directory name is: -.. _code-block: toml +.. code-block:: toml [paths] dirs = ["my_directory"] @@ -412,7 +420,7 @@ containing a directory name is: The equivalent of a ``.pth`` file containing ``import my_package`` is: -.. _code-block: toml +.. code-block:: toml [entrypoints] init = ["my_package"] @@ -420,7 +428,7 @@ is: If your ``.pth`` file calls a specific function, use the ``module:callable`` syntax: -.. _code-block: toml +.. code-block:: toml [entrypoints] init = ["my_package.startup:initialize"] @@ -429,7 +437,7 @@ If your ``.pth`` file includes arbitrary code, put that code in a start up function and use the ``module:callable`` syntax. Both ``.pth`` and ``.site.toml`` can coexist during -migration. If both exist for the same package, only the ``.site.toml`` is +migration. If both exist for the same package, only the ``.site.toml`` is processed. Thus, it is recommended that packages compatible with older Pythons ship both files.