From 609223859506dd8a7c241c1035233798c3cb1585 Mon Sep 17 00:00:00 2001 From: LeSingh1 Date: Tue, 26 May 2026 09:19:10 -0700 Subject: [PATCH 1/2] fix(client): sanitize NO_PROXY whitespace before httpx init NO_PROXY values that contain newlines or tabs (common when set from Docker, .env files, or shell scripts that wrap lines) used to break OpenAI() / AsyncOpenAI() construction with httpx.InvalidURL: the hostname token includes the whitespace and httpx rejects it during its proxy-mounts setup before any request is made. The underlying parse bug is in httpx (get_environment_proxies splits only on commas), but the encode/httpx project is not currently accepting external PRs, so the practical workaround is for the SDK to normalize the value just for the duration of the internal httpx client's __init__. Introduce a _sanitized_proxy_env() context manager that swaps any whitespace inside NO_PROXY / no_proxy for commas, wraps each of the three internal httpx wrappers (_DefaultHttpxClient, _DefaultAsyncHttpxClient, _DefaultAioHttpClient), and restores the original value on exit so user code observes the env var it set. Fixes #3303. --- src/openai/_base_client.py | 40 +++++++++++++++++++++++++++++++++++--- tests/test_client.py | 26 +++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 216b36aabd..ffbdf1ed9d 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os +import re import sys import json import time @@ -10,6 +12,7 @@ import logging import platform import warnings +import contextlib import email.utils from types import TracebackType from random import random @@ -831,12 +834,41 @@ def _idempotency_key(self) -> str: return f"stainless-python-retry-{uuid.uuid4()}" +@contextlib.contextmanager +def _sanitized_proxy_env() -> Iterator[None]: + """Temporarily replace any whitespace inside `NO_PROXY` / `no_proxy` with commas. + + httpx parses these env vars by splitting on commas only, so newlines or + tabs that sneak in via Docker, .env files, or shell scripts become part + of the hostname and trigger `httpx.InvalidURL` during client construction. + Normalize them just for the duration of the wrapped httpx initialization + so the parsed proxy bypass list is well-formed. See openai/openai-python#3303. + """ + saved: Dict[str, Optional[str]] = {} + for name in ("NO_PROXY", "no_proxy"): + original = os.environ.get(name) + saved[name] = original + if original is not None and re.search(r"\s", original): + cleaned = re.sub(r"\s+", ",", original.strip()) + cleaned = re.sub(r",+", ",", cleaned).strip(",") + os.environ[name] = cleaned + try: + yield + finally: + for name, original in saved.items(): + if original is None: + os.environ.pop(name, None) + else: + os.environ[name] = original + + class _DefaultHttpxClient(httpx.Client): def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - super().__init__(**kwargs) + with _sanitized_proxy_env(): + super().__init__(**kwargs) if TYPE_CHECKING: @@ -1423,7 +1455,8 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - super().__init__(**kwargs) + with _sanitized_proxy_env(): + super().__init__(**kwargs) try: @@ -1441,7 +1474,8 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - super().__init__(**kwargs) + with _sanitized_proxy_env(): + super().__init__(**kwargs) if TYPE_CHECKING: diff --git a/tests/test_client.py b/tests/test_client.py index 2d8955a58e..a0306ad8fe 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1292,6 +1292,23 @@ def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> N assert len(mounts) == 1 assert mounts[0][0].pattern == "https://" + def test_no_proxy_with_whitespace_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None: + # Regression test for openai/openai-python#3303: NO_PROXY values that + # contain newlines or other whitespace (common in Docker, .env files, + # and shell scripts) used to break client construction with + # httpx.InvalidURL because httpx splits NO_PROXY only by comma. + # The sanitizer in _DefaultHttpxClient.__init__ normalizes whitespace + # to commas just for the duration of the httpx init, then restores + # the original env var. + monkeypatch.setenv("NO_PROXY", "localhost\nexample.com\t192.168.1.1") + monkeypatch.delenv("no_proxy", raising=False) + + # Construction must not raise. + DefaultHttpxClient() + + # The original env var is restored so user code observes what it set. + assert os.environ["NO_PROXY"] == "localhost\nexample.com\t192.168.1.1" + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") def test_default_client_creation(self) -> None: # Ensure that the client can be initialized without any exceptions @@ -2552,6 +2569,15 @@ async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch assert len(mounts) == 1 assert mounts[0][0].pattern == "https://" + async def test_no_proxy_with_whitespace_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None: + # Async counterpart of the sync regression test for #3303. + monkeypatch.setenv("NO_PROXY", "localhost\nexample.com\t192.168.1.1") + monkeypatch.delenv("no_proxy", raising=False) + + DefaultAsyncHttpxClient() + + assert os.environ["NO_PROXY"] == "localhost\nexample.com\t192.168.1.1" + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") async def test_default_client_creation(self) -> None: # Ensure that the client can be initialized without any exceptions From ef872e1650e46ed81ef2216612d387d85a11a467 Mon Sep 17 00:00:00 2001 From: LeSingh1 Date: Tue, 26 May 2026 10:01:01 -0700 Subject: [PATCH 2/2] fix(client): serialize _sanitized_proxy_env env-var mutation The contextmanager added in 6092238 mutates os.environ to clean up NO_PROXY whitespace, which is process-wide and not thread-safe. Two clients constructed on different threads can interleave: thread A saves the original and cleans the env, thread B enters during that window and snapshots the already-cleaned value as its 'original', then both threads restore in turn, leaving NO_PROXY permanently sanitized for the rest of the process. Wrap the env mutation in a module-level threading.Lock so the save/clean/restore sequence runs atomically. Add a concurrency regression test that spins up 8 threads constructing 20 clients each and asserts NO_PROXY equals the caller's original string at the end. Codex review caught this on the original PR. --- src/openai/_base_client.py | 48 ++++++++++++++++++++++++-------------- tests/test_client.py | 33 ++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 17 deletions(-) diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index ffbdf1ed9d..3340b41d77 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -11,6 +11,7 @@ import inspect import logging import platform +import threading import warnings import contextlib import email.utils @@ -834,6 +835,12 @@ def _idempotency_key(self) -> str: return f"stainless-python-retry-{uuid.uuid4()}" +# Serializes the temporary NO_PROXY mutation in `_sanitized_proxy_env` so +# overlapping client constructions from multiple threads cannot snapshot +# each other's cleaned value and leave the env permanently sanitized. +_proxy_env_lock = threading.Lock() + + @contextlib.contextmanager def _sanitized_proxy_env() -> Iterator[None]: """Temporarily replace any whitespace inside `NO_PROXY` / `no_proxy` with commas. @@ -842,24 +849,31 @@ def _sanitized_proxy_env() -> Iterator[None]: tabs that sneak in via Docker, .env files, or shell scripts become part of the hostname and trigger `httpx.InvalidURL` during client construction. Normalize them just for the duration of the wrapped httpx initialization - so the parsed proxy bypass list is well-formed. See openai/openai-python#3303. + so the parsed proxy bypass list is well-formed, then restore the + original value. See openai/openai-python#3303. + + The mutation is serialized by `_proxy_env_lock` because `os.environ` is + process-wide; without the lock, concurrent client constructions could + snapshot each other's already-cleaned value as their "original" and + leave NO_PROXY permanently sanitized for the rest of the process. """ - saved: Dict[str, Optional[str]] = {} - for name in ("NO_PROXY", "no_proxy"): - original = os.environ.get(name) - saved[name] = original - if original is not None and re.search(r"\s", original): - cleaned = re.sub(r"\s+", ",", original.strip()) - cleaned = re.sub(r",+", ",", cleaned).strip(",") - os.environ[name] = cleaned - try: - yield - finally: - for name, original in saved.items(): - if original is None: - os.environ.pop(name, None) - else: - os.environ[name] = original + with _proxy_env_lock: + saved: Dict[str, Optional[str]] = {} + for name in ("NO_PROXY", "no_proxy"): + original = os.environ.get(name) + saved[name] = original + if original is not None and re.search(r"\s", original): + cleaned = re.sub(r"\s+", ",", original.strip()) + cleaned = re.sub(r",+", ",", cleaned).strip(",") + os.environ[name] = cleaned + try: + yield + finally: + for name, original in saved.items(): + if original is None: + os.environ.pop(name, None) + else: + os.environ[name] = original class _DefaultHttpxClient(httpx.Client): diff --git a/tests/test_client.py b/tests/test_client.py index a0306ad8fe..850fa8428a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1309,6 +1309,39 @@ def test_no_proxy_with_whitespace_does_not_raise(self, monkeypatch: pytest.Monke # The original env var is restored so user code observes what it set. assert os.environ["NO_PROXY"] == "localhost\nexample.com\t192.168.1.1" + def test_no_proxy_concurrent_construction_preserves_original( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + # Without the lock around _sanitized_proxy_env, two threads that + # construct clients at the same time could snapshot each other's + # already-cleaned value as the "original" and leave NO_PROXY + # permanently sanitized for the rest of the process. Spin up + # several constructions in parallel and assert the env is the + # same string the caller set when everything has finished. + import threading + + original = "localhost\nexample.com\t192.168.1.1\n10.0.0.0/8" + monkeypatch.setenv("NO_PROXY", original) + monkeypatch.delenv("no_proxy", raising=False) + + errors: list[BaseException] = [] + + def construct() -> None: + try: + for _ in range(20): + DefaultHttpxClient() + except BaseException as e: + errors.append(e) + + threads = [threading.Thread(target=construct) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors, f"client construction raised: {errors!r}" + assert os.environ["NO_PROXY"] == original + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") def test_default_client_creation(self) -> None: # Ensure that the client can be initialized without any exceptions