From 3e9648bb87078a9fa0437323524ea248b73eeadb Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 09:26:58 +0200 Subject: [PATCH 01/13] fix(starlette): Do not attach eagerly consumed request bodies on streamed spans in async handlers --- sentry_sdk/integrations/starlette.py | 181 ++++++-- .../integrations/starlette/test_starlette.py | 423 +++++++++--------- 2 files changed, 359 insertions(+), 245 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 4371ed4f58..c0a9c09cfc 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -8,7 +8,7 @@ from typing import TYPE_CHECKING import sentry_sdk -from sentry_sdk.consts import OP +from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import ( _DEFAULT_FAILED_REQUEST_STATUS_CODES, DidNotEnable, @@ -37,6 +37,7 @@ parse_version, transaction_from_function, ) +from sentry_sdk._types import OVER_SIZE_LIMIT_SUBSTITUTE if TYPE_CHECKING: from typing import Any, Awaitable, Callable, Container, Dict, Optional, Tuple, Union @@ -89,6 +90,9 @@ TRANSACTION_STYLE_VALUES = ("endpoint", "url") +_SCOPE_STATE_JSON_REQUEST_BODY_KEY = "sentry_sdk.json_request_body" +_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY = "sentry_sdk.formdata_request_body" + class StarletteIntegration(Integration): identifier = "starlette" @@ -147,6 +151,16 @@ def setup_once() -> None: if version >= (0, 24): patch_templates() + def setup_once_with_options( + self: "StarletteIntegration", options: "Optional[dict[str, Any]]" = None + ) -> None: + is_span_streaming_enabled = has_span_streaming_enabled(options) + if not is_span_streaming_enabled: + return + + _patch_json_request_body_accessor() + _patch_formdata_request_body_accessor() + def _enable_span_for_middleware(middleware_class: "Any") -> type: old_call = middleware_class.__call__ @@ -479,61 +493,142 @@ def _is_async_callable(obj: "Any") -> bool: ) -def patch_request_response() -> None: - old_request_response = starlette.routing.request_response +def _patch_json_request_body_accessor(): + """ + Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. + Without the cache, consuming the stream can cause the application to hang. + """ + _original_json = Request.json - def _sentry_request_response(func: "Callable[[Any], Any]") -> "ASGIApp": - old_func = func + @functools.wraps(_original_json) + async def sentry_json(self: "Request"): + request_json = await _original_json(self) + self.scope["state"][_SCOPE_STATE_JSON_REQUEST_BODY_KEY] = request_json + return request_json - is_coroutine = _is_async_callable(old_func) - if is_coroutine: + Request.json = sentry_json - async def _sentry_async_func(*args: "Any", **kwargs: "Any") -> "Any": - client = sentry_sdk.get_client() - integration = client.get_integration(StarletteIntegration) - if integration is None: - return await old_func(*args, **kwargs) - request = args[0] +def _patch_formdata_request_body_accessor(): + """ + Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. + Without the cache, consuming the stream can cause the application to hang. + """ + _original_form = Request.form - _set_transaction_name_and_source( - sentry_sdk.get_current_scope(), - integration.transaction_style, - request, - ) + @functools.wraps(_original_form) + async def sentry_form(self: "Request"): + print("wrapped form") + request_formdata = await _original_form(self) + self.scope["state"][_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] = request_formdata + return request_formdata - sentry_scope = sentry_sdk.get_isolation_scope() - extractor = StarletteRequestExtractor(request) - info = await extractor.extract_request_info() + Request.form = sentry_form - def _make_request_event_processor( - req: "Any", integration: "Any" - ) -> "Callable[[Event, dict[str, Any]], Event]": - def event_processor( - event: "Event", hint: "Dict[str, Any]" - ) -> "Event": - # Add info from request to event - request_info = event.get("request", {}) - if info: - if "cookies" in info: - request_info["cookies"] = info["cookies"] - if "data" in info: - request_info["data"] = info["data"] - event["request"] = deepcopy(request_info) - return event +def _serialize_cached_request_body_attribute( + client: "sentry_sdk.client.BaseClient", request: "Request" +) -> "Optional[str]": + """ + Returns a stringified JSON representation of the request body if the request body is cached on the ASGI scope and within size bounds. + """ + if ( + "content-length" not in request.headers + or _SCOPE_STATE_JSON_REQUEST_BODY_KEY not in request.scope["state"] + and _SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY not in request.scope["state"] + ): + return None - return event_processor + content_length = int(request.headers["content-length"]) + if content_length and not request_body_within_bounds(client, content_length): + return OVER_SIZE_LIMIT_SUBSTITUTE - sentry_scope._name = StarletteIntegration.identifier - sentry_scope.add_event_processor( - _make_request_event_processor(request, integration) + if _SCOPE_STATE_JSON_REQUEST_BODY_KEY in request.scope["state"]: + return json.dumps(request.scope["state"][_SCOPE_STATE_JSON_REQUEST_BODY_KEY]) + + form = request.scope["state"][_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] + + form_data = {} + for key, val in form.items(): + is_file = isinstance(val, UploadFile) + form_data[key] = val if not is_file else "[Unparsable]" + + return json.dumps(form_data) + + +async def _wrap_async_handler(handler, *args, **kwargs): + """ + Wraps an asynchronous handler function to attach request info to the server segment span. + The request body cached on the ASGI scope is attached to streamed spans, but consuming the request body in the event + processor can still cause application hangs. + """ + client = sentry_sdk.get_client() + integration = client.get_integration(StarletteIntegration) + if integration is None: + return await handler(*args, **kwargs) + + request = args[0] + + _set_transaction_name_and_source( + sentry_sdk.get_current_scope(), + integration.transaction_style, + request, + ) + + sentry_scope = sentry_sdk.get_isolation_scope() + extractor = StarletteRequestExtractor(request) + + info = await extractor.extract_request_info() + + def _make_request_event_processor( + req: "Any", integration: "Any" + ) -> "Callable[[Event, dict[str, Any]], Event]": + def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event": + # Add info from request to event + request_info = event.get("request", {}) + if info: + if "cookies" in info: + request_info["cookies"] = info["cookies"] + if "data" in info: + request_info["data"] = info["data"] + event["request"] = deepcopy(request_info) + + return event + + return event_processor + + sentry_scope._name = StarletteIntegration.identifier + sentry_scope.add_event_processor( + _make_request_event_processor(request, integration) + ) + + try: + return await handler(*args, **kwargs) + finally: + current_span = _get_current_streamed_span() + + if type(current_span) is StreamedSpan: + serialized_request_body = _serialize_cached_request_body_attribute( + client=client, request=request + ) + if serialized_request_body: + current_span._segment.set_attribute( + SPANDATA.HTTP_REQUEST_BODY_DATA, + serialized_request_body, ) - if has_span_streaming_enabled(client.options): - _set_request_body_data_on_streaming_segment(info) - return await old_func(*args, **kwargs) +def patch_request_response() -> None: + old_request_response = starlette.routing.request_response + + def _sentry_request_response(func: "Callable[[Any], Any]") -> "ASGIApp": + old_func = func + + is_coroutine = _is_async_callable(old_func) + if is_coroutine: + + async def _sentry_async_func(*args: "Any", **kwargs: "Any") -> "Any": + return await _wrap_async_handler(old_func, *args, **kwargs) func = _sentry_async_func diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py index 25bdaa1787..3dc76bd095 100644 --- a/tests/integrations/starlette/test_starlette.py +++ b/tests/integrations/starlette/test_starlette.py @@ -18,6 +18,7 @@ StarletteIntegration, ) from sentry_sdk.utils import parse_version +from sentry_sdk.consts import SPANDATA import starlette from starlette.authentication import ( @@ -26,6 +27,7 @@ AuthenticationError, SimpleUser, ) +from starlette.requests import Request from starlette.exceptions import HTTPException from starlette.middleware import Middleware from starlette.middleware.authentication import AuthenticationMiddleware @@ -33,6 +35,10 @@ from starlette.testclient import TestClient from tests.integrations.conftest import parametrize_test_configurable_status_codes +try: + from starlette.middleware.exceptions import ExceptionMiddleware +except ImportError: + from starlette.exceptions import ExceptionMiddleware STARLETTE_VERSION = parse_version(starlette.__version__) @@ -305,117 +311,258 @@ async def my_send(*args, **kwargs): await self.app(scope, partial_receive, partial_send) +@pytest.fixture(autouse=True) +def reset_starlette_integration(uninstall_integration): + original_request_json = Request.json + original_request_form = Request.form + original_starlette_call = starlette.applications.Starlette.__call__ + original_request_response = starlette.routing.request_response + original_middleware_init = Middleware.__init__ + original_authentication_call = AuthenticationMiddleware.__call__ + original_exception_middleware_init = ExceptionMiddleware.__init__ + original_exception_middleware_call = ExceptionMiddleware.__call__ + + yield + + Request.json = original_request_json + Request.form = original_request_form + starlette.applications.Starlette.__call__ = original_starlette_call + starlette.routing.request_response = original_request_response + Middleware.__init__ = original_middleware_init + AuthenticationMiddleware.__call__ = original_authentication_call + ExceptionMiddleware.__init__ = original_exception_middleware_init + ExceptionMiddleware.__call__ = original_exception_middleware_call + + uninstall_integration("starlette") + + @pytest.mark.asyncio -async def test_request_info_json_body(sentry_init, capture_events): +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_request_info_json_body( + sentry_init, capture_events, capture_items, span_streaming +): sentry_init( traces_sample_rate=1.0, send_default_pii=True, integrations=[StarletteIntegration()], + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, ) starlette_app = starlette_app_factory() - events = capture_events() - client = TestClient(starlette_app) - client.post( - "/body/json", - json=BODY_JSON, - headers={ - "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", - }, - ) - (event, transaction_event) = events + if span_streaming: + items = capture_items("event", "span") + + client.post( + "/body/json", + json=BODY_JSON, + headers={ + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) - assert event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - assert event["request"]["data"] == BODY_JSON + (event,) = (item.payload for item in items if item.type == "event") + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + assert event["request"]["data"] == BODY_JSON - assert transaction_event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - assert transaction_event["request"]["data"] == BODY_JSON + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + server_span = next( + span for span in spans if span["attributes"]["sentry.op"] == "http.server" + ) + + assert json.loads( + server_span["attributes"][SPANDATA.HTTP_REQUEST_BODY_DATA] + ) == {"some": "json", "for": "testing", "nested": {"numbers": 123}} + else: + events = capture_events() + + client.post( + "/body/json", + json=BODY_JSON, + headers={ + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) + + (event, transaction_event) = events + + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + assert event["request"]["data"] == BODY_JSON + + assert transaction_event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + assert transaction_event["request"]["data"] == BODY_JSON @pytest.mark.asyncio -async def test_formdata_request_body(sentry_init, capture_events): +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_formdata_request_body( + sentry_init, capture_events, capture_items, span_streaming +): sentry_init( traces_sample_rate=1.0, send_default_pii=True, max_request_body_size="always", integrations=[StarletteIntegration()], + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, ) starlette_app = starlette_app_factory() - events = capture_events() - client = TestClient(starlette_app) - client.post( - "/body/form", - data=BODY_FORM.encode("utf-8"), - headers={ - "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", - }, - ) - (event, transaction_event) = events - assert event["request"]["data"].keys() == PARSED_FORM.keys() - assert event["request"]["data"]["username"] == PARSED_FORM["username"] - assert event["request"]["data"]["password"] == "[Filtered]" - assert event["request"]["data"]["photo"] == "" - assert transaction_event["_meta"]["request"]["data"]["photo"] == { - "": {"rem": [["!raw", "x"]]} - } - - assert transaction_event["request"]["data"].keys() == PARSED_FORM.keys() - assert transaction_event["request"]["data"]["username"] == PARSED_FORM["username"] - assert transaction_event["request"]["data"]["password"] == "[Filtered]" - assert transaction_event["request"]["data"]["photo"] == "" - assert transaction_event["_meta"]["request"]["data"]["photo"] == { - "": {"rem": [["!raw", "x"]]} - } + if span_streaming: + items = capture_items("event", "span") + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + }, + ) + + (event,) = (item.payload for item in items if item.type == "event") + assert event["request"]["data"].keys() == PARSED_FORM.keys() + assert event["request"]["data"]["username"] == PARSED_FORM["username"] + assert event["request"]["data"]["password"] == "[Filtered]" + assert event["request"]["data"]["photo"] == "" + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + server_span = next( + span for span in spans if span["attributes"]["sentry.op"] == "http.server" + ) + + # Going forward, the sanitization of data will need to happen within the `before_send_span` hooks + # See https://sentry.slack.com/archives/C09RR0KD2N7/p1776951331206129?thread_ts=1776951227.440659&cid=C09RR0KD2N7 + parsed_form_attribute = json.loads( + server_span["attributes"][SPANDATA.HTTP_REQUEST_BODY_DATA] + ) + assert parsed_form_attribute.keys() == PARSED_FORM.keys() + assert parsed_form_attribute["username"] == PARSED_FORM["username"] + assert parsed_form_attribute["password"] == "hello123" + assert parsed_form_attribute["photo"] == "[Unparsable]" + else: + events = capture_events() + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + }, + ) + + (event, transaction_event) = events + assert event["request"]["data"].keys() == PARSED_FORM.keys() + assert event["request"]["data"]["username"] == PARSED_FORM["username"] + assert event["request"]["data"]["password"] == "[Filtered]" + assert event["request"]["data"]["photo"] == "" + assert event["_meta"]["request"]["data"]["photo"] == { + "": {"rem": [["!raw", "x"]]} + } + + assert transaction_event["request"]["data"].keys() == PARSED_FORM.keys() + assert ( + transaction_event["request"]["data"]["username"] == PARSED_FORM["username"] + ) + assert transaction_event["request"]["data"]["password"] == "[Filtered]" + assert transaction_event["request"]["data"]["photo"] == "" + assert transaction_event["_meta"]["request"]["data"]["photo"] == { + "": {"rem": [["!raw", "x"]]} + } @pytest.mark.asyncio -async def test_request_body_too_big(sentry_init, capture_events): +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_request_body_too_big( + sentry_init, capture_events, capture_items, span_streaming +): sentry_init( traces_sample_rate=1.0, send_default_pii=True, integrations=[StarletteIntegration()], + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, ) starlette_app = starlette_app_factory() - events = capture_events() - client = TestClient(starlette_app) - client.post( - "/body/form", - data=BODY_FORM.encode("utf-8"), - headers={ - "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", - "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", - }, - ) - (event, transaction_event) = events - assert event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - # Because request is too big only the AnnotatedValue is extracted. - assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} - - assert transaction_event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - # Because request is too big only the AnnotatedValue is extracted. - assert transaction_event["_meta"]["request"]["data"] == { - "": {"rem": [["!config", "x"]]} - } + if span_streaming: + items = capture_items("event", "span") + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) + + (event,) = (item.payload for item in items if item.type == "event") + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + # Because request is too big only the AnnotatedValue is extracted. + assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + server_span = next( + span for span in spans if span["attributes"]["sentry.op"] == "http.server" + ) + + # Because request is too big only the AnnotatedValue is extracted. + assert ( + server_span["attributes"][SPANDATA.HTTP_REQUEST_BODY_DATA] + == "[Exceeds maximum size]" + ) + else: + events = capture_events() + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) + + (event, transaction_event) = events + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + # Because request is too big only the AnnotatedValue is extracted. + assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} + + assert transaction_event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + # Because request is too big only the AnnotatedValue is extracted. + assert transaction_event["_meta"]["request"]["data"] == { + "": {"rem": [["!config", "x"]]} + } @pytest.mark.asyncio @@ -1022,134 +1169,6 @@ def test_active_thread_id_span_streaming(sentry_init, capture_items, endpoint): assert str(data["active"]) == segments[0]["attributes"]["thread.id"] -def _post_body_app(handler_awaitable): - async def _handler(request): - await handler_awaitable(request) - return starlette.responses.JSONResponse({"ok": True}) - - return starlette.applications.Starlette( - routes=[starlette.routing.Route("/body", _handler, methods=["POST"])], - ) - - -@pytest.mark.parametrize("middleware_spans", [False, True]) -def test_request_body_data_does_not_scrub_pii_span_streaming( - sentry_init, capture_items, middleware_spans -): - sentry_init( - auto_enabling_integrations=False, - integrations=[StarletteIntegration(middleware_spans=middleware_spans)], - traces_sample_rate=1.0, - _experiments={"trace_lifecycle": "stream"}, - ) - - async def _read_json(request): - await request.json() - - items = capture_items("span") - - client = TestClient(_post_body_app(_read_json)) - response = client.post( - "/body", - json={ - "password": "ohno", - "authorization": "Bearer token", - "message": "hello", - }, - ) - assert response.status_code == 200 - - sentry_sdk.flush() - - segments = [item.payload for item in items if item.payload.get("is_segment")] - assert len(segments) == 1 - attr = segments[0]["attributes"]["http.request.body.data"] - - # Going forward, the sanitization of data will need to happen within the `before_send_span` hooks - # See https://sentry.slack.com/archives/C09RR0KD2N7/p1776951331206129?thread_ts=1776951227.440659&cid=C09RR0KD2N7 - assert "ohno" in attr - assert "Bearer token" in attr - assert "hello" in attr - - -@pytest.mark.skipif( - STARLETTE_VERSION < (0, 21), - reason="Requires Starlette >= 0.21, because earlier versions use a requests-based TestClient which does not support the 'content' kwarg", -) -@pytest.mark.parametrize("middleware_spans", [False, True]) -def test_request_body_data_annotated_value_top_level_span_streaming( - sentry_init, capture_items, middleware_spans -): - sentry_init( - auto_enabling_integrations=False, - integrations=[StarletteIntegration(middleware_spans=middleware_spans)], - traces_sample_rate=1.0, - _experiments={"trace_lifecycle": "stream"}, - ) - - async def _read_body(request): - await request.body() - - items = capture_items("span") - - client = TestClient(_post_body_app(_read_body)) - response = client.post( - "/body", - content=b"not json and not form", - headers={"content-type": "application/octet-stream"}, - ) - assert response.status_code == 200 - - sentry_sdk.flush() - - segments = [item.payload for item in items if item.payload.get("is_segment")] - assert len(segments) == 1 - attr = segments[0]["attributes"]["http.request.body.data"] - - assert isinstance(attr, str) - assert ( - attr == '""' - ) # AnnotatedValue.removed_because_raw_data is called because the content was not able to be parsed, and replaces the value with an empty string - - -@pytest.mark.parametrize("middleware_spans", [False, True]) -def test_request_body_data_annotated_value_nested_span_streaming( - sentry_init, capture_items, middleware_spans -): - pytest.importorskip("multipart") - - sentry_init( - auto_enabling_integrations=False, - integrations=[StarletteIntegration(middleware_spans=middleware_spans)], - traces_sample_rate=1.0, - _experiments={"trace_lifecycle": "stream"}, - ) - - async def _read_form(request): - await request.form() - - items = capture_items("span") - - client = TestClient(_post_body_app(_read_form)) - response = client.post( - "/body", - data={"name": "erica"}, - files={"avatar": ("photo.jpg", b"fake-bytes", "image/jpeg")}, - ) - assert response.status_code == 200 - - sentry_sdk.flush() - - segments = [item.payload for item in items if item.payload.get("is_segment")] - assert len(segments) == 1 - attr = segments[0]["attributes"]["http.request.body.data"] - - assert isinstance(attr, str) - parsed = json.loads(attr) - assert parsed["name"] == "erica" - assert "fake-bytes" not in attr - - def test_original_request_not_scrubbed(sentry_init, capture_events): sentry_init(integrations=[StarletteIntegration()]) From b92156c3703053c9d1da6721c4ac5076911c922d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 09:29:28 +0200 Subject: [PATCH 02/13] update const --- sentry_sdk/consts.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 008256e110..9147032ad7 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -819,6 +819,12 @@ class SPANDATA: Example: GET """ + HTTP_REQUEST_BODY_DATA = "http.request.body.data" + """ + HTTP request body data. Can be given as string or structural data of any format. + Example: "[{\"role\": \"user\", \"message\": \"hello\"}]" + """ + HTTP_REQUEST_METHOD = "http.request.method" """ The HTTP method used. From 92f1d788f0a7b106c6a0754d516951230f124126 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 09:42:16 +0200 Subject: [PATCH 03/13] types --- sentry_sdk/integrations/starlette.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index fdc3b09efe..098fb99463 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -47,7 +47,10 @@ import starlette # type: ignore from starlette import __version__ as STARLETTE_VERSION from starlette.applications import Starlette # type: ignore - from starlette.datastructures import UploadFile # type: ignore + from starlette.datastructures import ( + FormData, + UploadFile, # type: ignore + ) from starlette.middleware import Middleware # type: ignore from starlette.middleware.authentication import ( # type: ignore AuthenticationMiddleware, @@ -492,7 +495,7 @@ def _is_async_callable(obj: "Any") -> bool: ) -def _patch_json_request_body_accessor(): +def _patch_json_request_body_accessor() -> None: """ Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. Without the cache, consuming the stream can cause the application to hang. @@ -500,15 +503,17 @@ def _patch_json_request_body_accessor(): _original_json = Request.json @functools.wraps(_original_json) - async def sentry_json(self: "Request"): - request_json = await _original_json(self) - self.scope["state"][_SCOPE_STATE_JSON_REQUEST_BODY_KEY] = request_json + async def sentry_json(self: "Request", *args: "Any", **kwargs: "Any") -> "Any": + request_json = await _original_json(self, *args, **kwargs) + self.scope.setdefault("state", {})[_SCOPE_STATE_JSON_REQUEST_BODY_KEY] = ( + request_json + ) return request_json Request.json = sentry_json -def _patch_formdata_request_body_accessor(): +def _patch_formdata_request_body_accessor() -> None: """ Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. Without the cache, consuming the stream can cause the application to hang. @@ -516,10 +521,11 @@ def _patch_formdata_request_body_accessor(): _original_form = Request.form @functools.wraps(_original_form) - async def sentry_form(self: "Request"): - print("wrapped form") - request_formdata = await _original_form(self) - self.scope["state"][_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] = request_formdata + async def sentry_form(self: "Request", *args: "Any", **kwargs: "Any") -> "FormData": + request_formdata = await _original_form(self, *args, **kwargs) + self.scope.setdefault("state", {})[_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] = ( + request_formdata + ) return request_formdata Request.form = sentry_form @@ -555,7 +561,7 @@ def _serialize_cached_request_body_attribute( return json.dumps(form_data) -async def _wrap_async_handler(handler, *args, **kwargs): +async def _wrap_async_handler(handler, *args: "Any", **kwargs: "Any") -> "Any": """ Wraps an asynchronous handler function to attach request info to the server segment span. The request body cached on the ASGI scope is attached to streamed spans, but consuming the request body in the event From 6a30714d2ccfa60e9b9094548c06079d81039792 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 09:45:30 +0200 Subject: [PATCH 04/13] more types --- sentry_sdk/integrations/starlette.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 098fb99463..a7ebd498da 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -47,9 +47,9 @@ import starlette # type: ignore from starlette import __version__ as STARLETTE_VERSION from starlette.applications import Starlette # type: ignore - from starlette.datastructures import ( + from starlette.datastructures import ( # type: ignore FormData, - UploadFile, # type: ignore + UploadFile, ) from starlette.middleware import Middleware # type: ignore from starlette.middleware.authentication import ( # type: ignore @@ -561,7 +561,9 @@ def _serialize_cached_request_body_attribute( return json.dumps(form_data) -async def _wrap_async_handler(handler, *args: "Any", **kwargs: "Any") -> "Any": +async def _wrap_async_handler( + handler: "Callable[..., Awaitable[Any]]", *args: "Any", **kwargs: "Any" +) -> "Any": """ Wraps an asynchronous handler function to attach request info to the server segment span. The request body cached on the ASGI scope is attached to streamed spans, but consuming the request body in the event From 064ed6ad649ff483ce6aee278a647858077378e6 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 09:56:54 +0200 Subject: [PATCH 05/13] defensive access --- sentry_sdk/integrations/starlette.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index a7ebd498da..5defa31e69 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -537,14 +537,19 @@ def _serialize_cached_request_body_attribute( """ Returns a stringified JSON representation of the request body if the request body is cached on the ASGI scope and within size bounds. """ + scope_state = request.scope.get("state", {}) if ( "content-length" not in request.headers - or _SCOPE_STATE_JSON_REQUEST_BODY_KEY not in request.scope["state"] - and _SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY not in request.scope["state"] + or _SCOPE_STATE_JSON_REQUEST_BODY_KEY not in scope_state + and _SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY not in scope_state ): return None - content_length = int(request.headers["content-length"]) + try: + content_length = int(request.headers["content-length"]) + except ValueError: + return None + if content_length and not request_body_within_bounds(client, content_length): return OVER_SIZE_LIMIT_SUBSTITUTE From bc8ceb2846b31f33fc5ad9f3e13f93bd11cb58fa Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 10:15:03 +0200 Subject: [PATCH 06/13] patch different form method --- sentry_sdk/integrations/starlette.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 5defa31e69..def103b5b7 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -503,14 +503,14 @@ def _patch_json_request_body_accessor() -> None: _original_json = Request.json @functools.wraps(_original_json) - async def sentry_json(self: "Request", *args: "Any", **kwargs: "Any") -> "Any": + async def wrapped_json(self: "Request", *args: "Any", **kwargs: "Any") -> "Any": request_json = await _original_json(self, *args, **kwargs) self.scope.setdefault("state", {})[_SCOPE_STATE_JSON_REQUEST_BODY_KEY] = ( request_json ) return request_json - Request.json = sentry_json + Request.json = wrapped_json def _patch_formdata_request_body_accessor() -> None: @@ -518,17 +518,22 @@ def _patch_formdata_request_body_accessor() -> None: Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. Without the cache, consuming the stream can cause the application to hang. """ - _original_form = Request.form + if not hasattr(Request, "_get_form"): + return + + _original_form = Request._get_form @functools.wraps(_original_form) - async def sentry_form(self: "Request", *args: "Any", **kwargs: "Any") -> "FormData": + async def wrapped_form( + self: "Request", *args: "Any", **kwargs: "Any" + ) -> "FormData": request_formdata = await _original_form(self, *args, **kwargs) self.scope.setdefault("state", {})[_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] = ( request_formdata ) return request_formdata - Request.form = sentry_form + Request._get_form = wrapped_form def _serialize_cached_request_body_attribute( From b029bc02297e15a190705c278cbf7d227e081d2b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 10:29:15 +0200 Subject: [PATCH 07/13] skip tests when patched method does not exist --- tests/integrations/starlette/test_starlette.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py index 8d281319bc..78c95ddfa4 100644 --- a/tests/integrations/starlette/test_starlette.py +++ b/tests/integrations/starlette/test_starlette.py @@ -375,6 +375,10 @@ async def test_request_info_json_body( @pytest.mark.asyncio +@pytest.mark.skipif( + STARLETTE_VERSION < (0, 24), + reason="Patched `_get_form()` was added in version 0.24 with https://github.com/Kludex/starlette/commit/c568b55dff8be94b9c917e186e512ab53d7310e1", +) @pytest.mark.parametrize("span_streaming", [True, False]) async def test_formdata_request_body( sentry_init, capture_events, capture_items, span_streaming @@ -456,6 +460,10 @@ async def test_formdata_request_body( @pytest.mark.asyncio +@pytest.mark.skipif( + STARLETTE_VERSION < (0, 24), + reason="Patched `_get_form()` was added in version 0.24 with https://github.com/Kludex/starlette/commit/c568b55dff8be94b9c917e186e512ab53d7310e1", +) @pytest.mark.parametrize("span_streaming", [True, False]) async def test_request_body_too_big( sentry_init, capture_events, capture_items, span_streaming From 5004aae4d2ce2648048358d7e588fcd095e93ab9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 10:41:43 +0200 Subject: [PATCH 08/13] update docstring --- sentry_sdk/integrations/starlette.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index def103b5b7..b7b62794c9 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -498,7 +498,8 @@ def _is_async_callable(obj: "Any") -> bool: def _patch_json_request_body_accessor() -> None: """ Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. - Without the cache, consuming the stream can cause the application to hang. + Without the cache, consuming the stream causes applications to hang when middleware or handlers consume the raw + `receive()` callable exposed by Starlette. """ _original_json = Request.json @@ -516,7 +517,8 @@ async def wrapped_json(self: "Request", *args: "Any", **kwargs: "Any") -> "Any": def _patch_formdata_request_body_accessor() -> None: """ Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. - Without the cache, consuming the stream can cause the application to hang. + Without the cache, consuming the stream causes applications to hang when middleware or handlers consume the raw + `receive()` callable exposed by Starlette. """ if not hasattr(Request, "_get_form"): return From 83fa16dc2732e9d7a7dd05be00fe4234901e4a3f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 10:59:55 +0200 Subject: [PATCH 09/13] use cached attributes directly --- sentry_sdk/integrations/starlette.py | 85 +++---------------- .../integrations/starlette/test_starlette.py | 39 --------- 2 files changed, 13 insertions(+), 111 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index b7b62794c9..86f3d80e95 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -48,7 +48,6 @@ from starlette import __version__ as STARLETTE_VERSION from starlette.applications import Starlette # type: ignore from starlette.datastructures import ( # type: ignore - FormData, UploadFile, ) from starlette.middleware import Middleware # type: ignore @@ -92,9 +91,6 @@ TRANSACTION_STYLE_VALUES = ("endpoint", "url") -_SCOPE_STATE_JSON_REQUEST_BODY_KEY = "sentry_sdk.json_request_body" -_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY = "sentry_sdk.formdata_request_body" - class StarletteIntegration(Integration): identifier = "starlette" @@ -153,16 +149,6 @@ def setup_once() -> None: if version >= (0, 24): patch_templates() - def setup_once_with_options( - self: "StarletteIntegration", options: "Optional[dict[str, Any]]" = None - ) -> None: - is_span_streaming_enabled = has_span_streaming_enabled(options) - if not is_span_streaming_enabled: - return - - _patch_json_request_body_accessor() - _patch_formdata_request_body_accessor() - def _enable_span_for_middleware(middleware_class: "Any") -> type: old_call = middleware_class.__call__ @@ -495,61 +481,13 @@ def _is_async_callable(obj: "Any") -> bool: ) -def _patch_json_request_body_accessor() -> None: - """ - Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. - Without the cache, consuming the stream causes applications to hang when middleware or handlers consume the raw - `receive()` callable exposed by Starlette. - """ - _original_json = Request.json - - @functools.wraps(_original_json) - async def wrapped_json(self: "Request", *args: "Any", **kwargs: "Any") -> "Any": - request_json = await _original_json(self, *args, **kwargs) - self.scope.setdefault("state", {})[_SCOPE_STATE_JSON_REQUEST_BODY_KEY] = ( - request_json - ) - return request_json - - Request.json = wrapped_json - - -def _patch_formdata_request_body_accessor() -> None: - """ - Caches request body data on the ASGI scope, so that the body can be attached to telemetry after the request handler runs. - Without the cache, consuming the stream causes applications to hang when middleware or handlers consume the raw - `receive()` callable exposed by Starlette. - """ - if not hasattr(Request, "_get_form"): - return - - _original_form = Request._get_form - - @functools.wraps(_original_form) - async def wrapped_form( - self: "Request", *args: "Any", **kwargs: "Any" - ) -> "FormData": - request_formdata = await _original_form(self, *args, **kwargs) - self.scope.setdefault("state", {})[_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] = ( - request_formdata - ) - return request_formdata - - Request._get_form = wrapped_form - - -def _serialize_cached_request_body_attribute( +def _get_cached_request_body_attribute( client: "sentry_sdk.client.BaseClient", request: "Request" ) -> "Optional[str]": """ - Returns a stringified JSON representation of the request body if the request body is cached on the ASGI scope and within size bounds. + Returns a stringified JSON representation of the request body if the request body is cached and within size bounds. """ - scope_state = request.scope.get("state", {}) - if ( - "content-length" not in request.headers - or _SCOPE_STATE_JSON_REQUEST_BODY_KEY not in scope_state - and _SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY not in scope_state - ): + if "content-length" not in request.headers: return None try: @@ -560,13 +498,16 @@ def _serialize_cached_request_body_attribute( if content_length and not request_body_within_bounds(client, content_length): return OVER_SIZE_LIMIT_SUBSTITUTE - if _SCOPE_STATE_JSON_REQUEST_BODY_KEY in request.scope["state"]: - return json.dumps(request.scope["state"][_SCOPE_STATE_JSON_REQUEST_BODY_KEY]) + json_body = getattr(request, "_json", None) + if json_body is not None: + return json.dumps(json_body) - form = request.scope["state"][_SCOPE_STATE_FORMDATA_REQUEST_BODY_KEY] + formdata_body = getattr(request, "_form", None) + if formdata_body is None: + return form_data = {} - for key, val in form.items(): + for key, val in formdata_body.items(): is_file = isinstance(val, UploadFile) form_data[key] = val if not is_file else "[Unparsable]" @@ -627,13 +568,13 @@ def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event": current_span = _get_current_streamed_span() if type(current_span) is StreamedSpan: - serialized_request_body = _serialize_cached_request_body_attribute( + request_body = _get_cached_request_body_attribute( client=client, request=request ) - if serialized_request_body: + if request_body: current_span._segment.set_attribute( SPANDATA.HTTP_REQUEST_BODY_DATA, - serialized_request_body, + request_body, ) diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py index 78c95ddfa4..22ae7c55a4 100644 --- a/tests/integrations/starlette/test_starlette.py +++ b/tests/integrations/starlette/test_starlette.py @@ -21,14 +21,8 @@ from starlette.middleware import Middleware from starlette.middleware.authentication import AuthenticationMiddleware from starlette.middleware.trustedhost import TrustedHostMiddleware -from starlette.requests import Request from starlette.testclient import TestClient -try: - from starlette.middleware.exceptions import ExceptionMiddleware -except ImportError: - from starlette.exceptions import ExceptionMiddleware - import sentry_sdk from sentry_sdk import capture_message, get_baggage, get_traceparent from sentry_sdk.consts import SPANDATA @@ -279,31 +273,6 @@ async def my_send(*args, **kwargs): await self.app(scope, partial_receive, partial_send) -@pytest.fixture(autouse=True) -def reset_starlette_integration(uninstall_integration): - original_request_json = Request.json - original_request_form = Request.form - original_starlette_call = starlette.applications.Starlette.__call__ - original_request_response = starlette.routing.request_response - original_middleware_init = Middleware.__init__ - original_authentication_call = AuthenticationMiddleware.__call__ - original_exception_middleware_init = ExceptionMiddleware.__init__ - original_exception_middleware_call = ExceptionMiddleware.__call__ - - yield - - Request.json = original_request_json - Request.form = original_request_form - starlette.applications.Starlette.__call__ = original_starlette_call - starlette.routing.request_response = original_request_response - Middleware.__init__ = original_middleware_init - AuthenticationMiddleware.__call__ = original_authentication_call - ExceptionMiddleware.__init__ = original_exception_middleware_init - ExceptionMiddleware.__call__ = original_exception_middleware_call - - uninstall_integration("starlette") - - @pytest.mark.asyncio @pytest.mark.parametrize("span_streaming", [True, False]) async def test_request_info_json_body( @@ -375,10 +344,6 @@ async def test_request_info_json_body( @pytest.mark.asyncio -@pytest.mark.skipif( - STARLETTE_VERSION < (0, 24), - reason="Patched `_get_form()` was added in version 0.24 with https://github.com/Kludex/starlette/commit/c568b55dff8be94b9c917e186e512ab53d7310e1", -) @pytest.mark.parametrize("span_streaming", [True, False]) async def test_formdata_request_body( sentry_init, capture_events, capture_items, span_streaming @@ -460,10 +425,6 @@ async def test_formdata_request_body( @pytest.mark.asyncio -@pytest.mark.skipif( - STARLETTE_VERSION < (0, 24), - reason="Patched `_get_form()` was added in version 0.24 with https://github.com/Kludex/starlette/commit/c568b55dff8be94b9c917e186e512ab53d7310e1", -) @pytest.mark.parametrize("span_streaming", [True, False]) async def test_request_body_too_big( sentry_init, capture_events, capture_items, span_streaming From 8582cc50e7703b8e3a27974e1dfab95cab523d66 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 11:01:29 +0200 Subject: [PATCH 10/13] explicit None return --- sentry_sdk/integrations/starlette.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 86f3d80e95..fb0a15ae47 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -504,7 +504,7 @@ def _get_cached_request_body_attribute( formdata_body = getattr(request, "_form", None) if formdata_body is None: - return + return None form_data = {} for key, val in formdata_body.items(): From 2637c8a27a42366cfe433664f126d8424de8239e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 11:07:46 +0200 Subject: [PATCH 11/13] docstring --- sentry_sdk/integrations/starlette.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index fb0a15ae47..38dbab8a26 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -519,7 +519,7 @@ async def _wrap_async_handler( ) -> "Any": """ Wraps an asynchronous handler function to attach request info to the server segment span. - The request body cached on the ASGI scope is attached to streamed spans, but consuming the request body in the event + The request body cached on the Starlette Request object is attached to streamed spans, but consuming the request body in the event processor can still cause application hangs. """ client = sentry_sdk.get_client() From 9d2157e165c81f817ad37332c35bc1672e015630 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 13:17:15 +0200 Subject: [PATCH 12/13] docstring --- sentry_sdk/integrations/starlette.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 38dbab8a26..9602905884 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -518,7 +518,7 @@ async def _wrap_async_handler( handler: "Callable[..., Awaitable[Any]]", *args: "Any", **kwargs: "Any" ) -> "Any": """ - Wraps an asynchronous handler function to attach request info to the server segment span. + Wraps an asynchronous handler function to attach request info to errors and the server segment span. The request body cached on the Starlette Request object is attached to streamed spans, but consuming the request body in the event processor can still cause application hangs. """ From b7787d9e959b280d5daaa3c0ff8181eeb3c8ebd7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 18 May 2026 13:25:27 +0200 Subject: [PATCH 13/13] handle null JSON --- sentry_sdk/integrations/starlette.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 9602905884..691a3dc0bb 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -498,9 +498,8 @@ def _get_cached_request_body_attribute( if content_length and not request_body_within_bounds(client, content_length): return OVER_SIZE_LIMIT_SUBSTITUTE - json_body = getattr(request, "_json", None) - if json_body is not None: - return json.dumps(json_body) + if hasattr(request, "_json"): + return json.dumps(request._json) formdata_body = getattr(request, "_form", None) if formdata_body is None: