From 6a37c6388d78d6d945534ae1721a811e69b8c3b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 10:42:35 -0400 Subject: [PATCH 1/7] feat(ourlogs): add truncate RPC parameter for logs events query Adds a `truncate` query parameter to the organization events endpoint that truncates string column values at the RPC layer for the OurLogs dataset, reducing payload size for wide viewports. --- .../api/endpoints/organization_events.py | 21 ++++++ src/sentry/snuba/ourlogs.py | 2 + src/sentry/snuba/rpc_dataset_common.py | 11 ++- tests/sentry/snuba/test_rpc_dataset_common.py | 74 +++++++++++++++++++ .../test_organization_events_ourlogs.py | 42 +++++++++++ 5 files changed, 148 insertions(+), 2 deletions(-) diff --git a/src/sentry/api/endpoints/organization_events.py b/src/sentry/api/endpoints/organization_events.py index 17429fd1c0c354..6ef02167854108 100644 --- a/src/sentry/api/endpoints/organization_events.py +++ b/src/sentry/api/endpoints/organization_events.py @@ -117,6 +117,7 @@ def get_features(self, organization: Organization, request: Request) -> Mapping[ "organizations:dynamic-sampling", "organizations:on-demand-metrics-extraction", "organizations:on-demand-metrics-extraction-widgets", + "organizations:on-demand-metrics-extraction-experimental", ] batch_features = features.batch_has( feature_names, @@ -237,6 +238,16 @@ def get(self, request: Request, organization: Organization) -> Response: use_aggregate_conditions = request.GET.get("allowAggregateConditions", "1") == "1" + max_string_length: int | None = None + truncate_str = request.GET.get("truncate") + if truncate_str is not None: + try: + max_string_length = int(truncate_str) + if max_string_length < 1: + raise ValueError + except ValueError: + return Response({"detail": "truncate must be a positive integer"}, status=400) + def _data_fn( dataset_query: DatasetQuery, offset: int, @@ -600,6 +611,11 @@ def flex_time_data_fn(limit, page_token): sampling_mode=snuba_params.sampling_mode, page_token=page_token, additional_queries=additional_queries, + **( + {"max_string_length": max_string_length} + if scoped_dataset == OurLogs + else {} + ), ) return EAPPageTokenPaginator(data_fn=flex_time_data_fn), EAPPageTokenCursor @@ -620,6 +636,11 @@ def data_fn(offset, limit): config=config, sampling_mode=snuba_params.sampling_mode, additional_queries=additional_queries, + **( + {"max_string_length": max_string_length} + if scoped_dataset == OurLogs + else {} + ), ) if save_discover_dataset_decision and discover_saved_query_id: diff --git a/src/sentry/snuba/ourlogs.py b/src/sentry/snuba/ourlogs.py index 1707e73b03d73e..287b7dff66a51e 100644 --- a/src/sentry/snuba/ourlogs.py +++ b/src/sentry/snuba/ourlogs.py @@ -40,6 +40,7 @@ def run_table_query( search_resolver: SearchResolver | None = None, page_token: PageToken | None = None, additional_queries: AdditionalQueries | None = None, + max_string_length: int | None = None, ) -> EAPResponse: """timestamp_precise is always displayed in the UI in lieu of timestamp but since the TraceItem table isn't a DateTime64 so we need to always order by it regardless of what is actually passed to the orderby. @@ -78,6 +79,7 @@ def run_table_query( ), page_token=page_token, additional_queries=additional_queries, + max_string_length=max_string_length, ), debug=params.debug, ) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index 37bcdc4dade70d..ac99fd05217fc3 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -91,6 +91,7 @@ class TableQuery: page_token: PageToken | None = None additional_queries: AdditionalQueries | None = None extra_conditions: TraceItemFilter | None = None + max_string_length: int | None = None @dataclass @@ -238,10 +239,13 @@ def filter_project(cls, project: Project) -> bool: @classmethod def build_rpc_table_row_context(cls, query: TableQuery) -> dict[str, Any]: - return { + ctx: dict[str, Any] = { "project_ids": list(query.resolver.params.project_ids), "organization_id": query.resolver.params.organization_id, } + if query.max_string_length is not None: + ctx["max_string_length"] = query.max_string_length + return ctx @classmethod def get_table_rpc_request(cls, query: TableQuery) -> TableRequest: @@ -461,8 +465,9 @@ def process_column_values( final_data: SnubaData, attribute: Any, resolved_column: ResolvedColumn, - **_context_kwargs: Any, + **context_kwargs: Any, ) -> None: + max_string_length: int | None = context_kwargs.get("max_string_length") for index, result in enumerate(column_value.results): result_value: Any if result.is_null: @@ -470,6 +475,8 @@ def process_column_values( else: result_value = anyvalue_to_python(result) result_value = process_value(result_value) + if max_string_length is not None and isinstance(result_value, str): + result_value = result_value[:max_string_length] final_data[index][attribute] = resolved_column.process_column(result_value) @classmethod diff --git a/tests/sentry/snuba/test_rpc_dataset_common.py b/tests/sentry/snuba/test_rpc_dataset_common.py index b9586755bc2680..467f3ae7bef57a 100644 --- a/tests/sentry/snuba/test_rpc_dataset_common.py +++ b/tests/sentry/snuba/test_rpc_dataset_common.py @@ -1,7 +1,9 @@ from datetime import datetime, timedelta, timezone +from unittest.mock import MagicMock import pytest from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig +from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeValue from sentry.search.eap.types import SearchResolverConfig from sentry.search.events.types import SnubaParams @@ -14,6 +16,78 @@ from sentry.testutils.pytest.fixtures import django_db_all +def _make_column_value(string_values: list[str]) -> MagicMock: + column_value = MagicMock() + results = [] + for val in string_values: + av = AttributeValue() + av.val_str = val + results.append(av) + column_value.results = results + return column_value + + +def _identity_column() -> MagicMock: + resolved_column = MagicMock() + resolved_column.process_column = lambda v: v + return resolved_column + + +class TestProcessColumnValuesTruncation(TestCase): + def test_truncates_long_strings(self) -> None: + long_str = "x" * 100 + final_data: list[dict] = [{}] + RPCBase.process_column_values( + _make_column_value([long_str]), + final_data, + "attr", + _identity_column(), + max_string_length=10, + ) + assert final_data[0]["attr"] == "x" * 10 + + def test_no_truncation_without_param(self) -> None: + long_str = "x" * 100 + final_data: list[dict] = [{}] + RPCBase.process_column_values( + _make_column_value([long_str]), + final_data, + "attr", + _identity_column(), + ) + assert final_data[0]["attr"] == long_str + + def test_does_not_truncate_non_string_values(self) -> None: + av = AttributeValue() + av.val_int = 42 + column_value = MagicMock() + column_value.results = [av] + final_data: list[dict] = [{}] + RPCBase.process_column_values( + column_value, + final_data, + "attr", + _identity_column(), + max_string_length=1, + ) + assert final_data[0]["attr"] == 42 + + def test_null_values_are_unchanged(self) -> None: + av = AttributeValue() + av.is_null = True + column_value = MagicMock() + column_value.results = [av] + final_data: list[dict] = [{}] + RPCBase.process_column_values( + column_value, + final_data, + "attr", + _identity_column(), + max_string_length=1, + ) + assert final_data[0]["attr"] is None + + class TestBulkTableQueries(TestCase): def setUp(self) -> None: super().setUp() diff --git a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py index 2411162f46b7c0..5063afdfd3487d 100644 --- a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py +++ b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py @@ -365,6 +365,48 @@ def test_pagelimit(self) -> None: assert response.status_code == 400 assert response.data["detail"] == "Invalid per_page value. Must be between 1 and 9999." + @pytest.mark.querybuilder + def test_truncate_param(self) -> None: + log = self.create_ourlog( + {"body": "hello world"}, + timestamp=self.ten_mins_ago, + ) + self.store_eap_items([log]) + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": 5, + } + ) + assert response.status_code == 200, response.content + assert response.data["data"][0]["log.body"] == "hello" + + def test_truncate_param_invalid_type(self) -> None: + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": "notanumber", + } + ) + assert response.status_code == 400 + assert response.data["detail"] == "truncate must be a positive integer" + + def test_truncate_param_invalid_value(self) -> None: + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": 0, + } + ) + assert response.status_code == 400 + assert response.data["detail"] == "truncate must be a positive integer" + def test_homepage_query(self) -> None: """This query matches the one made on the logs homepage so that we can be sure everything is working at least for the initial load""" From cec835d9f49c3d67e0bf479ff5e1e6e48167269b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 10:51:29 -0400 Subject: [PATCH 2/7] fix(ourlogs): append ellipsis to truncated string values --- src/sentry/snuba/rpc_dataset_common.py | 3 ++- tests/sentry/snuba/test_rpc_dataset_common.py | 2 +- tests/snuba/api/endpoints/test_organization_events_ourlogs.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index ac99fd05217fc3..a2bf02a0db7a2c 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -476,7 +476,8 @@ def process_column_values( result_value = anyvalue_to_python(result) result_value = process_value(result_value) if max_string_length is not None and isinstance(result_value, str): - result_value = result_value[:max_string_length] + if len(result_value) > max_string_length: + result_value = result_value[:max_string_length] + "..." final_data[index][attribute] = resolved_column.process_column(result_value) @classmethod diff --git a/tests/sentry/snuba/test_rpc_dataset_common.py b/tests/sentry/snuba/test_rpc_dataset_common.py index 467f3ae7bef57a..a0549efb7473e7 100644 --- a/tests/sentry/snuba/test_rpc_dataset_common.py +++ b/tests/sentry/snuba/test_rpc_dataset_common.py @@ -44,7 +44,7 @@ def test_truncates_long_strings(self) -> None: _identity_column(), max_string_length=10, ) - assert final_data[0]["attr"] == "x" * 10 + assert final_data[0]["attr"] == "x" * 10 + "..." def test_no_truncation_without_param(self) -> None: long_str = "x" * 100 diff --git a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py index 5063afdfd3487d..d2350de14690c0 100644 --- a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py +++ b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py @@ -381,7 +381,7 @@ def test_truncate_param(self) -> None: } ) assert response.status_code == 200, response.content - assert response.data["data"][0]["log.body"] == "hello" + assert response.data["data"][0]["log.body"] == "hello..." def test_truncate_param_invalid_type(self) -> None: response = self.do_request( From 5812a12bfea67683f855d8aaa0f99e172501b788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 10:51:41 -0400 Subject: [PATCH 3/7] remove unrelated line --- src/sentry/api/endpoints/organization_events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sentry/api/endpoints/organization_events.py b/src/sentry/api/endpoints/organization_events.py index 6ef02167854108..a7b733cbbc4b42 100644 --- a/src/sentry/api/endpoints/organization_events.py +++ b/src/sentry/api/endpoints/organization_events.py @@ -117,7 +117,6 @@ def get_features(self, organization: Organization, request: Request) -> Mapping[ "organizations:dynamic-sampling", "organizations:on-demand-metrics-extraction", "organizations:on-demand-metrics-extraction-widgets", - "organizations:on-demand-metrics-extraction-experimental", ] batch_features = features.batch_has( feature_names, From 1561bf37c6e1c724834518faccde7ea09e6f323f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 11:14:10 -0400 Subject: [PATCH 4/7] Added a comment about long-term goals --- src/sentry/snuba/rpc_dataset_common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index a2bf02a0db7a2c..067078a30643cc 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -475,9 +475,13 @@ def process_column_values( else: result_value = anyvalue_to_python(result) result_value = process_value(result_value) + + # Note: post-query truncation may not be our preferred method long-term. + # We may want to set up a function that filters/truncates at the EAP side. if max_string_length is not None and isinstance(result_value, str): if len(result_value) > max_string_length: result_value = result_value[:max_string_length] + "..." + final_data[index][attribute] = resolved_column.process_column(result_value) @classmethod From b43d8a4c1585ff6a7dbd1f1fe2c5533504ddf654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 11:18:39 -0400 Subject: [PATCH 5/7] Added metric --- src/sentry/snuba/rpc_dataset_common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index 067078a30643cc..ceb1ff6eae4106 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -62,7 +62,7 @@ from sentry.search.events.fields import get_function_alias, is_function from sentry.search.events.types import SAMPLING_MODES, EventsMeta, SnubaData, SnubaParams from sentry.snuba.discover import OTHER_KEY, create_groupby_dict, create_result_key, zerofill -from sentry.utils import json, snuba_rpc +from sentry.utils import json, metrics, snuba_rpc from sentry.utils.snuba import SnubaTSResult, process_value logger = logging.getLogger("sentry.snuba.spans_rpc") @@ -481,6 +481,10 @@ def process_column_values( if max_string_length is not None and isinstance(result_value, str): if len(result_value) > max_string_length: result_value = result_value[:max_string_length] + "..." + metrics.incr( + "snuba.rpc.process_column_values.truncated", + tags={"field": attribute}, + ) final_data[index][attribute] = resolved_column.process_column(result_value) From 215fa68442f12b813dfc7c20a9a7cc4775597242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 13:21:50 -0400 Subject: [PATCH 6/7] make the server minimum 64 --- .../api/endpoints/organization_events.py | 4 ++-- .../test_organization_events_ourlogs.py | 20 +++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/sentry/api/endpoints/organization_events.py b/src/sentry/api/endpoints/organization_events.py index a7b733cbbc4b42..23624a2645bebc 100644 --- a/src/sentry/api/endpoints/organization_events.py +++ b/src/sentry/api/endpoints/organization_events.py @@ -242,10 +242,10 @@ def get(self, request: Request, organization: Organization) -> Response: if truncate_str is not None: try: max_string_length = int(truncate_str) - if max_string_length < 1: + if max_string_length < 64: raise ValueError except ValueError: - return Response({"detail": "truncate must be a positive integer"}, status=400) + return Response({"detail": "truncate must be a positive integer >= 64"}, status=400) def _data_fn( dataset_query: DatasetQuery, diff --git a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py index d2350de14690c0..0ddc828702e16f 100644 --- a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py +++ b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py @@ -377,7 +377,7 @@ def test_truncate_param(self) -> None: "field": ["log.body"], "project": self.project.id, "dataset": self.dataset, - "truncate": 5, + "truncate": 64, } ) assert response.status_code == 200, response.content @@ -393,9 +393,9 @@ def test_truncate_param_invalid_type(self) -> None: } ) assert response.status_code == 400 - assert response.data["detail"] == "truncate must be a positive integer" + assert response.data["detail"] == "truncate must be a positive integer >= 64" - def test_truncate_param_invalid_value(self) -> None: + def test_truncate_param_zero_value(self) -> None: response = self.do_request( { "field": ["log.body"], @@ -405,7 +405,19 @@ def test_truncate_param_invalid_value(self) -> None: } ) assert response.status_code == 400 - assert response.data["detail"] == "truncate must be a positive integer" + assert response.data["detail"] == "truncate must be a positive integer >= 64" + + def test_truncate_param_small_value(self) -> None: + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": 63, + } + ) + assert response.status_code == 400 + assert response.data["detail"] == "truncate must be a positive integer >= 64" def test_homepage_query(self) -> None: """This query matches the one made on the logs homepage so that we can be sure everything is working at least From 5e7c02529a4800946de977aa04111b10675fef15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josh=20Goldberg=20=E2=9C=A8?= Date: Thu, 21 May 2026 13:32:46 -0400 Subject: [PATCH 7/7] lil test fix --- .../snuba/api/endpoints/test_organization_events_ourlogs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py index 0ddc828702e16f..38c730978a2083 100644 --- a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py +++ b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py @@ -367,8 +367,9 @@ def test_pagelimit(self) -> None: @pytest.mark.querybuilder def test_truncate_param(self) -> None: + long_body = "a" * 100 log = self.create_ourlog( - {"body": "hello world"}, + {"body": long_body}, timestamp=self.ten_mins_ago, ) self.store_eap_items([log]) @@ -381,7 +382,7 @@ def test_truncate_param(self) -> None: } ) assert response.status_code == 200, response.content - assert response.data["data"][0]["log.body"] == "hello..." + assert response.data["data"][0]["log.body"] == "a" * 64 + "..." def test_truncate_param_invalid_type(self) -> None: response = self.do_request(