diff --git a/src/sentry/api/endpoints/organization_events.py b/src/sentry/api/endpoints/organization_events.py index 17429fd1c0c3..23624a2645be 100644 --- a/src/sentry/api/endpoints/organization_events.py +++ b/src/sentry/api/endpoints/organization_events.py @@ -237,6 +237,16 @@ def get(self, request: Request, organization: Organization) -> Response: use_aggregate_conditions = request.GET.get("allowAggregateConditions", "1") == "1" + max_string_length: int | None = None + truncate_str = request.GET.get("truncate") + if truncate_str is not None: + try: + max_string_length = int(truncate_str) + if max_string_length < 64: + raise ValueError + except ValueError: + return Response({"detail": "truncate must be a positive integer >= 64"}, status=400) + def _data_fn( dataset_query: DatasetQuery, offset: int, @@ -600,6 +610,11 @@ def flex_time_data_fn(limit, page_token): sampling_mode=snuba_params.sampling_mode, page_token=page_token, additional_queries=additional_queries, + **( + {"max_string_length": max_string_length} + if scoped_dataset == OurLogs + else {} + ), ) return EAPPageTokenPaginator(data_fn=flex_time_data_fn), EAPPageTokenCursor @@ -620,6 +635,11 @@ def data_fn(offset, limit): config=config, sampling_mode=snuba_params.sampling_mode, additional_queries=additional_queries, + **( + {"max_string_length": max_string_length} + if scoped_dataset == OurLogs + else {} + ), ) if save_discover_dataset_decision and discover_saved_query_id: diff --git a/src/sentry/snuba/ourlogs.py b/src/sentry/snuba/ourlogs.py index 1707e73b03d7..287b7dff66a5 100644 --- a/src/sentry/snuba/ourlogs.py +++ b/src/sentry/snuba/ourlogs.py @@ -40,6 +40,7 @@ def run_table_query( search_resolver: SearchResolver | None = None, page_token: PageToken | None = None, additional_queries: AdditionalQueries | None = None, + max_string_length: int | None = None, ) -> EAPResponse: """timestamp_precise is always displayed in the UI in lieu of timestamp but since the TraceItem table isn't a DateTime64 so we need to always order by it regardless of what is actually passed to the orderby. @@ -78,6 +79,7 @@ def run_table_query( ), page_token=page_token, additional_queries=additional_queries, + max_string_length=max_string_length, ), debug=params.debug, ) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index 37bcdc4dade7..ceb1ff6eae41 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -62,7 +62,7 @@ from sentry.search.events.fields import get_function_alias, is_function from sentry.search.events.types import SAMPLING_MODES, EventsMeta, SnubaData, SnubaParams from sentry.snuba.discover import OTHER_KEY, create_groupby_dict, create_result_key, zerofill -from sentry.utils import json, snuba_rpc +from sentry.utils import json, metrics, snuba_rpc from sentry.utils.snuba import SnubaTSResult, process_value logger = logging.getLogger("sentry.snuba.spans_rpc") @@ -91,6 +91,7 @@ class TableQuery: page_token: PageToken | None = None additional_queries: AdditionalQueries | None = None extra_conditions: TraceItemFilter | None = None + max_string_length: int | None = None @dataclass @@ -238,10 +239,13 @@ def filter_project(cls, project: Project) -> bool: @classmethod def build_rpc_table_row_context(cls, query: TableQuery) -> dict[str, Any]: - return { + ctx: dict[str, Any] = { "project_ids": list(query.resolver.params.project_ids), "organization_id": query.resolver.params.organization_id, } + if query.max_string_length is not None: + ctx["max_string_length"] = query.max_string_length + return ctx @classmethod def get_table_rpc_request(cls, query: TableQuery) -> TableRequest: @@ -461,8 +465,9 @@ def process_column_values( final_data: SnubaData, attribute: Any, resolved_column: ResolvedColumn, - **_context_kwargs: Any, + **context_kwargs: Any, ) -> None: + max_string_length: int | None = context_kwargs.get("max_string_length") for index, result in enumerate(column_value.results): result_value: Any if result.is_null: @@ -470,6 +475,17 @@ def process_column_values( else: result_value = anyvalue_to_python(result) result_value = process_value(result_value) + + # Note: post-query truncation may not be our preferred method long-term. + # We may want to set up a function that filters/truncates at the EAP side. + if max_string_length is not None and isinstance(result_value, str): + if len(result_value) > max_string_length: + result_value = result_value[:max_string_length] + "..." + metrics.incr( + "snuba.rpc.process_column_values.truncated", + tags={"field": attribute}, + ) + final_data[index][attribute] = resolved_column.process_column(result_value) @classmethod diff --git a/tests/sentry/snuba/test_rpc_dataset_common.py b/tests/sentry/snuba/test_rpc_dataset_common.py index b9586755bc26..a0549efb7473 100644 --- a/tests/sentry/snuba/test_rpc_dataset_common.py +++ b/tests/sentry/snuba/test_rpc_dataset_common.py @@ -1,7 +1,9 @@ from datetime import datetime, timedelta, timezone +from unittest.mock import MagicMock import pytest from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig +from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeValue from sentry.search.eap.types import SearchResolverConfig from sentry.search.events.types import SnubaParams @@ -14,6 +16,78 @@ from sentry.testutils.pytest.fixtures import django_db_all +def _make_column_value(string_values: list[str]) -> MagicMock: + column_value = MagicMock() + results = [] + for val in string_values: + av = AttributeValue() + av.val_str = val + results.append(av) + column_value.results = results + return column_value + + +def _identity_column() -> MagicMock: + resolved_column = MagicMock() + resolved_column.process_column = lambda v: v + return resolved_column + + +class TestProcessColumnValuesTruncation(TestCase): + def test_truncates_long_strings(self) -> None: + long_str = "x" * 100 + final_data: list[dict] = [{}] + RPCBase.process_column_values( + _make_column_value([long_str]), + final_data, + "attr", + _identity_column(), + max_string_length=10, + ) + assert final_data[0]["attr"] == "x" * 10 + "..." + + def test_no_truncation_without_param(self) -> None: + long_str = "x" * 100 + final_data: list[dict] = [{}] + RPCBase.process_column_values( + _make_column_value([long_str]), + final_data, + "attr", + _identity_column(), + ) + assert final_data[0]["attr"] == long_str + + def test_does_not_truncate_non_string_values(self) -> None: + av = AttributeValue() + av.val_int = 42 + column_value = MagicMock() + column_value.results = [av] + final_data: list[dict] = [{}] + RPCBase.process_column_values( + column_value, + final_data, + "attr", + _identity_column(), + max_string_length=1, + ) + assert final_data[0]["attr"] == 42 + + def test_null_values_are_unchanged(self) -> None: + av = AttributeValue() + av.is_null = True + column_value = MagicMock() + column_value.results = [av] + final_data: list[dict] = [{}] + RPCBase.process_column_values( + column_value, + final_data, + "attr", + _identity_column(), + max_string_length=1, + ) + assert final_data[0]["attr"] is None + + class TestBulkTableQueries(TestCase): def setUp(self) -> None: super().setUp() diff --git a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py index 2411162f46b7..38c730978a20 100644 --- a/tests/snuba/api/endpoints/test_organization_events_ourlogs.py +++ b/tests/snuba/api/endpoints/test_organization_events_ourlogs.py @@ -365,6 +365,61 @@ def test_pagelimit(self) -> None: assert response.status_code == 400 assert response.data["detail"] == "Invalid per_page value. Must be between 1 and 9999." + @pytest.mark.querybuilder + def test_truncate_param(self) -> None: + long_body = "a" * 100 + log = self.create_ourlog( + {"body": long_body}, + timestamp=self.ten_mins_ago, + ) + self.store_eap_items([log]) + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": 64, + } + ) + assert response.status_code == 200, response.content + assert response.data["data"][0]["log.body"] == "a" * 64 + "..." + + def test_truncate_param_invalid_type(self) -> None: + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": "notanumber", + } + ) + assert response.status_code == 400 + assert response.data["detail"] == "truncate must be a positive integer >= 64" + + def test_truncate_param_zero_value(self) -> None: + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": 0, + } + ) + assert response.status_code == 400 + assert response.data["detail"] == "truncate must be a positive integer >= 64" + + def test_truncate_param_small_value(self) -> None: + response = self.do_request( + { + "field": ["log.body"], + "project": self.project.id, + "dataset": self.dataset, + "truncate": 63, + } + ) + assert response.status_code == 400 + assert response.data["detail"] == "truncate must be a positive integer >= 64" + def test_homepage_query(self) -> None: """This query matches the one made on the logs homepage so that we can be sure everything is working at least for the initial load"""