Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/sentry/api/endpoints/organization_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,16 @@ def get(self, request: Request, organization: Organization) -> Response:

use_aggregate_conditions = request.GET.get("allowAggregateConditions", "1") == "1"

max_string_length: int | None = None
truncate_str = request.GET.get("truncate")
if truncate_str is not None:
try:
max_string_length = int(truncate_str)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh ok, this one does cast int here, so at least backend side is protected, sweet

if max_string_length < 64:
raise ValueError
except ValueError:
return Response({"detail": "truncate must be a positive integer >= 64"}, status=400)

def _data_fn(
dataset_query: DatasetQuery,
offset: int,
Expand Down Expand Up @@ -600,6 +610,11 @@ def flex_time_data_fn(limit, page_token):
sampling_mode=snuba_params.sampling_mode,
page_token=page_token,
additional_queries=additional_queries,
**(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you just pass this directly to run_table_query instead of a kwargs or put it in the config?

{"max_string_length": max_string_length}
if scoped_dataset == OurLogs
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we either error at the API level when this is passed on non logs datasets, or just enable it everywhere?

else {}
),
)

return EAPPageTokenPaginator(data_fn=flex_time_data_fn), EAPPageTokenCursor
Expand All @@ -620,6 +635,11 @@ def data_fn(offset, limit):
config=config,
sampling_mode=snuba_params.sampling_mode,
additional_queries=additional_queries,
**(
{"max_string_length": max_string_length}
if scoped_dataset == OurLogs
else {}
),
)

if save_discover_dataset_decision and discover_saved_query_id:
Expand Down
2 changes: 2 additions & 0 deletions src/sentry/snuba/ourlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def run_table_query(
search_resolver: SearchResolver | None = None,
page_token: PageToken | None = None,
additional_queries: AdditionalQueries | None = None,
max_string_length: int | None = None,
) -> EAPResponse:
"""timestamp_precise is always displayed in the UI in lieu of timestamp but since the TraceItem table isn't a DateTime64
so we need to always order by it regardless of what is actually passed to the orderby.
Expand Down Expand Up @@ -78,6 +79,7 @@ def run_table_query(
),
page_token=page_token,
additional_queries=additional_queries,
max_string_length=max_string_length,
),
debug=params.debug,
)
22 changes: 19 additions & 3 deletions src/sentry/snuba/rpc_dataset_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
from sentry.search.events.fields import get_function_alias, is_function
from sentry.search.events.types import SAMPLING_MODES, EventsMeta, SnubaData, SnubaParams
from sentry.snuba.discover import OTHER_KEY, create_groupby_dict, create_result_key, zerofill
from sentry.utils import json, snuba_rpc
from sentry.utils import json, metrics, snuba_rpc
from sentry.utils.snuba import SnubaTSResult, process_value

logger = logging.getLogger("sentry.snuba.spans_rpc")
Expand Down Expand Up @@ -91,6 +91,7 @@ class TableQuery:
page_token: PageToken | None = None
additional_queries: AdditionalQueries | None = None
extra_conditions: TraceItemFilter | None = None
max_string_length: int | None = None


@dataclass
Expand Down Expand Up @@ -238,10 +239,13 @@ def filter_project(cls, project: Project) -> bool:

@classmethod
def build_rpc_table_row_context(cls, query: TableQuery) -> dict[str, Any]:
return {
ctx: dict[str, Any] = {
"project_ids": list(query.resolver.params.project_ids),
"organization_id": query.resolver.params.organization_id,
}
if query.max_string_length is not None:
ctx["max_string_length"] = query.max_string_length
return ctx

@classmethod
def get_table_rpc_request(cls, query: TableQuery) -> TableRequest:
Expand Down Expand Up @@ -461,15 +465,27 @@ def process_column_values(
final_data: SnubaData,
attribute: Any,
resolved_column: ResolvedColumn,
**_context_kwargs: Any,
**context_kwargs: Any,
) -> None:
max_string_length: int | None = context_kwargs.get("max_string_length")
for index, result in enumerate(column_value.results):
result_value: Any
if result.is_null:
result_value = None
else:
result_value = anyvalue_to_python(result)
result_value = process_value(result_value)

# Note: post-query truncation may not be our preferred method long-term.
# We may want to set up a function that filters/truncates at the EAP side.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my own learning, why is this not the preferred approach for this PR, time/scope?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please enjoy this 84-reply thread 😄 https://sentry.slack.com/archives/C08CR80T3RB/p1778786746769669

(Kevan and Will can speak to this better than me)

if max_string_length is not None and isinstance(result_value, str):
if len(result_value) > max_string_length:
result_value = result_value[:max_string_length] + "..."
Comment thread
JoshuaKGoldberg marked this conversation as resolved.
metrics.incr(
"snuba.rpc.process_column_values.truncated",
tags={"field": attribute},
)

final_data[index][attribute] = resolved_column.process_column(result_value)

@classmethod
Expand Down
74 changes: 74 additions & 0 deletions tests/sentry/snuba/test_rpc_dataset_common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock

import pytest
from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeValue

from sentry.search.eap.types import SearchResolverConfig
from sentry.search.events.types import SnubaParams
Expand All @@ -14,6 +16,78 @@
from sentry.testutils.pytest.fixtures import django_db_all


def _make_column_value(string_values: list[str]) -> MagicMock:
column_value = MagicMock()
results = []
for val in string_values:
av = AttributeValue()
av.val_str = val
results.append(av)
column_value.results = results
return column_value


def _identity_column() -> MagicMock:
resolved_column = MagicMock()
resolved_column.process_column = lambda v: v
return resolved_column


class TestProcessColumnValuesTruncation(TestCase):
def test_truncates_long_strings(self) -> None:
long_str = "x" * 100
final_data: list[dict] = [{}]
RPCBase.process_column_values(
_make_column_value([long_str]),
final_data,
"attr",
_identity_column(),
max_string_length=10,
)
assert final_data[0]["attr"] == "x" * 10 + "..."

def test_no_truncation_without_param(self) -> None:
long_str = "x" * 100
final_data: list[dict] = [{}]
RPCBase.process_column_values(
_make_column_value([long_str]),
final_data,
"attr",
_identity_column(),
)
assert final_data[0]["attr"] == long_str

def test_does_not_truncate_non_string_values(self) -> None:
av = AttributeValue()
av.val_int = 42
column_value = MagicMock()
column_value.results = [av]
final_data: list[dict] = [{}]
RPCBase.process_column_values(
column_value,
final_data,
"attr",
_identity_column(),
max_string_length=1,
)
assert final_data[0]["attr"] == 42

def test_null_values_are_unchanged(self) -> None:
av = AttributeValue()
av.is_null = True
column_value = MagicMock()
column_value.results = [av]
final_data: list[dict] = [{}]
RPCBase.process_column_values(
column_value,
final_data,
"attr",
_identity_column(),
max_string_length=1,
)
assert final_data[0]["attr"] is None


class TestBulkTableQueries(TestCase):
def setUp(self) -> None:
super().setUp()
Expand Down
55 changes: 55 additions & 0 deletions tests/snuba/api/endpoints/test_organization_events_ourlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,61 @@ def test_pagelimit(self) -> None:
assert response.status_code == 400
assert response.data["detail"] == "Invalid per_page value. Must be between 1 and 9999."

@pytest.mark.querybuilder
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this test needs to be part of the smoke suite

def test_truncate_param(self) -> None:
long_body = "a" * 100
log = self.create_ourlog(
{"body": long_body},
timestamp=self.ten_mins_ago,
)
self.store_eap_items([log])
response = self.do_request(
{
"field": ["log.body"],
"project": self.project.id,
"dataset": self.dataset,
"truncate": 64,
}
)
assert response.status_code == 200, response.content
Comment thread
JoshuaKGoldberg marked this conversation as resolved.
assert response.data["data"][0]["log.body"] == "a" * 64 + "..."

def test_truncate_param_invalid_type(self) -> None:
response = self.do_request(
{
"field": ["log.body"],
"project": self.project.id,
"dataset": self.dataset,
"truncate": "notanumber",
}
)
assert response.status_code == 400
assert response.data["detail"] == "truncate must be a positive integer >= 64"

def test_truncate_param_zero_value(self) -> None:
response = self.do_request(
{
"field": ["log.body"],
"project": self.project.id,
"dataset": self.dataset,
"truncate": 0,
}
)
assert response.status_code == 400
assert response.data["detail"] == "truncate must be a positive integer >= 64"

def test_truncate_param_small_value(self) -> None:
response = self.do_request(
{
"field": ["log.body"],
"project": self.project.id,
"dataset": self.dataset,
"truncate": 63,
}
)
assert response.status_code == 400
assert response.data["detail"] == "truncate must be a positive integer >= 64"

def test_homepage_query(self) -> None:
"""This query matches the one made on the logs homepage so that we can be sure everything is working at least
for the initial load"""
Expand Down
Loading