-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
feat(ourlogs): add truncate RPC parameter for logs events query
#116008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
6a37c63
cec835d
5812a12
1561bf3
b43d8a4
215fa68
5e7c025
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -237,6 +237,16 @@ def get(self, request: Request, organization: Organization) -> Response: | |
|
|
||
| use_aggregate_conditions = request.GET.get("allowAggregateConditions", "1") == "1" | ||
|
|
||
| max_string_length: int | None = None | ||
| truncate_str = request.GET.get("truncate") | ||
| if truncate_str is not None: | ||
| try: | ||
| max_string_length = int(truncate_str) | ||
| if max_string_length < 64: | ||
| raise ValueError | ||
| except ValueError: | ||
| return Response({"detail": "truncate must be a positive integer >= 64"}, status=400) | ||
|
|
||
| def _data_fn( | ||
| dataset_query: DatasetQuery, | ||
| offset: int, | ||
|
|
@@ -600,6 +610,11 @@ def flex_time_data_fn(limit, page_token): | |
| sampling_mode=snuba_params.sampling_mode, | ||
| page_token=page_token, | ||
| additional_queries=additional_queries, | ||
| **( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you just pass this directly to |
||
| {"max_string_length": max_string_length} | ||
| if scoped_dataset == OurLogs | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we either error at the API level when this is passed on non-logs datasets, or just enable it everywhere? |
||
| else {} | ||
| ), | ||
| ) | ||
|
|
||
| return EAPPageTokenPaginator(data_fn=flex_time_data_fn), EAPPageTokenCursor | ||
|
|
@@ -620,6 +635,11 @@ def data_fn(offset, limit): | |
| config=config, | ||
| sampling_mode=snuba_params.sampling_mode, | ||
| additional_queries=additional_queries, | ||
| **( | ||
| {"max_string_length": max_string_length} | ||
| if scoped_dataset == OurLogs | ||
| else {} | ||
| ), | ||
| ) | ||
|
|
||
| if save_discover_dataset_decision and discover_saved_query_id: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,7 +62,7 @@ | |
| from sentry.search.events.fields import get_function_alias, is_function | ||
| from sentry.search.events.types import SAMPLING_MODES, EventsMeta, SnubaData, SnubaParams | ||
| from sentry.snuba.discover import OTHER_KEY, create_groupby_dict, create_result_key, zerofill | ||
| from sentry.utils import json, snuba_rpc | ||
| from sentry.utils import json, metrics, snuba_rpc | ||
| from sentry.utils.snuba import SnubaTSResult, process_value | ||
|
|
||
| logger = logging.getLogger("sentry.snuba.spans_rpc") | ||
|
|
@@ -91,6 +91,7 @@ class TableQuery: | |
| page_token: PageToken | None = None | ||
| additional_queries: AdditionalQueries | None = None | ||
| extra_conditions: TraceItemFilter | None = None | ||
| max_string_length: int | None = None | ||
|
|
||
|
|
||
| @dataclass | ||
|
|
@@ -238,10 +239,13 @@ def filter_project(cls, project: Project) -> bool: | |
|
|
||
| @classmethod | ||
| def build_rpc_table_row_context(cls, query: TableQuery) -> dict[str, Any]: | ||
| return { | ||
| ctx: dict[str, Any] = { | ||
| "project_ids": list(query.resolver.params.project_ids), | ||
| "organization_id": query.resolver.params.organization_id, | ||
| } | ||
| if query.max_string_length is not None: | ||
| ctx["max_string_length"] = query.max_string_length | ||
| return ctx | ||
|
|
||
| @classmethod | ||
| def get_table_rpc_request(cls, query: TableQuery) -> TableRequest: | ||
|
|
@@ -461,15 +465,27 @@ def process_column_values( | |
| final_data: SnubaData, | ||
| attribute: Any, | ||
| resolved_column: ResolvedColumn, | ||
| **_context_kwargs: Any, | ||
| **context_kwargs: Any, | ||
| ) -> None: | ||
| max_string_length: int | None = context_kwargs.get("max_string_length") | ||
| for index, result in enumerate(column_value.results): | ||
| result_value: Any | ||
| if result.is_null: | ||
| result_value = None | ||
| else: | ||
| result_value = anyvalue_to_python(result) | ||
| result_value = process_value(result_value) | ||
|
|
||
| # Note: post-query truncation may not be our preferred method long-term. | ||
| # We may want to set up a function that filters/truncates at the EAP side. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For my own learning, why is this not the preferred approach for this PR — is it a matter of time/scope?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please enjoy this 84-reply thread 😄 https://sentry.slack.com/archives/C08CR80T3RB/p1778786746769669 (Kevan and Will can speak to this better than me) |
||
| if max_string_length is not None and isinstance(result_value, str): | ||
| if len(result_value) > max_string_length: | ||
| result_value = result_value[:max_string_length] + "..." | ||
|
JoshuaKGoldberg marked this conversation as resolved.
|
||
| metrics.incr( | ||
| "snuba.rpc.process_column_values.truncated", | ||
| tags={"field": attribute}, | ||
| ) | ||
|
|
||
| final_data[index][attribute] = resolved_column.process_column(result_value) | ||
|
|
||
| @classmethod | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -365,6 +365,61 @@ def test_pagelimit(self) -> None: | |
| assert response.status_code == 400 | ||
| assert response.data["detail"] == "Invalid per_page value. Must be between 1 and 9999." | ||
|
|
||
| @pytest.mark.querybuilder | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this test needs to be part of the smoke suite |
||
| def test_truncate_param(self) -> None: | ||
| long_body = "a" * 100 | ||
| log = self.create_ourlog( | ||
| {"body": long_body}, | ||
| timestamp=self.ten_mins_ago, | ||
| ) | ||
| self.store_eap_items([log]) | ||
| response = self.do_request( | ||
| { | ||
| "field": ["log.body"], | ||
| "project": self.project.id, | ||
| "dataset": self.dataset, | ||
| "truncate": 64, | ||
| } | ||
| ) | ||
| assert response.status_code == 200, response.content | ||
|
JoshuaKGoldberg marked this conversation as resolved.
|
||
| assert response.data["data"][0]["log.body"] == "a" * 64 + "..." | ||
|
|
||
| def test_truncate_param_invalid_type(self) -> None: | ||
| response = self.do_request( | ||
| { | ||
| "field": ["log.body"], | ||
| "project": self.project.id, | ||
| "dataset": self.dataset, | ||
| "truncate": "notanumber", | ||
| } | ||
| ) | ||
| assert response.status_code == 400 | ||
| assert response.data["detail"] == "truncate must be a positive integer >= 64" | ||
|
|
||
| def test_truncate_param_zero_value(self) -> None: | ||
| response = self.do_request( | ||
| { | ||
| "field": ["log.body"], | ||
| "project": self.project.id, | ||
| "dataset": self.dataset, | ||
| "truncate": 0, | ||
| } | ||
| ) | ||
| assert response.status_code == 400 | ||
| assert response.data["detail"] == "truncate must be a positive integer >= 64" | ||
|
|
||
| def test_truncate_param_small_value(self) -> None: | ||
| response = self.do_request( | ||
| { | ||
| "field": ["log.body"], | ||
| "project": self.project.id, | ||
| "dataset": self.dataset, | ||
| "truncate": 63, | ||
| } | ||
| ) | ||
| assert response.status_code == 400 | ||
| assert response.data["detail"] == "truncate must be a positive integer >= 64" | ||
|
|
||
| def test_homepage_query(self) -> None: | ||
| """This query matches the one made on the logs homepage so that we can be sure everything is working at least | ||
| for the initial load""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ahh ok, this one does cast to int here, so at least the backend side is protected, sweet