From a3ecbe57f7fa01fe2df76ee55035836cb2fd694a Mon Sep 17 00:00:00 2001 From: Charles Cheng Date: Fri, 3 Jul 2026 15:46:07 +0800 Subject: [PATCH] fix(workflow): preserve non-ASCII characters in LLM agent node input Node input passed to an LLM agent (workflow node input and delegated-task function-call args) was serialized with json.dumps' default ensure_ascii=True, escaping every non-ASCII character to a \uXXXX sequence. This bloats prompt tokens and degrades model responses for non-English inputs. Serialize with ensure_ascii=False so characters reach the model as-is, matching how the output-schema path already serializes responses. --- src/google/adk/flows/llm_flows/contents.py | 2 +- src/google/adk/workflow/_llm_agent_wrapper.py | 2 +- .../flows/llm_flows/test_contents.py | 38 ++++++++++++++++++ .../workflow/test_llm_agent_as_node.py | 39 +++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src/google/adk/flows/llm_flows/contents.py b/src/google/adk/flows/llm_flows/contents.py index 8d1395769f4..6901472b0be 100644 --- a/src/google/adk/flows/llm_flows/contents.py +++ b/src/google/adk/flows/llm_flows/contents.py @@ -360,7 +360,7 @@ def _build_task_input_user_content( try: import json as _json - text = _json.dumps(dict(fc.args)) + text = _json.dumps(dict(fc.args), ensure_ascii=False) except (TypeError, ValueError): text = str(fc.args) parts = [types.Part(text=text)] diff --git a/src/google/adk/workflow/_llm_agent_wrapper.py b/src/google/adk/workflow/_llm_agent_wrapper.py index a4f0dc900ed..4ce47e7f126 100644 --- a/src/google/adk/workflow/_llm_agent_wrapper.py +++ b/src/google/adk/workflow/_llm_agent_wrapper.py @@ -194,7 +194,7 @@ def _node_input_to_content(node_input: Any) -> types.Content: elif isinstance(node_input, BaseModel): text = node_input.model_dump_json() elif isinstance(node_input, (dict, list)): - text = json.dumps(node_input) + text = json.dumps(node_input, ensure_ascii=False) else: text = str(node_input) return types.Content(role='user', 
parts=[types.Part(text=text)]) diff --git a/tests/unittests/flows/llm_flows/test_contents.py b/tests/unittests/flows/llm_flows/test_contents.py index 8b35a8ba70c..7b0b5e70bab 100644 --- a/tests/unittests/flows/llm_flows/test_contents.py +++ b/tests/unittests/flows/llm_flows/test_contents.py @@ -1598,3 +1598,41 @@ def test_rearrange_async_function_responses_early_returns_when_no_responses(): events ) assert result is events + + +def test_task_input_user_content_preserves_non_ascii(): + """Delegated task input must not escape non-ASCII FC args (issue #6279). + + A chat coordinator delegates to a task sub-agent via a function call; the + task agent's first user turn is rebuilt from the FC args. Escaping non-Latin + characters to ``\\uXXXX`` there bloats prompt tokens and degrades responses. + """ + fc_id = "fc_task_1" + events = [ + Event( + invocation_id="inv1", + author="coordinator", + content=types.Content( + role="model", + parts=[ + types.Part( + function_call=types.FunctionCall( + id=fc_id, + name="delegate", + args={"query": "שלום עולם", "city": "北京"}, + ) + ) + ], + ), + ), + ] + + content = contents._build_task_input_user_content( # pylint: disable=protected-access + events, isolation_scope=fc_id + ) + + assert content is not None and content.parts + text = content.parts[0].text + assert "שלום עולם" in text + assert "北京" in text + assert "\\u" not in text diff --git a/tests/unittests/workflow/test_llm_agent_as_node.py b/tests/unittests/workflow/test_llm_agent_as_node.py index 7b784be4ccd..3adaefb30b7 100644 --- a/tests/unittests/workflow/test_llm_agent_as_node.py +++ b/tests/unittests/workflow/test_llm_agent_as_node.py @@ -234,6 +234,45 @@ async def test_single_turn_input_event_inherits_branch_and_scope( assert event.isolation_scope == 'scope-1' +# --- _node_input_to_content non-ASCII handling --- + + +class TestNodeInputNonAscii: + """Node input must reach the LLM without escaping non-ASCII characters. + + Escaping (e.g. 
json.dumps' default ensure_ascii=True) turns each non-Latin + character into a ``\\uXXXX`` sequence, which bloats prompt tokens and + degrades model responses for non-English inputs (issue #6279). + """ + + def _text_of(self, node_input: Any) -> str: + from google.adk.workflow._llm_agent_wrapper import _node_input_to_content + + content = _node_input_to_content(node_input) + assert content.parts and content.parts[0].text is not None + return content.parts[0].text + + def test_dict_input_preserves_non_ascii(self): + text = self._text_of({'query': 'שלום עולם', 'city': '北京'}) + assert 'שלום עולם' in text + assert '北京' in text + assert '\\u' not in text + + def test_list_input_preserves_non_ascii(self): + text = self._text_of(['שלום', '你好']) + assert 'שלום' in text + assert '你好' in text + assert '\\u' not in text + + def test_base_model_input_preserves_non_ascii(self): + class _Payload(BaseModel): + topic: str + + text = self._text_of(_Payload(topic='עברית 中文')) + assert 'עברית 中文' in text + assert '\\u' not in text + + # --- build_node auto-wrapping ---