Skip to content

Commit 762acd4

Browse files
authored
fix summary language respects user primary language setting (#6470)
## Summary

- Summaries were generated in the conversation language instead of the user's primary language setting
- Added `output_language_code` param to `get_transcript_structure`, `get_reprocess_transcript_structure`, `extract_action_items`, and `get_message_structure`
- `_get_structured` now fetches the user's language preference and passes it as the output language — falls back to conversation language if no preference is set

## Demo

https://github.com/user-attachments/assets/3a9b793f-0285-43b6-b45a-3625a3f66b79

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2 parents f668af8 + 75068e0 commit 762acd4

4 files changed

Lines changed: 44 additions & 15 deletions

File tree

app/pubspec.lock

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,10 @@ packages:
253253
dependency: transitive
254254
description:
255255
name: characters
256-
sha256: faf38497bda5ead2a8c7615f4f7939df04333478bf32e4173fcb06d428b5716b
256+
sha256: f71061c654a3380576a52b451dd5532377954cf9dbd272a78fc8479606670803
257257
url: "https://pub.dev"
258258
source: hosted
259-
version: "1.4.1"
259+
version: "1.4.0"
260260
checked_yaml:
261261
dependency: transitive
262262
description:
@@ -1457,18 +1457,18 @@ packages:
14571457
dependency: transitive
14581458
description:
14591459
name: matcher
1460-
sha256: "12956d0ad8390bbcc63ca2e1469c0619946ccb52809807067a7020d57e647aa6"
1460+
sha256: dc58c723c3c24bf8d3e2d3ad3f2f9d7bd9cf43ec6feaa64181775e60190153f2
14611461
url: "https://pub.dev"
14621462
source: hosted
1463-
version: "0.12.18"
1463+
version: "0.12.17"
14641464
material_color_utilities:
14651465
dependency: transitive
14661466
description:
14671467
name: material_color_utilities
1468-
sha256: "9c337007e82b1889149c82ed242ed1cb24a66044e30979c44912381e9be4c48b"
1468+
sha256: f7142bb1154231d7ea5f96bc7bde4bda2a0945d2806bb11670e30b850d56bdec
14691469
url: "https://pub.dev"
14701470
source: hosted
1471-
version: "0.13.0"
1471+
version: "0.11.1"
14721472
mcumgr_flutter:
14731473
dependency: "direct main"
14741474
description:
@@ -2219,10 +2219,10 @@ packages:
22192219
dependency: transitive
22202220
description:
22212221
name: test_api
2222-
sha256: "19a78f63e83d3a61f00826d09bc2f60e191bf3504183c001262be6ac75589fb8"
2222+
sha256: ab2726c1a94d3176a45960b6234466ec367179b87dd74f1611adb1f3b5fb9d55
22232223
url: "https://pub.dev"
22242224
source: hosted
2225-
version: "0.7.8"
2225+
version: "0.7.7"
22262226
time:
22272227
dependency: transitive
22282228
description:

backend/utils/conversations/process_conversation.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def _get_structured(
8585
) -> Tuple[Structured, bool]:
8686
try:
8787
tz = notification_db.get_user_time_zone(uid)
88+
user_language = users_db.get_user_language_preference(uid) or language_code
8889

8990
# Fetch existing action items from past 2 days for deduplication
9091
existing_action_items = None
@@ -113,6 +114,7 @@ def _get_structured(
113114
language_code,
114115
tz,
115116
calendar_meeting_context=calendar_context,
117+
output_language_code=user_language,
116118
)
117119
with track_usage(uid, Features.CONVERSATION_ACTION_ITEMS):
118120
structured.action_items = extract_action_items(
@@ -122,13 +124,19 @@ def _get_structured(
122124
tz,
123125
existing_action_items=existing_action_items,
124126
calendar_meeting_context=calendar_context,
127+
output_language_code=user_language,
125128
)
126129
return structured, False
127130

128131
if conversation.text_source == ExternalIntegrationConversationSource.message:
129132
with track_usage(uid, Features.CONVERSATION_STRUCTURE):
130133
structured = get_message_structure(
131-
conversation.text, conversation.started_at, language_code, tz, conversation.text_source_spec
134+
conversation.text,
135+
conversation.started_at,
136+
language_code,
137+
tz,
138+
conversation.text_source_spec,
139+
output_language_code=user_language,
132140
)
133141
return structured, False
134142

@@ -153,6 +161,7 @@ def _get_structured(
153161
tz,
154162
conversation.structured.title,
155163
photos=conversation.photos,
164+
output_language_code=user_language,
156165
)
157166
with track_usage(uid, Features.CONVERSATION_ACTION_ITEMS):
158167
structured.action_items = extract_action_items(
@@ -162,6 +171,7 @@ def _get_structured(
162171
tz,
163172
photos=conversation.photos,
164173
existing_action_items=existing_action_items,
174+
output_language_code=user_language,
165175
)
166176
return structured, False
167177

@@ -185,6 +195,7 @@ def _get_structured(
185195
tz,
186196
photos=conversation.photos,
187197
calendar_meeting_context=calendar_context,
198+
output_language_code=user_language,
188199
)
189200
with track_usage(uid, Features.CONVERSATION_ACTION_ITEMS):
190201
structured.action_items = extract_action_items(
@@ -195,6 +206,7 @@ def _get_structured(
195206
photos=conversation.photos,
196207
existing_action_items=existing_action_items,
197208
calendar_meeting_context=calendar_context,
209+
output_language_code=user_language,
198210
)
199211
return structured, False
200212
except Exception as e:

backend/utils/llm/conversation_processing.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ def extract_action_items(
306306
photos: List[ConversationPhoto] = None,
307307
existing_action_items: List[dict] = None,
308308
calendar_meeting_context: 'CalendarMeetingContext' = None,
309+
output_language_code: str = None,
309310
) -> List[ActionItem]:
310311
"""
311312
Dedicated function to extract action items from conversation content.
@@ -538,9 +539,10 @@ def extract_action_items(
538539
' ', ''
539540
).strip()
540541

542+
response_language = output_language_code or language_code
541543
action_items_parser = PydanticOutputParser(pydantic_object=ActionItemsExtraction)
542544
# Second system message: conversation context + existing items (dynamic, per-conversation)
543-
context_message = 'The content language is {language_code}. Use the same language {language_code} for your response.\n\nContent:\n{conversation_context}{existing_items_context}'
545+
context_message = 'The content language is {language_code}. You MUST respond entirely in {response_language}.\n\nContent:\n{conversation_context}{existing_items_context}'
544546
prompt = ChatPromptTemplate.from_messages([('system', instructions_text), ('system', context_message)])
545547
chain = prompt | llm_medium_experiment.bind(prompt_cache_key="omi-extract-actions") | action_items_parser
546548

@@ -552,6 +554,7 @@ def extract_action_items(
552554
'conversation_context': conversation_context,
553555
'format_instructions': action_items_parser.get_format_instructions(),
554556
'language_code': language_code,
557+
'response_language': response_language,
555558
'started_at': started_at.isoformat(),
556559
'current_time': current_time.isoformat(),
557560
'tz': tz,
@@ -589,14 +592,17 @@ def get_transcript_structure(
589592
tz: str,
590593
photos: List[ConversationPhoto] = None,
591594
calendar_meeting_context: 'CalendarMeetingContext' = None,
595+
output_language_code: str = None,
592596
) -> Structured:
593597
conversation_context = _build_conversation_context(transcript, photos, calendar_meeting_context)
594598
if not conversation_context:
595599
return Structured() # Should be caught by discard logic, but as a safeguard.
596600

601+
response_language = output_language_code or language_code
602+
597603
# First system message: task-specific instructions (static prefix enables cross-conversation caching)
604+
# NOTE: language instructions are in context_message (second message) to keep this prefix fully static.
598605
instructions_text = '''You are an expert content analyzer. Your task is to analyze the provided content (which could be a transcript, a series of photo descriptions from a wearable camera, or both) and provide structure and clarity.
599-
The content language is {language_code}. Use the same language {language_code} for your response.
600606
601607
CRITICAL: If CALENDAR MEETING CONTEXT is provided with participant names, you MUST use those names:
602608
- The conversation DEFINITELY happened between the named participants
@@ -641,7 +647,7 @@ def get_transcript_structure(
641647
).strip()
642648

643649
# Second system message: conversation context (dynamic, per-conversation)
644-
context_message = 'Content:\n{conversation_context}'
650+
context_message = 'The content language is {language_code}. You MUST respond entirely in {response_language}.\n\nContent:\n{conversation_context}'
645651
prompt = ChatPromptTemplate.from_messages([('system', instructions_text), ('system', context_message)])
646652
chain = prompt | llm_medium_experiment.bind(prompt_cache_key="omi-transcript-structure") | parser
647653

@@ -650,6 +656,7 @@ def get_transcript_structure(
650656
'conversation_context': conversation_context,
651657
'format_instructions': parser.get_format_instructions(),
652658
'language_code': language_code,
659+
'response_language': response_language,
653660
'started_at': started_at.isoformat(),
654661
'tz': tz,
655662
}
@@ -670,6 +677,7 @@ def get_reprocess_transcript_structure(
670677
tz: str,
671678
title: str,
672679
photos: List[ConversationPhoto] = None,
680+
output_language_code: str = None,
673681
) -> Structured:
674682
context_parts = []
675683
if transcript and transcript.strip():
@@ -684,9 +692,10 @@ def get_reprocess_transcript_structure(
684692
return Structured()
685693

686694
full_context = "\n\n".join(context_parts)
695+
response_language = output_language_code or language_code
687696

688697
prompt_text = '''You are an expert content analyzer. Your task is to analyze the provided content (which could be a transcript, a series of photo descriptions from a wearable camera, or both) and provide structure and clarity.
689-
The content language is {language_code}. Use the same language {language_code} for your response.
698+
The content language is {language_code}. You MUST respond entirely in {response_language}.
690699
691700
For the title, use ```{title}```, if it is empty, use the main topic of the content.
692701
For the overview, condense the content into a summary with the main topics discussed or scenes observed, making sure to capture the key points and important details.
@@ -732,6 +741,7 @@ def get_reprocess_transcript_structure(
732741
'title': title,
733742
'format_instructions': parser.get_format_instructions(),
734743
'language_code': language_code,
744+
'response_language': response_language,
735745
'started_at': started_at.isoformat(),
736746
'tz': tz,
737747
}

backend/utils/llm/external_integrations.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,17 @@
1515

1616

1717
def get_message_structure(
18-
text: str, started_at: datetime, language_code: str, tz: str, text_source_spec: str = None
18+
text: str,
19+
started_at: datetime,
20+
language_code: str,
21+
tz: str,
22+
text_source_spec: str = None,
23+
output_language_code: str = None,
1924
) -> Structured:
25+
response_language = output_language_code or language_code
2026
prompt_text = '''
2127
You are an expert message analyzer. Your task is to analyze the message content and provide structure and clarity.
22-
The message language is {language_code}. Use the same language {language_code} for your response.
28+
The message language is {language_code}. You MUST respond entirely in {response_language}.
2329
2430
For the title, create a concise title that captures the main topic of the message.
2531
For the overview, summarize the message with the main points discussed, make sure to capture the key information and important details.
@@ -38,6 +44,7 @@ def get_message_structure(
3844
response = chain.invoke(
3945
{
4046
'language_code': language_code,
47+
'response_language': response_language,
4148
'started_at': started_at.isoformat(),
4249
'tz': tz,
4350
'text': text,

0 commit comments

Comments
 (0)