Skip to content

Commit 429bf37

Browse files
vNeeL-code, Gemini, and claude
committed
Hotfix: LiteRT regex emoji detokenizer & Release compile
Co-authored-by: Gemini <gemini@google.com> Co-authored-by: Claude <claude@anthropic.com>
1 parent 21657a6 commit 429bf37

1 file changed

Lines changed: 22 additions & 3 deletions

File tree

app/src/main/kotlin/com/gemma/api/GemmaEngine.kt

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ class GemmaEngine(private val context: Context) {
170170
continuation.resume("(・_・ヾ I... have no words")
171171
}
172172
} else {
173-
val cleaned = truncateRepetition(response)
173+
val cleaned = truncateRepetition(decodeHexTokens(response))
174174
Timber.d("Response complete: ${cleaned.take(50)}...")
175175
if (continuation.isActive) {
176176
continuation.resume(cleaned)
@@ -252,7 +252,7 @@ class GemmaEngine(private val context: Context) {
252252
}
253253

254254
override fun onDone() {
255-
onComplete(truncateRepetition(fullResponse))
255+
onComplete(truncateRepetition(decodeHexTokens(fullResponse)))
256256
}
257257

258258
override fun onError(throwable: Throwable) {
@@ -406,7 +406,7 @@ class GemmaEngine(private val context: Context) {
406406
if (continuation.isActive) {
407407
continuation.resume(
408408
if (response.isBlank()) "(empty response)"
409-
else truncateRepetition(response)
409+
else truncateRepetition(decodeHexTokens(response))
410410
)
411411
}
412412
}
@@ -435,6 +435,25 @@ class GemmaEngine(private val context: Context) {
435435
}
436436
}
437437

438+
/** Matches one or more consecutive literal byte tokens such as `<0xF0><0x9F>`. */
private val hexTokenRun = Regex("""(<0x[0-9A-Fa-f]{2}>)+""")

/**
 * Decodes literal byte tokens (e.g. `<0xF0><0x9F><0x98><0x80>`) into the UTF-8 text they spell out.
 *
 * Works around cases where the detokenizer leaves emoji or other non-ASCII sequences
 * in the output as raw `<0xHH>` tokens instead of characters.
 *
 * Each run of adjacent tokens is decoded as one byte sequence, so multi-byte characters
 * split across several tokens are reassembled. Bytes that do not form valid UTF-8 are
 * replaced with U+FFFD by the decoder; decoding never throws. Text that does not match
 * the exact `<0xHH>` shape (two hex digits) is left untouched.
 *
 * @param response raw model output that may contain `<0xHH>` tokens.
 * @return [response] with every run of byte tokens replaced by its decoded text.
 */
private fun decodeHexTokens(response: String): String =
    hexTokenRun.replace(response) { run ->
        // Every token is exactly six characters ("<0xHH>"), so the hex digits sit at offsets 3..4.
        // The regex already guarantees they are valid hex, so toInt(16) cannot fail here.
        val bytes = run.value
            .chunked(6) { token -> token.substring(3, 5).toInt(16).toByte() }
            .toByteArray()
        String(bytes, Charsets.UTF_8)
    }
456+
438457
fun cleanup() {
439458
runCatching {
440459
synchronized(sessionLock) {

0 commit comments

Comments
 (0)