Skip to content

Commit 87a587f

Browse files
committed
add trimkv
1 parent 6427a69 commit 87a587f

1 file changed

Lines changed: 12 additions & 1 deletion

File tree

config/publications.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ export interface Publication {
2020
}
2121

2222
export const publications: Publication[] = [
23+
{
24+
title: "Cache What Lasts: Token Retention for Memory-Bounded KV Cache in LLMs",
25+
authors: "Ngoc Bui, Shubham Sharma, Simran Lamba, Saumitra Mishra, Rex Ying",
26+
venue: "ICLR 2026",
27+
page: "trimkv",
28+
code: "https://github.com/ngocbh/trimkv",
29+
paper: "https://arxiv.org/abs/2512.03324",
30+
abstract: "We propose TRIM-KV, a learnable KV cache eviction method for long-context and long-horizon LLM inference. Instead of relying on recent attention as a proxy for importance, TRIM-KV predicts each token’s intrinsic long-term utility at creation time using a lightweight retention gate whose score decays over time. Under a fixed memory budget, the model evicts tokens with the lowest retention scores, preserving the most useful context with negligible inference overhead.",
31+
impact: "TRIM-KV reframes KV cache eviction as a trainable memory-retention problem rather than a hand-crafted heuristic. It consistently improves memory-bounded LLM inference across reasoning, procedural generation, conversational memory, and long-context understanding benchmarks, often outperforming stronger eviction baselines and in some cases even full-cache inference, while also exposing interpretable token-retention patterns.",
32+
tags: [Tag.GenerativeModel],
33+
},
2334
{
2435
title: "HEIST: A Graph Foundation Model for Spatial Transcriptomics and Proteomics Data",
2536
authors: "Hiren Madhu, João Felipe Rocha, Tinglin Huang, Siddharth Viswanath, Smita Krishnaswamy, Rex Ying",
@@ -84,7 +95,7 @@ export const publications: Publication[] = [
8495
paper: "https://arxiv.org/abs/2504.05019",
8596
abstract: "We tackle the challenge of simulating diverse human behaviors using large language models (LLMs), which often struggle to reflect the variability across individuals and subpopulations. We introduce Mixture of Personas (MoP), a probabilistic prompting approach that models population diversity through a contextual mixture of persona-based language model agents.",
8697
impact: "Our work shows that probabilistic persona modeling offers a powerful mechanism for capturing population-level diversity in LLM simulations, opening up new possibilities for social science research and data augmentation.",
87-
tags: [],
98+
tags: [Tag.Applications],
8899
},
89100
{
90101
title: "Learning Along the Arrow of Time: Hyperbolic Geometry for Backward-Compatible Representation Learning",

0 commit comments

Comments
 (0)