Skip to content

Commit 6710943

Browse files
committed
fix 3243
Prevent IOBuf TLS block-pool corruption on duplicate returns Returning a block that is already cached in TLS can create a portal_next cycle or drop a reference still owned by the pool when the per-thread cache hits its limit. That corrupts the free list and can hang later traversals such as share_tls_block() and remove_tls_block_chain(). Check the whole TLS chain before reinserting single blocks or chains, stop at the first overlap while preserving any unique prefix, and skip dec_ref() for blocks that are already owned by TLS on the threshold path. Add regression tests for head/non-head duplicate returns, partial overlap, and the IOBufAsZeroCopyOutputStream::BackUp() reproduction path. Fixes #3243.
1 parent d5dab28 commit 6710943

3 files changed

Lines changed: 382 additions & 7 deletions

File tree

src/butil/iobuf.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -323,31 +323,47 @@ IOBuf::Block* share_tls_block() {
323323
// NOTE: b MUST be non-NULL and all blocks linked SHOULD not be full.
324324
void release_tls_block_chain(IOBuf::Block* b) {
325325
TLSData& tls_data = g_tls_data;
326-
size_t n = 0;
326+
size_t n_hit_threshold = 0;
327327
if (tls_data.num_blocks >= max_blocks_per_thread()) {
328328
do {
329-
++n;
330329
IOBuf::Block* const saved_next = b->u.portal_next;
331-
b->dec_ref();
330+
// If b is already cached in TLS, dec_ref() would drop a reference
331+
// still owned by the TLS list and leave a dangling pointer behind.
332+
if (!is_in_tls_block_chain(tls_data.block_head, b)) {
333+
b->dec_ref();
334+
++n_hit_threshold;
335+
}
332336
b = saved_next;
333337
} while (b);
334-
g_num_hit_tls_threshold.fetch_add(n, butil::memory_order_relaxed);
338+
g_num_hit_tls_threshold.fetch_add(
339+
n_hit_threshold, butil::memory_order_relaxed);
335340
return;
336341
}
337342
IOBuf::Block* first_b = b;
338343
IOBuf::Block* last_b = NULL;
344+
size_t n_to_tls = 0;
339345
do {
340-
++n;
341346
CHECK(!b->full());
347+
// Guard against overlap with any node already cached in TLS, not just
348+
// block_head. Returning X into H -> X would create a 2-node cycle
349+
// X -> H -> X. If the overlap happens after a unique prefix such as
350+
// A -> X, keep the prefix by linking A directly to the old TLS head.
351+
if (is_in_tls_block_chain(tls_data.block_head, b)) {
352+
break;
353+
}
354+
++n_to_tls;
355+
last_b = b;
342356
if (b->u.portal_next == NULL) {
343-
last_b = b;
344357
break;
345358
}
346359
b = b->u.portal_next;
347360
} while (true);
361+
if (last_b == NULL) {
362+
return;
363+
}
348364
last_b->u.portal_next = tls_data.block_head;
349365
tls_data.block_head = first_b;
350-
tls_data.num_blocks += n;
366+
tls_data.num_blocks += n_to_tls;
351367
if (!tls_data.registered) {
352368
tls_data.registered = true;
353369
butil::thread_atexit(remove_tls_block_chain);

src/butil/iobuf_inl.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,12 +603,28 @@ void remove_tls_block_chain();
603603

604604
IOBuf::Block* acquire_tls_block();
605605

606+
static inline bool is_in_tls_block_chain(IOBuf::Block* head, IOBuf::Block* b) {
607+
for (IOBuf::Block* p = head; p != NULL; p = p->u.portal_next) {
608+
if (p == b) {
609+
return true;
610+
}
611+
}
612+
return false;
613+
}
614+
606615
// Return one block to TLS.
607616
inline void release_tls_block(IOBuf::Block* b) {
608617
if (!b) {
609618
return;
610619
}
611620
TLSData *tls_data = get_g_tls_data();
621+
// Guard against duplicate return anywhere in the TLS list. Checking only
622+
// block_head misses cases like H -> X where returning X again would create
623+
// a 2-node cycle X -> H -> X. The TLS list is short (soft-limited), so a
624+
// linear scan is cheap here.
625+
if (is_in_tls_block_chain(tls_data->block_head, b)) {
626+
return;
627+
}
612628
if (b->full()) {
613629
b->dec_ref();
614630
} else if (tls_data->num_blocks >= max_blocks_per_thread()) {

0 commit comments

Comments
 (0)