Re: [PR] Fix IOBuf TLS block pool self-loop caused by double-return (#3243) (brpc)

via GitHub Sun, 15 Mar 2026 19:13:42 -0700


Copilot commented on code in PR #3244:
URL: https://github.com/apache/brpc/pull/3244#discussion_r2937820083



##########
test/iobuf_unittest.cpp:
##########
@@ -1946,4 +1946,161 @@ TEST_F(IOBufTest, single_iobuf) {
     ASSERT_TRUE(p != nullptr);
 }
 
+// Reproduction test for https://github.com/apache/brpc/issues/3243
+//
+// release_tls_block() does not guard against a block being returned while it 
is
+// already the TLS list head.  The assignment `b->portal_next = block_head`
+// becomes `b->portal_next = b` (self-loop).  Any later traversal of the TLS
+// chain — remove_tls_block_chain() at thread exit, share_tls_block(), etc. —
+// spins forever, silently hanging the thread.
+//
+// The tests below run in a dedicated thread so the main thread's TLS is
+// unaffected and a self-loop can be detected and cleaned up safely.
+
+static void* double_return_release_tls_block_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Step 1: acquire a fresh block (removed from TLS or newly created).
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+
+    // Step 2: return it to TLS — b is now block_head.
+    butil::iobuf::release_tls_block(b);
+
+    // Step 3: return the *same* block again while it is still block_head.
+    // BUG: release_tls_block executes
+    //   b->portal_next = block_head   // == b  →  self-loop!
+    //   block_head = b                // no-op
+    butil::iobuf::release_tls_block(b);
+
+    // Detect the self-loop.
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        // Break the cycle so thread_atexit(remove_tls_block_chain) won't hang.
+        // acquire_tls_block() sets portal_next = NULL, breaking the loop.
+        butil::iobuf::acquire_tls_block();
+        // block_head is still b (was self-referencing), acquire again to 
drain.
+        butil::iobuf::acquire_tls_block();
+        // TLS is now empty — thread can exit without hanging.
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_double_return_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "release_tls_block() created a self-loop (b->portal_next == b) "
+           "when the same block was double-returned while already being the "
+           "TLS block_head.  This causes remove_tls_block_chain() to loop "
+           "infinitely at thread exit.  (GitHub issue #3243)";
+}
+
+static void* double_return_release_tls_block_chain_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Acquire a block and put it back as TLS head.
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+    butil::iobuf::release_tls_block(b);
+    // Now: block_head -> b -> NULL
+
+    // Return the same block through release_tls_block_chain.
+    // The chain is: b -> NULL  (b->portal_next was set to NULL by acquire).
+    // Internally: last_b = b, then last_b->portal_next = block_head = b
+    //   → self-loop!
+    b->u.portal_next = NULL; // simulate a single-block chain
+    butil::iobuf::release_tls_block_chain(b);
+
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        butil::iobuf::acquire_tls_block();
+        butil::iobuf::acquire_tls_block();

Review Comment:
   Same leak issue here: the cycle-breaking calls to `acquire_tls_block()` 
discard the returned blocks without releasing their references, which can leak 
memory when the bug reproduces and trip the test suite's leak checks. Store the 
returned pointers and `dec_ref()` them after draining.
   



##########
src/butil/iobuf.cpp:
##########
@@ -334,18 +334,32 @@ void release_tls_block_chain(IOBuf::Block* b) {
         g_num_hit_tls_threshold.fetch_add(n, butil::memory_order_relaxed);
         return;
     }
+    IOBuf::Block* const old_head = tls_data.block_head;

Review Comment:
   This overlap protection is bypassed when `tls_data.num_blocks >= 
max_blocks_per_thread()`: the function immediately `dec_ref()`s every node in 
the incoming chain. If the incoming chain overlaps with the existing TLS list 
(the class of bug this change is addressing), that branch would `dec_ref()` 
blocks still referenced by TLS, leaving dangling pointers in the TLS free list. 
Overlap detection (or a safe handling strategy) should be applied before the 
threshold branch as well.



##########
test/iobuf_unittest.cpp:
##########
@@ -1946,4 +1946,161 @@ TEST_F(IOBufTest, single_iobuf) {
     ASSERT_TRUE(p != nullptr);
 }
 
+// Reproduction test for https://github.com/apache/brpc/issues/3243
+//
+// release_tls_block() does not guard against a block being returned while it 
is
+// already the TLS list head.  The assignment `b->portal_next = block_head`
+// becomes `b->portal_next = b` (self-loop).  Any later traversal of the TLS
+// chain — remove_tls_block_chain() at thread exit, share_tls_block(), etc. —
+// spins forever, silently hanging the thread.
+//
+// The tests below run in a dedicated thread so the main thread's TLS is
+// unaffected and a self-loop can be detected and cleaned up safely.
+
+static void* double_return_release_tls_block_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Step 1: acquire a fresh block (removed from TLS or newly created).
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+
+    // Step 2: return it to TLS — b is now block_head.
+    butil::iobuf::release_tls_block(b);
+
+    // Step 3: return the *same* block again while it is still block_head.
+    // BUG: release_tls_block executes
+    //   b->portal_next = block_head   // == b  →  self-loop!
+    //   block_head = b                // no-op
+    butil::iobuf::release_tls_block(b);
+
+    // Detect the self-loop.
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        // Break the cycle so thread_atexit(remove_tls_block_chain) won't hang.
+        // acquire_tls_block() sets portal_next = NULL, breaking the loop.
+        butil::iobuf::acquire_tls_block();
+        // block_head is still b (was self-referencing), acquire again to 
drain.
+        butil::iobuf::acquire_tls_block();
+        // TLS is now empty — thread can exit without hanging.
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_double_return_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "release_tls_block() created a self-loop (b->portal_next == b) "
+           "when the same block was double-returned while already being the "
+           "TLS block_head.  This causes remove_tls_block_chain() to loop "
+           "infinitely at thread exit.  (GitHub issue #3243)";
+}
+
+static void* double_return_release_tls_block_chain_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Acquire a block and put it back as TLS head.
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+    butil::iobuf::release_tls_block(b);
+    // Now: block_head -> b -> NULL
+
+    // Return the same block through release_tls_block_chain.
+    // The chain is: b -> NULL  (b->portal_next was set to NULL by acquire).
+    // Internally: last_b = b, then last_b->portal_next = block_head = b
+    //   → self-loop!
+    b->u.portal_next = NULL; // simulate a single-block chain
+    butil::iobuf::release_tls_block_chain(b);
+
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        butil::iobuf::acquire_tls_block();
+        butil::iobuf::acquire_tls_block();
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_chain_overlap_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_chain_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));

Review Comment:
   Same naming mismatch here: the test name implies the self-loop is created, 
but the test asserts that it should not happen. Consider renaming to match the 
expected outcome/regression intent.



##########
test/iobuf_unittest.cpp:
##########
@@ -1946,4 +1946,161 @@ TEST_F(IOBufTest, single_iobuf) {
     ASSERT_TRUE(p != nullptr);
 }
 
+// Reproduction test for https://github.com/apache/brpc/issues/3243
+//
+// release_tls_block() does not guard against a block being returned while it 
is
+// already the TLS list head.  The assignment `b->portal_next = block_head`
+// becomes `b->portal_next = b` (self-loop).  Any later traversal of the TLS
+// chain — remove_tls_block_chain() at thread exit, share_tls_block(), etc. —
+// spins forever, silently hanging the thread.
+//
+// The tests below run in a dedicated thread so the main thread's TLS is
+// unaffected and a self-loop can be detected and cleaned up safely.
+
+static void* double_return_release_tls_block_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Step 1: acquire a fresh block (removed from TLS or newly created).
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+
+    // Step 2: return it to TLS — b is now block_head.
+    butil::iobuf::release_tls_block(b);
+
+    // Step 3: return the *same* block again while it is still block_head.
+    // BUG: release_tls_block executes
+    //   b->portal_next = block_head   // == b  →  self-loop!
+    //   block_head = b                // no-op
+    butil::iobuf::release_tls_block(b);
+
+    // Detect the self-loop.
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        // Break the cycle so thread_atexit(remove_tls_block_chain) won't hang.
+        // acquire_tls_block() sets portal_next = NULL, breaking the loop.
+        butil::iobuf::acquire_tls_block();
+        // block_head is still b (was self-referencing), acquire again to 
drain.
+        butil::iobuf::acquire_tls_block();
+        // TLS is now empty — thread can exit without hanging.
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_double_return_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "release_tls_block() created a self-loop (b->portal_next == b) "
+           "when the same block was double-returned while already being the "
+           "TLS block_head.  This causes remove_tls_block_chain() to loop "
+           "infinitely at thread exit.  (GitHub issue #3243)";
+}
+
+static void* double_return_release_tls_block_chain_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Acquire a block and put it back as TLS head.
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+    butil::iobuf::release_tls_block(b);
+    // Now: block_head -> b -> NULL
+
+    // Return the same block through release_tls_block_chain.
+    // The chain is: b -> NULL  (b->portal_next was set to NULL by acquire).
+    // Internally: last_b = b, then last_b->portal_next = block_head = b
+    //   → self-loop!
+    b->u.portal_next = NULL; // simulate a single-block chain
+    butil::iobuf::release_tls_block_chain(b);
+
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        butil::iobuf::acquire_tls_block();
+        butil::iobuf::acquire_tls_block();
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_chain_overlap_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_chain_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "release_tls_block_chain() created a self-loop when the returned "
+           "chain overlaps with the existing TLS block_head.  "
+           "last_b->portal_next = block_head creates a cycle when first_b == "
+           "block_head.  (GitHub issue #3243)";
+}
+
+// Reproduce the self-loop through the IOBufAsZeroCopyOutputStream::BackUp()
+// code path described in the issue.  BackUp() eagerly returns _cur_block to
+// TLS.  If the same block pointer is subsequently released again (e.g. via
+// _release_block()), the double-return triggers the self-loop.
+static void* backup_double_return_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    butil::IOBuf buf;
+    {
+        // _block_size == 0 → uses TLS blocks.
+        butil::IOBufAsZeroCopyOutputStream stream(&buf);
+        void* data = NULL;
+        int size = 0;
+        EXPECT_TRUE(stream.Next(&data, &size));
+
+        // BackUp releases _cur_block to TLS and sets _cur_block = NULL.
+        stream.BackUp(size);
+
+        // At this point the block is the TLS head.
+        butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+        EXPECT_TRUE(head != NULL);
+
+        // Simulate a second release of the same block, which can happen
+        // when the block pointer is obtained from a still-live BlockRef.
+        butil::iobuf::release_tls_block(head);
+
+        butil::IOBuf::Block* new_head = butil::iobuf::get_tls_block_head();
+        if (new_head && butil::iobuf::get_portal_next(new_head) == new_head) {
+            *self_loop = true;
+            butil::iobuf::acquire_tls_block();
+            butil::iobuf::acquire_tls_block();
+        }
+    }
+    if (!*self_loop) {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, backup_then_double_release_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                backup_double_return_thread, &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "After IOBufAsZeroCopyOutputStream::BackUp() returned a block to "

Review Comment:
   Same naming mismatch here: the test name implies the self-loop is created, 
but it asserts `EXPECT_FALSE(self_loop)`. Consider renaming to reflect the 
expected post-fix behavior (or use a regression-style name tied to #3243).



##########
src/butil/iobuf.cpp:
##########
@@ -334,18 +334,32 @@ void release_tls_block_chain(IOBuf::Block* b) {
         g_num_hit_tls_threshold.fetch_add(n, butil::memory_order_relaxed);
         return;
     }
+    IOBuf::Block* const old_head = tls_data.block_head;
     IOBuf::Block* first_b = b;
     IOBuf::Block* last_b = NULL;
     do {
         ++n;
         CHECK(!b->full());
+        // If any block in the incoming chain is already the TLS block_head,
+        // linking last_b->portal_next = block_head would create a cycle:
+        //   - Single-block chain [B] where B == block_head:
+        //     last_b->portal_next = B, i.e. B->portal_next = B (self-loop)
+        //   - Multi-block chain [A -> B] where A == block_head:

Review Comment:
   The new check only compares incoming nodes against `old_head`. A duplicate 
return of a block that is already in the TLS chain but *not* the head can still 
create a cycle (e.g. TLS: `H -> X -> NULL`, releasing chain `X` would produce 
`H -> X -> H`). If the goal is to prevent hangs in 
`remove_tls_block_chain()`/`share_tls_block()`, the overlap detection needs to 
cover any node already in the TLS list (bounded by `MAX_BLOCKS_PER_THREAD`, so 
scanning the existing chain is feasible).



##########
test/iobuf_unittest.cpp:
##########
@@ -1946,4 +1946,161 @@ TEST_F(IOBufTest, single_iobuf) {
     ASSERT_TRUE(p != nullptr);
 }
 
+// Reproduction test for https://github.com/apache/brpc/issues/3243
+//
+// release_tls_block() does not guard against a block being returned while it 
is
+// already the TLS list head.  The assignment `b->portal_next = block_head`
+// becomes `b->portal_next = b` (self-loop).  Any later traversal of the TLS
+// chain — remove_tls_block_chain() at thread exit, share_tls_block(), etc. —
+// spins forever, silently hanging the thread.
+//
+// The tests below run in a dedicated thread so the main thread's TLS is
+// unaffected and a self-loop can be detected and cleaned up safely.
+
+static void* double_return_release_tls_block_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Step 1: acquire a fresh block (removed from TLS or newly created).
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+
+    // Step 2: return it to TLS — b is now block_head.
+    butil::iobuf::release_tls_block(b);
+
+    // Step 3: return the *same* block again while it is still block_head.
+    // BUG: release_tls_block executes
+    //   b->portal_next = block_head   // == b  →  self-loop!
+    //   block_head = b                // no-op
+    butil::iobuf::release_tls_block(b);
+
+    // Detect the self-loop.
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        // Break the cycle so thread_atexit(remove_tls_block_chain) won't hang.
+        // acquire_tls_block() sets portal_next = NULL, breaking the loop.
+        butil::iobuf::acquire_tls_block();
+        // block_head is still b (was self-referencing), acquire again to 
drain.
+        butil::iobuf::acquire_tls_block();
+        // TLS is now empty — thread can exit without hanging.

Review Comment:
   In the self-loop cleanup path, `acquire_tls_block()` returns blocks removed 
from TLS ownership, but the returned pointers are discarded without 
`dec_ref()`. If the reproduction triggers (or under debug 
allocator/sanitizers), this can leak blocks and cause unrelated test failures. 
Capture the returned blocks and `dec_ref()` them after breaking the cycle (or 
use an equivalent cleanup that preserves reference counting correctness).
   



##########
test/iobuf_unittest.cpp:
##########
@@ -1946,4 +1946,161 @@ TEST_F(IOBufTest, single_iobuf) {
     ASSERT_TRUE(p != nullptr);
 }
 
+// Reproduction test for https://github.com/apache/brpc/issues/3243
+//
+// release_tls_block() does not guard against a block being returned while it 
is
+// already the TLS list head.  The assignment `b->portal_next = block_head`
+// becomes `b->portal_next = b` (self-loop).  Any later traversal of the TLS
+// chain — remove_tls_block_chain() at thread exit, share_tls_block(), etc. —
+// spins forever, silently hanging the thread.
+//
+// The tests below run in a dedicated thread so the main thread's TLS is
+// unaffected and a self-loop can be detected and cleaned up safely.
+
+static void* double_return_release_tls_block_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Step 1: acquire a fresh block (removed from TLS or newly created).
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+
+    // Step 2: return it to TLS — b is now block_head.
+    butil::iobuf::release_tls_block(b);
+
+    // Step 3: return the *same* block again while it is still block_head.
+    // BUG: release_tls_block executes
+    //   b->portal_next = block_head   // == b  →  self-loop!
+    //   block_head = b                // no-op
+    butil::iobuf::release_tls_block(b);
+
+    // Detect the self-loop.
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        // Break the cycle so thread_atexit(remove_tls_block_chain) won't hang.
+        // acquire_tls_block() sets portal_next = NULL, breaking the loop.
+        butil::iobuf::acquire_tls_block();
+        // block_head is still b (was self-referencing), acquire again to 
drain.
+        butil::iobuf::acquire_tls_block();
+        // TLS is now empty — thread can exit without hanging.
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_double_return_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));

Review Comment:
   The test name says "creates_self_loop" but the assertion expects `self_loop` 
to be false. Renaming to reflect the expected post-fix behavior (e.g. 
"...does_not_create_self_loop" or a "regression_3243" style name) would make 
failures easier to interpret.



##########
src/butil/iobuf_inl.h:
##########
@@ -615,6 +615,14 @@ inline void release_tls_block(IOBuf::Block* b) {
         b->dec_ref();
         // g_num_hit_tls_threshold.fetch_add(1, butil::memory_order_relaxed);
         inc_g_num_hit_tls_threshold();
+    } else if (b == tls_data->block_head) {

Review Comment:
   The duplicate-return guard only checks `b == tls_data->block_head` and it 
runs *after* the `num_blocks >= max_blocks_per_thread()` branch. If the TLS 
cache is already at the soft limit and a block is double-returned (or otherwise 
already present in the TLS list), this path will call `b->dec_ref()` even 
though TLS still holds a reference to `b`, leaving a dangling pointer in the 
TLS list. Consider checking whether `b` is already in the TLS chain (not just 
head) before any `dec_ref()`/insertion logic (the list is bounded by 
`MAX_BLOCKS_PER_THREAD`, so a linear scan is cheap).



##########
test/iobuf_unittest.cpp:
##########
@@ -1946,4 +1946,161 @@ TEST_F(IOBufTest, single_iobuf) {
     ASSERT_TRUE(p != nullptr);
 }
 
+// Reproduction test for https://github.com/apache/brpc/issues/3243
+//
+// release_tls_block() does not guard against a block being returned while it 
is
+// already the TLS list head.  The assignment `b->portal_next = block_head`
+// becomes `b->portal_next = b` (self-loop).  Any later traversal of the TLS
+// chain — remove_tls_block_chain() at thread exit, share_tls_block(), etc. —
+// spins forever, silently hanging the thread.
+//
+// The tests below run in a dedicated thread so the main thread's TLS is
+// unaffected and a self-loop can be detected and cleaned up safely.
+
+static void* double_return_release_tls_block_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Step 1: acquire a fresh block (removed from TLS or newly created).
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+
+    // Step 2: return it to TLS — b is now block_head.
+    butil::iobuf::release_tls_block(b);
+
+    // Step 3: return the *same* block again while it is still block_head.
+    // BUG: release_tls_block executes
+    //   b->portal_next = block_head   // == b  →  self-loop!
+    //   block_head = b                // no-op
+    butil::iobuf::release_tls_block(b);
+
+    // Detect the self-loop.
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        // Break the cycle so thread_atexit(remove_tls_block_chain) won't hang.
+        // acquire_tls_block() sets portal_next = NULL, breaking the loop.
+        butil::iobuf::acquire_tls_block();
+        // block_head is still b (was self-referencing), acquire again to 
drain.
+        butil::iobuf::acquire_tls_block();
+        // TLS is now empty — thread can exit without hanging.
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_double_return_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "release_tls_block() created a self-loop (b->portal_next == b) "
+           "when the same block was double-returned while already being the "
+           "TLS block_head.  This causes remove_tls_block_chain() to loop "
+           "infinitely at thread exit.  (GitHub issue #3243)";
+}
+
+static void* double_return_release_tls_block_chain_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    // Acquire a block and put it back as TLS head.
+    butil::IOBuf::Block* b = butil::iobuf::acquire_tls_block();
+    if (!b) return NULL;
+    butil::iobuf::release_tls_block(b);
+    // Now: block_head -> b -> NULL
+
+    // Return the same block through release_tls_block_chain.
+    // The chain is: b -> NULL  (b->portal_next was set to NULL by acquire).
+    // Internally: last_b = b, then last_b->portal_next = block_head = b
+    //   → self-loop!
+    b->u.portal_next = NULL; // simulate a single-block chain
+    butil::iobuf::release_tls_block_chain(b);
+
+    butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+    if (head && butil::iobuf::get_portal_next(head) == head) {
+        *self_loop = true;
+        butil::iobuf::acquire_tls_block();
+        butil::iobuf::acquire_tls_block();
+    } else {
+        butil::iobuf::remove_tls_block_chain();
+    }
+    return NULL;
+}
+
+TEST_F(IOBufTest, release_tls_block_chain_overlap_creates_self_loop) {
+    bool self_loop = false;
+    pthread_t tid;
+    ASSERT_EQ(0, pthread_create(&tid, NULL,
+                                double_return_release_tls_block_chain_thread,
+                                &self_loop));
+    ASSERT_EQ(0, pthread_join(tid, NULL));
+
+    EXPECT_FALSE(self_loop)
+        << "release_tls_block_chain() created a self-loop when the returned "
+           "chain overlaps with the existing TLS block_head.  "
+           "last_b->portal_next = block_head creates a cycle when first_b == "
+           "block_head.  (GitHub issue #3243)";
+}
+
+// Reproduce the self-loop through the IOBufAsZeroCopyOutputStream::BackUp()
+// code path described in the issue.  BackUp() eagerly returns _cur_block to
+// TLS.  If the same block pointer is subsequently released again (e.g. via
+// _release_block()), the double-return triggers the self-loop.
+static void* backup_double_return_thread(void* arg) {
+    bool* self_loop = static_cast<bool*>(arg);
+    butil::iobuf::remove_tls_block_chain();
+
+    butil::IOBuf buf;
+    {
+        // _block_size == 0 → uses TLS blocks.
+        butil::IOBufAsZeroCopyOutputStream stream(&buf);
+        void* data = NULL;
+        int size = 0;
+        EXPECT_TRUE(stream.Next(&data, &size));
+
+        // BackUp releases _cur_block to TLS and sets _cur_block = NULL.
+        stream.BackUp(size);
+
+        // At this point the block is the TLS head.
+        butil::IOBuf::Block* head = butil::iobuf::get_tls_block_head();
+        EXPECT_TRUE(head != NULL);
+
+        // Simulate a second release of the same block, which can happen
+        // when the block pointer is obtained from a still-live BlockRef.
+        butil::iobuf::release_tls_block(head);
+
+        butil::IOBuf::Block* new_head = butil::iobuf::get_tls_block_head();
+        if (new_head && butil::iobuf::get_portal_next(new_head) == new_head) {
+            *self_loop = true;
+            butil::iobuf::acquire_tls_block();
+            butil::iobuf::acquire_tls_block();

Review Comment:
   Same leak issue here: if a cycle is detected, the `acquire_tls_block()` 
calls used to break it drop the returned blocks on the floor without 
`dec_ref()`, which can leak blocks and cause follow-on failures in the suite’s 
leak checks. Capture and release the returned blocks after draining.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Fix IOBuf TLS block pool self-loop caused by double-return (#3243) (brpc)

Reply via email to