This is an automated email from the ASF dual-hosted git repository.

sollhui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b640914bf36 [opt](memory) release packed file writer buffer after flush (#63967)
b640914bf36 is described below

commit b640914bf36ad6e913d71d77eb499d001d56789f
Author: hui lai <[email protected]>
AuthorDate: Tue Jun 2 11:14:17 2026 +0800

    [opt](memory) release packed file writer buffer after flush (#63967)
    
    ### What problem does this PR solve?
    
    PackedFileWriter buffers data for files smaller than
    small_file_threshold_bytes before deciding whether to pack them into a
    packed file or switch to direct write. The buffered data is stored in a
    std::string. After the buffered data is flushed to the inner writer or
    submitted to PackedFileManager, the old code only called clear(), which
    resets size but keeps capacity. When segment file writers are still
    retained by upper-level rowset structures after close, this retained
    capacity can keep a large amount of memory alive and show up under
    PackedFileWriter::appendv in memory profiling:
    <img width="800" height="1180" alt="image"
    src="https://github.com/user-attachments/assets/7e0e2c40-c35b-4bfc-b45b-aeed31c29771"
    />
    
    
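    Before buffering the data of each appendv call, this change reserves
    _bytes_appended + total_size bytes in the buffer to reduce repeated
    std::string growth. It also releases the buffer capacity (by swapping
    the buffer with an empty std::string) after the data has been flushed
    or submitted.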
---
 be/src/io/fs/packed_file_writer.cpp       |  9 +++++++--
 be/src/io/fs/packed_file_writer.h         |  6 ++++++
 be/test/io/fs/packed_file_writer_test.cpp | 19 +++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/be/src/io/fs/packed_file_writer.cpp b/be/src/io/fs/packed_file_writer.cpp
index 6a345e0d8a8..45f49e0e03f 100644
--- a/be/src/io/fs/packed_file_writer.cpp
+++ b/be/src/io/fs/packed_file_writer.cpp
@@ -80,6 +80,7 @@ Status PackedFileWriter::appendv(const Slice* data, size_t data_cnt) {
     if (_is_direct_write) {
         RETURN_IF_ERROR(_inner_writer->appendv(data, data_cnt));
     } else {
+        _buffer.reserve(_bytes_appended + total_size);
         // Buffer small file data
         for (size_t i = 0; i < data_cnt; ++i) {
             _buffer.append(data[i].data, data[i].size);
@@ -181,6 +182,10 @@ Status PackedFileWriter::_wait_packed_upload() {
     return Status::OK();
 }
 
+void PackedFileWriter::_release_buffer() {
+    std::string().swap(_buffer);
+}
+
 Status PackedFileWriter::_switch_to_direct_write() {
     DCHECK(!_is_direct_write);
 
@@ -188,7 +193,7 @@ Status PackedFileWriter::_switch_to_direct_write() {
     if (_buffer.size() > 0) {
         Slice buffer_slice(_buffer.data(), _buffer.size());
         RETURN_IF_ERROR(_inner_writer->appendv(&buffer_slice, 1));
-        _buffer.clear();
+        _release_buffer();
     }
 
     return Status::OK();
@@ -213,7 +218,7 @@ Status PackedFileWriter::_send_to_packed_manager() {
 
     Slice data_slice(_buffer.data(), _buffer.size());
     RETURN_IF_ERROR(_packed_file_manager->append_small_file(_file_path, data_slice, _append_info));
-    _buffer.clear();
+    _release_buffer();
     return Status::OK();
 }
 
diff --git a/be/src/io/fs/packed_file_writer.h b/be/src/io/fs/packed_file_writer.h
index eaae0a6ed74..d2c06621138 100644
--- a/be/src/io/fs/packed_file_writer.h
+++ b/be/src/io/fs/packed_file_writer.h
@@ -60,7 +60,13 @@ public:
     // Returns true if this file's data was written to a packed file (not direct write)
     bool is_in_packed_file() const override { return !_is_direct_write; }
 
+#ifdef BE_TEST
+    size_t buffer_capacity_for_test() const { return _buffer.capacity(); }
+#endif
+
 private:
+    void _release_buffer();
+
     // Async close: submit data without waiting
     Status _close_async();
 
diff --git a/be/test/io/fs/packed_file_writer_test.cpp b/be/test/io/fs/packed_file_writer_test.cpp
index dc8b2b5ec4d..e26a5ec4743 100644
--- a/be/test/io/fs/packed_file_writer_test.cpp
+++ b/be/test/io/fs/packed_file_writer_test.cpp
@@ -172,6 +172,25 @@ TEST_F(PackedFileWriterTest, SwitchToDirectWrite) {
     EXPECT_GT(inner_writer_ptr->append_calls(), 0);
 }
 
+TEST_F(PackedFileWriterTest, SwitchToDirectWriteReleasesBufferedMemory) {
+    Path file_path("switch_file_release_buffer");
+    auto* inner_writer_ptr = _inner_writer.get();
+    PackedFileWriter writer(std::move(_inner_writer), file_path, _append_info);
+
+    const size_t default_capacity = std::string().capacity();
+    std::string small_data(80, 'a');
+    Slice small_slice(small_data);
+    ASSERT_TRUE(writer.appendv(&small_slice, 1).ok());
+    EXPECT_GT(writer.buffer_capacity_for_test(), default_capacity);
+
+    std::string large_data(30, 'b');
+    Slice large_slice(large_data);
+    ASSERT_TRUE(writer.appendv(&large_slice, 1).ok());
+
+    EXPECT_GT(inner_writer_ptr->append_calls(), 0);
+    EXPECT_LE(writer.buffer_capacity_for_test(), default_capacity);
+}
+
 TEST_F(PackedFileWriterTest, CloseAsync) {
     Path file_path("async_file");
     PackedFileWriter writer(std::move(_inner_writer), file_path, _append_info);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to