This is an automated email from the ASF dual-hosted git repository.
sollhui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b640914bf36 [opt](memory) release packed file writer buffer after
flush (#63967)
b640914bf36 is described below
commit b640914bf36ad6e913d71d77eb499d001d56789f
Author: hui lai <[email protected]>
AuthorDate: Tue Jun 2 11:14:17 2026 +0800
[opt](memory) release packed file writer buffer after flush (#63967)
### What problem does this PR solve?
PackedFileWriter buffers data for files smaller than
small_file_threshold_bytes before deciding whether to pack them into a
packed file or switch to direct write. The buffered data is stored in a
std::string. After the buffered data was flushed to the inner writer or
submitted to PackedFileManager, the old code only called clear(), which
resets the string's size but keeps its capacity. When segment file writers
are still retained by upper-level rowset structures after close, this
retained capacity can keep a large amount of memory alive, which shows up
under PackedFileWriter::appendv in memory profiling:
<img width="800" height="1180" alt="image"
src="https://github.com/user-attachments/assets/7e0e2c40-c35b-4bfc-b45b-aeed31c29771"
/>
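This change reserves the post-append buffer size
(_bytes_appended + total_size) up front in appendv to reduce repeated
std::string growth, and releases the buffer's capacity (by swapping the
buffer with an empty std::string) once the data has been flushed to the
inner writer or submitted to PackedFileManager.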
---
be/src/io/fs/packed_file_writer.cpp | 9 +++++++--
be/src/io/fs/packed_file_writer.h | 6 ++++++
be/test/io/fs/packed_file_writer_test.cpp | 19 +++++++++++++++++++
3 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/be/src/io/fs/packed_file_writer.cpp
b/be/src/io/fs/packed_file_writer.cpp
index 6a345e0d8a8..45f49e0e03f 100644
--- a/be/src/io/fs/packed_file_writer.cpp
+++ b/be/src/io/fs/packed_file_writer.cpp
@@ -80,6 +80,7 @@ Status PackedFileWriter::appendv(const Slice* data, size_t
data_cnt) {
if (_is_direct_write) {
RETURN_IF_ERROR(_inner_writer->appendv(data, data_cnt));
} else {
+ _buffer.reserve(_bytes_appended + total_size);
// Buffer small file data
for (size_t i = 0; i < data_cnt; ++i) {
_buffer.append(data[i].data, data[i].size);
@@ -181,6 +182,10 @@ Status PackedFileWriter::_wait_packed_upload() {
return Status::OK();
}
+void PackedFileWriter::_release_buffer() {
+ std::string().swap(_buffer);
+}
+
Status PackedFileWriter::_switch_to_direct_write() {
DCHECK(!_is_direct_write);
@@ -188,7 +193,7 @@ Status PackedFileWriter::_switch_to_direct_write() {
if (_buffer.size() > 0) {
Slice buffer_slice(_buffer.data(), _buffer.size());
RETURN_IF_ERROR(_inner_writer->appendv(&buffer_slice, 1));
- _buffer.clear();
+ _release_buffer();
}
return Status::OK();
@@ -213,7 +218,7 @@ Status PackedFileWriter::_send_to_packed_manager() {
Slice data_slice(_buffer.data(), _buffer.size());
RETURN_IF_ERROR(_packed_file_manager->append_small_file(_file_path,
data_slice, _append_info));
- _buffer.clear();
+ _release_buffer();
return Status::OK();
}
diff --git a/be/src/io/fs/packed_file_writer.h
b/be/src/io/fs/packed_file_writer.h
index eaae0a6ed74..d2c06621138 100644
--- a/be/src/io/fs/packed_file_writer.h
+++ b/be/src/io/fs/packed_file_writer.h
@@ -60,7 +60,13 @@ public:
// Returns true if this file's data was written to a packed file (not
direct write)
bool is_in_packed_file() const override { return !_is_direct_write; }
+#ifdef BE_TEST
+ size_t buffer_capacity_for_test() const { return _buffer.capacity(); }
+#endif
+
private:
+ void _release_buffer();
+
// Async close: submit data without waiting
Status _close_async();
diff --git a/be/test/io/fs/packed_file_writer_test.cpp
b/be/test/io/fs/packed_file_writer_test.cpp
index dc8b2b5ec4d..e26a5ec4743 100644
--- a/be/test/io/fs/packed_file_writer_test.cpp
+++ b/be/test/io/fs/packed_file_writer_test.cpp
@@ -172,6 +172,25 @@ TEST_F(PackedFileWriterTest, SwitchToDirectWrite) {
EXPECT_GT(inner_writer_ptr->append_calls(), 0);
}
+TEST_F(PackedFileWriterTest, SwitchToDirectWriteReleasesBufferedMemory) {
+ Path file_path("switch_file_release_buffer");
+ auto* inner_writer_ptr = _inner_writer.get();
+ PackedFileWriter writer(std::move(_inner_writer), file_path, _append_info);
+
+ const size_t default_capacity = std::string().capacity();
+ std::string small_data(80, 'a');
+ Slice small_slice(small_data);
+ ASSERT_TRUE(writer.appendv(&small_slice, 1).ok());
+ EXPECT_GT(writer.buffer_capacity_for_test(), default_capacity);
+
+ std::string large_data(30, 'b');
+ Slice large_slice(large_data);
+ ASSERT_TRUE(writer.appendv(&large_slice, 1).ok());
+
+ EXPECT_GT(inner_writer_ptr->append_calls(), 0);
+ EXPECT_LE(writer.buffer_capacity_for_test(), default_capacity);
+}
+
TEST_F(PackedFileWriterTest, CloseAsync) {
Path file_path("async_file");
PackedFileWriter writer(std::move(_inner_writer), file_path, _append_info);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]