This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 27f7ae258d [Enhancement](load) optimize flush policy to avoid small 
segments #12706
27f7ae258d is described below

commit 27f7ae258db3b5e48e32f97020567fbbe580b365
Author: zhannngchen <[email protected]>
AuthorDate: Wed Sep 21 14:33:05 2022 +0800

    [Enhancement](load) optimize flush policy to avoid small segments #12706
    
    Under the current policy, when the memory limit is exceeded, the load 
channel picks the tablets that consume the most memory. However, 
mem_consumption also includes the memory of memtables that are being flushed. 
If a delta writer is flushing a full memtable (200MB by default), its current 
memtable might be very small. We should avoid flushing such a memtable, because 
doing so can generate a very small segment.
---
 be/src/olap/delta_writer.cpp       | 17 ++++++++++++-----
 be/src/olap/delta_writer.h         |  9 +++++----
 be/src/runtime/tablets_channel.cpp |  8 ++++----
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index d0c8b249c1..8e1b27f5c0 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -394,13 +394,13 @@ Status DeltaWriter::cancel() {
     return Status::OK();
 }
 
-int64_t DeltaWriter::save_mem_consumption_snapshot() {
-    _mem_consumption_snapshot = mem_consumption();
-    return _mem_consumption_snapshot;
+int64_t DeltaWriter::save_memtable_consumption_snapshot() {
+    _memtable_consumption_snapshot = memtable_consumption();
+    return _memtable_consumption_snapshot;
 }
 
-int64_t DeltaWriter::get_mem_consumption_snapshot() const {
-    return _mem_consumption_snapshot;
+int64_t DeltaWriter::get_memtable_consumption_snapshot() const {
+    return _memtable_consumption_snapshot;
 }
 
 int64_t DeltaWriter::mem_consumption() const {
@@ -412,6 +412,13 @@ int64_t DeltaWriter::mem_consumption() const {
     return _mem_tracker->consumption();
 }
 
+int64_t DeltaWriter::memtable_consumption() const {
+    if (_mem_table == nullptr) {
+        return 0;
+    }
+    return _mem_table->memory_usage();
+}
+
 int64_t DeltaWriter::partition_id() const {
     return _req.partition_id;
 }
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index 21a2d4f318..4ad7df38ed 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -104,9 +104,9 @@ public:
 
     int64_t memtable_consumption() const;
 
-    int64_t save_mem_consumption_snapshot();
+    int64_t save_memtable_consumption_snapshot();
 
-    int64_t get_mem_consumption_snapshot() const;
+    int64_t get_memtable_consumption_snapshot() const;
 
     void finish_slave_tablet_pull_rowset(int64_t node_id, bool is_succeed);
 
@@ -161,8 +161,9 @@ private:
     // use in vectorized load
     bool _is_vec;
 
-    //only used for std::sort more detail see issue(#9237)
-    int64_t _mem_consumption_snapshot = 0;
+    // memory consumption snapshot for current memtable, only
+    // used for std::sort
+    int64_t _memtable_consumption_snapshot = 0;
 
     std::unordered_set<int64_t> _unfinished_slave_node;
     PSuccessSlaveTabletNodeIds _success_slave_node_ids;
diff --git a/be/src/runtime/tablets_channel.cpp 
b/be/src/runtime/tablets_channel.cpp
index 274512a180..09a912306c 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -206,11 +206,11 @@ Status TabletsChannel::reduce_mem_usage(int64_t 
mem_limit, TabletWriterAddResult
     // Sort the DeltaWriters by mem consumption in descend order.
     std::vector<DeltaWriter*> writers;
     for (auto& it : _tablet_writers) {
-        it.second->save_mem_consumption_snapshot();
+        it.second->save_memtable_consumption_snapshot();
         writers.push_back(it.second);
     }
     std::sort(writers.begin(), writers.end(), [](const DeltaWriter* lhs, const 
DeltaWriter* rhs) {
-        return lhs->get_mem_consumption_snapshot() > 
rhs->get_mem_consumption_snapshot();
+        return lhs->get_memtable_consumption_snapshot() > 
rhs->get_memtable_consumption_snapshot();
     });
 
     // Decide which writes should be flushed to reduce mem consumption.
@@ -228,11 +228,11 @@ Status TabletsChannel::reduce_mem_usage(int64_t 
mem_limit, TabletWriterAddResult
     int counter = 0;
     int64_t sum = 0;
     for (auto writer : writers) {
-        if (writer->mem_consumption() <= 0) {
+        if (writer->memtable_consumption() <= 0) {
             break;
         }
         ++counter;
-        sum += writer->mem_consumption();
+        sum += writer->memtable_consumption();
         if (sum > mem_to_flushed) {
             break;
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to