This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 27f7ae258d [Enhancement](load) optimize flush policy to avoid small
segments #12706
27f7ae258d is described below
commit 27f7ae258db3b5e48e32f97020567fbbe580b365
Author: zhannngchen <[email protected]>
AuthorDate: Wed Sep 21 14:33:05 2022 +0800
[Enhancement](load) optimize flush policy to avoid small segments #12706
Under the current policy, when the memory limit is exceeded, the load channel
picks the tablets that consume the most memory. However, mem_consumption also
counts memory held by memtables that are being flushed. If a delta writer is
flushing a full memtable (200MB by default), its current memtable might be
very small. We should avoid flushing such a memtable, because doing so would
generate a very small segment.
---
be/src/olap/delta_writer.cpp | 17 ++++++++++++-----
be/src/olap/delta_writer.h | 9 +++++----
be/src/runtime/tablets_channel.cpp | 8 ++++----
3 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index d0c8b249c1..8e1b27f5c0 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -394,13 +394,13 @@ Status DeltaWriter::cancel() {
return Status::OK();
}
-int64_t DeltaWriter::save_mem_consumption_snapshot() {
- _mem_consumption_snapshot = mem_consumption();
- return _mem_consumption_snapshot;
+int64_t DeltaWriter::save_memtable_consumption_snapshot() {
+ _memtable_consumption_snapshot = memtable_consumption();
+ return _memtable_consumption_snapshot;
}
-int64_t DeltaWriter::get_mem_consumption_snapshot() const {
- return _mem_consumption_snapshot;
+int64_t DeltaWriter::get_memtable_consumption_snapshot() const {
+ return _memtable_consumption_snapshot;
}
int64_t DeltaWriter::mem_consumption() const {
@@ -412,6 +412,13 @@ int64_t DeltaWriter::mem_consumption() const {
return _mem_tracker->consumption();
}
+int64_t DeltaWriter::memtable_consumption() const {
+ if (_mem_table == nullptr) {
+ return 0;
+ }
+ return _mem_table->memory_usage();
+}
+
int64_t DeltaWriter::partition_id() const {
return _req.partition_id;
}
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index 21a2d4f318..4ad7df38ed 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -104,9 +104,9 @@ public:
int64_t memtable_consumption() const;
- int64_t save_mem_consumption_snapshot();
+ int64_t save_memtable_consumption_snapshot();
- int64_t get_mem_consumption_snapshot() const;
+ int64_t get_memtable_consumption_snapshot() const;
void finish_slave_tablet_pull_rowset(int64_t node_id, bool is_succeed);
@@ -161,8 +161,9 @@ private:
// use in vectorized load
bool _is_vec;
- //only used for std::sort more detail see issue(#9237)
- int64_t _mem_consumption_snapshot = 0;
+ // memory consumption snapshot for current memtable, only
+ // used for std::sort
+ int64_t _memtable_consumption_snapshot = 0;
std::unordered_set<int64_t> _unfinished_slave_node;
PSuccessSlaveTabletNodeIds _success_slave_node_ids;
diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index 274512a180..09a912306c 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -206,11 +206,11 @@ Status TabletsChannel::reduce_mem_usage(int64_t mem_limit, TabletWriterAddResult
// Sort the DeltaWriters by mem consumption in descend order.
std::vector<DeltaWriter*> writers;
for (auto& it : _tablet_writers) {
- it.second->save_mem_consumption_snapshot();
+ it.second->save_memtable_consumption_snapshot();
writers.push_back(it.second);
}
std::sort(writers.begin(), writers.end(), [](const DeltaWriter* lhs, const
DeltaWriter* rhs) {
- return lhs->get_mem_consumption_snapshot() >
rhs->get_mem_consumption_snapshot();
+ return lhs->get_memtable_consumption_snapshot() >
rhs->get_memtable_consumption_snapshot();
});
// Decide which writes should be flushed to reduce mem consumption.
@@ -228,11 +228,11 @@ Status TabletsChannel::reduce_mem_usage(int64_t mem_limit, TabletWriterAddResult
int counter = 0;
int64_t sum = 0;
for (auto writer : writers) {
- if (writer->mem_consumption() <= 0) {
+ if (writer->memtable_consumption() <= 0) {
break;
}
++counter;
- sum += writer->mem_consumption();
+ sum += writer->memtable_consumption();
if (sum > mem_to_flushed) {
break;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]