This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new 0f808ede72 [improvement](publish) fix publish timeout in concurrent load (#14646)
0f808ede72 is described below

commit 0f808ede729e9d66538568826350d1ccdfd636f0
Author: yixiutt <[email protected]>
AuthorDate: Mon Nov 28 19:22:57 2022 +0800

    [improvement](publish) fix publish timeout in concurrent load (#14646)
    
    During concurrent loads, publish timeouts happen occasionally. They are
    caused by the meta lock being held by another thread, so the publish
    step that adds the incremental rowset hangs for several seconds.
    StorageEngine::start_delete_unused_rowset holds gc_mutex for a long
    time, so add_unused_rowset has to wait for that lock. Compaction's
    modify_rowset or other tablet methods hold meta_lock while calling
    add_unused_rowset, which keeps meta_lock occupied for too long and
    finally makes publish time out.
    
    In this PR, I copy the unused rowsets while holding the lock and delete
    them after releasing it. This makes the gc_mutex critical section much
    shorter, so the publish thread can acquire the meta lock immediately.
    My tests show no publish timeouts under concurrent stream load.
---
 be/src/common/config.h         |  2 +-
 be/src/olap/storage_engine.cpp | 32 ++++++++++++++++++++------------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index a86afb8c27..68c5e6cd7e 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -193,7 +193,7 @@ CONF_mInt32(default_num_rows_per_column_file_block, "1024");
 // pending data policy
 CONF_mInt32(pending_data_expire_time_sec, "1800");
 // inc_rowset snapshot rs sweep time interval
-CONF_mInt32(tablet_rowset_stale_sweep_time_sec, "1800");
+CONF_mInt32(tablet_rowset_stale_sweep_time_sec, "300");
 // garbage sweep policy
 CONF_Int32(max_garbage_sweep_interval, "3600");
 CONF_Int32(min_garbage_sweep_interval, "180");
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 665a394ede..deaa562a4d 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -883,20 +883,28 @@ void StorageEngine::_parse_default_rowset_type() {
 }
 
 void StorageEngine::start_delete_unused_rowset() {
-    MutexLock lock(&_gc_mutex);
-    for (auto it = _unused_rowsets.begin(); it != _unused_rowsets.end();) {
-        if (it->second.use_count() != 1) {
-            ++it;
-        } else if (it->second->need_delete_file()) {
-            VLOG_NOTICE << "start to remove rowset:" << it->second->rowset_id()
-                        << ", version:" << it->second->version().first << "-"
-                        << it->second->version().second;
-            OLAPStatus status = it->second->remove();
-            VLOG_NOTICE << "remove rowset:" << it->second->rowset_id()
-                        << " finished. status:" << status;
-            it = _unused_rowsets.erase(it);
+    std::unordered_map<std::string, RowsetSharedPtr> unused_rowsets_copy;
+    {
+        MutexLock lock(&_gc_mutex);
+        for (auto it = _unused_rowsets.begin(); it != _unused_rowsets.end();) {
+            if (it->second.use_count() == 1 && it->second->need_delete_file()) 
{
+                unused_rowsets_copy[it->first] = it->second;
+                it = _unused_rowsets.erase(it);
+            } else {
+                ++it;
+            }
         }
     }
+    for (auto it = unused_rowsets_copy.begin(); it != 
unused_rowsets_copy.end(); ++it) {
+        // FIXME(cyx): Currently remote unused rowsets are generated by 
compaction gc,
+        // we cannot remove them directly as other BE may need them.
+        VLOG_NOTICE << "start to remove rowset:" << it->second->rowset_id()
+                    << ", version:" << it->second->version().first << "-"
+                    << it->second->version().second;
+        OLAPStatus status = it->second->remove();
+        VLOG_NOTICE << "remove rowset:" << it->second->rowset_id()
+                    << " finished. status:" << status;
+    }
 }
 
 void StorageEngine::add_unused_rowset(RowsetSharedPtr rowset) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to