This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fc2959b597ae91682ae4affbeda705d57ea0be43
Author: Xin Liao <[email protected]>
AuthorDate: Fri Feb 24 23:59:03 2023 +0800

    [fix](load) fix add broken tablet core dump (#17104)
---
 be/src/runtime/tablets_channel.cpp | 16 +++++++++++++---
 be/src/runtime/tablets_channel.h   |  5 +++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index 36c5430e9f..d780e17eb1 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -127,7 +127,7 @@ Status TabletsChannel::close(
                 // to make sure tablet writer in `_broken_tablets` won't call `close_wait` method.
                 // `close_wait` might create the rowset and commit txn directly, and the subsequent
                 // publish version task will success, which can cause the replica inconsistency.
-                if (_broken_tablets.find(it.second->tablet_id()) != _broken_tablets.end()) {
+                if (_is_broken_tablet(it.second->tablet_id())) {
                     LOG(WARNING) << "SHOULD NOT HAPPEN, tablet writer is broken but not cancelled"
                                  << ", tablet_id=" << it.first << ", transaction_id=" << _txn_id;
                     continue;
@@ -455,7 +455,7 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request,
     std::unordered_map<int64_t /* tablet_id */, std::vector<int> /* row index */> tablet_to_rowidxs;
     for (int i = 0; i < request.tablet_ids_size(); ++i) {
         int64_t tablet_id = request.tablet_ids(i);
-        if (_broken_tablets.find(tablet_id) != _broken_tablets.end()) {
+        if (_is_broken_tablet(tablet_id)) {
             // skip broken tablets
             VLOG_PROGRESS << "skip broken tablet tablet=" << tablet_id;
             continue;
@@ -498,7 +498,7 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request,
             error->set_tablet_id(tablet_to_rowidxs_it.first);
             error->set_msg(err_msg);
             tablet_writer_it->second->cancel_with_status(st);
-            _broken_tablets.insert(tablet_to_rowidxs_it.first);
+            _add_broken_tablet(tablet_to_rowidxs_it.first);
             // continue write to other tablet.
             // the error will return back to sender.
         }
@@ -511,6 +511,16 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request,
     return Status::OK();
 }
 
+void TabletsChannel::_add_broken_tablet(int64_t tablet_id) {
+    std::unique_lock<std::shared_mutex> wlock(_broken_tablets_lock);
+    _broken_tablets.insert(tablet_id);
+}
+
+bool TabletsChannel::_is_broken_tablet(int64_t tablet_id) {
+    std::shared_lock<std::shared_mutex> rlock(_broken_tablets_lock);
+    return _broken_tablets.find(tablet_id) != _broken_tablets.end();
+}
+
 template Status
 TabletsChannel::add_batch<PTabletWriterAddBatchRequest, PTabletWriterAddBatchResult>(
         PTabletWriterAddBatchRequest const&, PTabletWriterAddBatchResult*);
diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h
index 524a66fb70..c55f7e1b82 100644
--- a/be/src/runtime/tablets_channel.h
+++ b/be/src/runtime/tablets_channel.h
@@ -111,6 +111,9 @@ private:
                      google::protobuf::RepeatedPtrField<PTabletError>* tablet_error,
                      PSlaveTabletNodes slave_tablet_nodes, const bool write_single_replica);
 
+    void _add_broken_tablet(int64_t tablet_id);
+    bool _is_broken_tablet(int64_t tablet_id);
+
     // id of this load channel
     TabletsChannelKey _key;
 
@@ -158,6 +161,8 @@ private:
     // it will wait on this condition variable.
     std::condition_variable _reduce_memory_cond;
 
+    std::shared_mutex _broken_tablets_lock;
+
     std::unordered_set<int64_t> _partition_ids;
 
     static std::atomic<uint64_t> _s_tablet_writer_count;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to