This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 2b45ee08e [KUDU-3412][Tools][Bugfix] Fix core dump of tablet copier
2b45ee08e is described below
commit 2b45ee08ee9cd7870d9122c75c3845efe4bc26c6
Author: xinghuayu007 <[email protected]>
AuthorDate: Mon Oct 24 19:49:45 2022 +0800
[KUDU-3412][Tools][Bugfix] Fix core dump of tablet copier
TabletCopier::CopyTablets() creates a thread that periodically
reports copy progress: every 10 seconds it logs the tablet id
and status of each tablet being copied. However, a fake_replica
does not have a tablet id, so this logging causes a core dump.
Therefore, this patch replaces the set of replicas with a
map<string, TabletReplica*> named copying_replicas_by_tablet_id,
which stores each fake_replica keyed by its tablet id.
Change-Id: Iaa2107d3c04c9f3e850d33dbf420ebb62edc98d8
Reviewed-on: http://gerrit.cloudera.org:8080/19163
Reviewed-by: Yingchun Lai <[email protected]>
Tested-by: Yingchun Lai <[email protected]>
---
src/kudu/tools/tool_action_local_replica.cc | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index afe405837..8d807261a 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -26,6 +26,7 @@
#include <optional>
#include <set>
#include <string>
+#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
@@ -91,6 +92,7 @@
#include "kudu/util/status.h"
#include "kudu/util/thread.h"
#include "kudu/util/threadpool.h"
+// IWYU pragma: no_include <boost/container/vector.hpp>
namespace kudu {
namespace rpc {
@@ -239,10 +241,10 @@ class TabletCopier {
Status CopyTablets() {
// Prepare to check copy progress.
int total_tablet_count = tablet_ids_to_copy_.size();
- // 'lock' is used for protecting 'copying_replicas', 'failed_tablet_ids'
+ // 'lock' is used for protecting 'copying_replicas_by_tablet_id', 'failed_tablet_ids'
// and 'succeed_tablet_count'.
simple_spinlock lock;
- set<TabletReplica*> copying_replicas;
+ map<string, TabletReplica*> copying_replicas_by_tablet_id;
set<string> failed_tablet_ids;
int succeed_tablet_count = 0;
if (copy_type_ == CopyType::FROM_LOCAL) {
@@ -266,10 +268,10 @@ class TabletCopier {
[&] () {
while (!latch.WaitFor(MonoDelta::FromSeconds(10))) {
std::lock_guard<simple_spinlock> l(lock);
- for (const auto& copying_replica : copying_replicas) {
+ for (const auto& entry : copying_replicas_by_tablet_id) {
LOG(INFO) << Substitute("Tablet $0 copy status: $1",
- copying_replica->tablet_id(),
- copying_replica->last_status());
+ entry.first,
+ entry.second->last_status());
}
}
}, &check_thread));
@@ -297,7 +299,7 @@ class TabletCopier {
{
std::lock_guard<simple_spinlock> l(lock);
LOG(WARNING) << "Start to copy tablet " << tablet_id;
- InsertOrDie(&copying_replicas, fake_replica.get());
+ copying_replicas_by_tablet_id[tablet_id] = fake_replica.get();
}
Status s;
unique_ptr<TabletCopyClient> client;
@@ -327,7 +329,7 @@ class TabletCopier {
succeed_tablet_count++;
LOG(INFO) << Substitute("Tablet $0 copy succeed.", tablet_id);
}
- copying_replicas.erase(fake_replica.get());
+ copying_replicas_by_tablet_id.erase(tablet_id);
LOG(INFO) << Substitute("$0/$1 tablets, $2 bytes copied, include $3 failed tablets.",
succeed_tablet_count +
failed_tablet_ids.size(),