This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 23e6372b25a [Fix](partition) Skip rowset partition id eq 0 smaller than config wh… (#29363)
23e6372b25a is described below
commit 23e6372b25aa6c24cc69581615af6f600dcf6420
Author: deardeng <[email protected]>
AuthorDate: Fri Jan 5 19:39:47 2024 +0800
[Fix](partition) Skip rowset partition id eq 0 smaller than config wh… (#29363)
Co-authored-by: Yongqiang YANG <[email protected]>
---
be/src/common/config.cpp | 3 ++
be/src/common/config.h | 3 ++
be/src/olap/data_dir.cpp | 36 +++++++++++++++++++++---
be/src/olap/rowset/rowset_meta_manager.cpp | 16 ++++++++---
be/src/olap/rowset/rowset_meta_manager.h | 4 +--
be/src/olap/tablet_manager.cpp | 4 +++
be/src/olap/tablet_meta.cpp | 11 ++++++++
be/src/olap/txn_manager.cpp | 17 +++++------
be/test/olap/rowset/rowset_meta_manager_test.cpp | 2 +-
9 files changed, 77 insertions(+), 19 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index a762d0de392..4cd55d96806 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1120,6 +1120,9 @@ DEFINE_mInt32(s3_writer_buffer_allocation_timeout_second,
"60");
DEFINE_mBool(enable_column_type_check, "true");
+// Tolerance for the number of partition id 0 in rowset, default 0
+DEFINE_Int32(ignore_invalid_partition_id_rowset_num, "0");
+
// clang-format off
#ifdef BE_TEST
// test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 34699267002..eed14921baa 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1175,6 +1175,9 @@
DECLARE_mInt32(s3_writer_buffer_allocation_timeout_second);
DECLARE_mBool(enable_column_type_check);
+// Tolerance for the number of partition id 0 in rowset, default 0
+DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
+
#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 47434bd8226..5517391541f 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -383,6 +383,12 @@ Status DataDir::load() {
if (rowset_meta->is_local()) {
rowset_meta->set_fs(local_fs);
}
+
+ if (rowset_meta->partition_id() == 0) {
+ LOG(WARNING) << "rs tablet=" << rowset_meta->tablet_id() << "
rowset_id=" << rowset_id
+ << " load from meta but partition id eq 0";
+ }
+
dir_rowset_metas.push_back(rowset_meta);
return true;
};
@@ -470,6 +476,19 @@ Status DataDir::load() {
};
TabletMetaManager::traverse_pending_publish(_meta,
load_pending_publish_info_func);
+ int64_t rowset_partition_id_eq_0_num = 0;
+ for (auto rowset_meta : dir_rowset_metas) {
+ if (rowset_meta->partition_id() == 0) {
+ ++rowset_partition_id_eq_0_num;
+ }
+ }
+ if (rowset_partition_id_eq_0_num >
config::ignore_invalid_partition_id_rowset_num) {
+ LOG(FATAL) << fmt::format(
+ "roswet partition id eq 0 bigger than config {}, be exit, plz
check be.INFO",
+ config::ignore_invalid_partition_id_rowset_num);
+ exit(-1);
+ }
+
// traverse rowset
// 1. add committed rowset to txn map
// 2. add visible rowset to tablet
@@ -486,6 +505,13 @@ Status DataDir::load() {
continue;
}
+ if (rowset_meta->partition_id() == 0) {
+ LOG(WARNING) << "skip tablet_id=" << tablet->tablet_id()
+ << " rowset: " << rowset_meta->rowset_id()
+ << " txn: " << rowset_meta->txn_id();
+ continue;
+ }
+
RowsetSharedPtr rowset;
Status create_status = tablet->create_rowset(rowset_meta, &rowset);
if (!create_status) {
@@ -499,8 +525,9 @@ Status DataDir::load() {
rowset_meta->tablet_uid() == tablet->tablet_uid()) {
if (!rowset_meta->tablet_schema()) {
rowset_meta->set_tablet_schema(tablet->tablet_schema());
- RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(),
rowset_meta->rowset_id(),
- rowset_meta->get_rowset_pb());
+ RETURN_IF_ERROR(RowsetMetaManager::save(_meta,
rowset_meta->tablet_uid(),
+
rowset_meta->rowset_id(),
+
rowset_meta->get_rowset_pb(), false));
}
Status commit_txn_status = _txn_manager->commit_txn(
_meta, rowset_meta->partition_id(), rowset_meta->txn_id(),
@@ -527,8 +554,9 @@ Status DataDir::load() {
rowset_meta->tablet_uid() == tablet->tablet_uid()) {
if (!rowset_meta->tablet_schema()) {
rowset_meta->set_tablet_schema(tablet->tablet_schema());
- RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(),
rowset_meta->rowset_id(),
- rowset_meta->get_rowset_pb());
+ RETURN_IF_ERROR(RowsetMetaManager::save(_meta,
rowset_meta->tablet_uid(),
+
rowset_meta->rowset_id(),
+
rowset_meta->get_rowset_pb(), false));
}
Status publish_status = tablet->add_rowset(rowset);
if (!publish_status &&
!publish_status.is<PUSH_VERSION_ALREADY_EXIST>()) {
diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp
b/be/src/olap/rowset/rowset_meta_manager.cpp
index 23682338ff9..f5dc8101ea0 100644
--- a/be/src/olap/rowset/rowset_meta_manager.cpp
+++ b/be/src/olap/rowset/rowset_meta_manager.cpp
@@ -34,6 +34,7 @@
#include "olap/olap_define.h"
#include "olap/olap_meta.h"
#include "olap/utils.h"
+#include "util/debug_points.h"
namespace doris {
namespace {
@@ -98,15 +99,22 @@ Status RowsetMetaManager::save(OlapMeta* meta, TabletUid
tablet_uid, const Rowse
// return Status::InternalError("invaid partition id {} tablet {}",
// rowset_meta_pb.partition_id(), rowset_meta_pb.tablet_id());
}
+ DBUG_EXECUTE_IF("RowsetMetaManager::save::zero_partition_id", {
+ long partition_id = rowset_meta_pb.partition_id();
+ auto& rs_pb =
const_cast<std::decay_t<decltype(rowset_meta_pb)>&>(rowset_meta_pb);
+ rs_pb.set_partition_id(0);
+ LOG(WARNING) << "set debug point
RowsetMetaManager::save::zero_partition_id old="
+ << partition_id << " new=" <<
rowset_meta_pb.DebugString();
+ });
if (enable_binlog) {
return _save_with_binlog(meta, tablet_uid, rowset_id, rowset_meta_pb);
} else {
- return save(meta, tablet_uid, rowset_id, rowset_meta_pb);
+ return _save(meta, tablet_uid, rowset_id, rowset_meta_pb);
}
}
-Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const
RowsetId& rowset_id,
- const RowsetMetaPB& rowset_meta_pb) {
+Status RowsetMetaManager::_save(OlapMeta* meta, TabletUid tablet_uid, const
RowsetId& rowset_id,
+ const RowsetMetaPB& rowset_meta_pb) {
std::string key =
fmt::format("{}{}_{}", ROWSET_PREFIX, tablet_uid.to_string(),
rowset_id.to_string());
std::string value;
@@ -523,7 +531,7 @@ Status RowsetMetaManager::load_json_rowset_meta(OlapMeta*
meta,
}
RowsetId rowset_id = rowset_meta.rowset_id();
TabletUid tablet_uid = rowset_meta.tablet_uid();
- Status status = save(meta, tablet_uid, rowset_id,
rowset_meta.get_rowset_pb());
+ Status status = save(meta, tablet_uid, rowset_id,
rowset_meta.get_rowset_pb(), false);
return status;
}
diff --git a/be/src/olap/rowset/rowset_meta_manager.h
b/be/src/olap/rowset/rowset_meta_manager.h
index 0c04cb686c5..ddf33aa055a 100644
--- a/be/src/olap/rowset/rowset_meta_manager.h
+++ b/be/src/olap/rowset/rowset_meta_manager.h
@@ -51,8 +51,6 @@ public:
// TODO(Drogon): refactor save && _save_with_binlog to one, adapt to ut
temperately
static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId&
rowset_id,
const RowsetMetaPB& rowset_meta_pb, bool enable_binlog);
- static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId&
rowset_id,
- const RowsetMetaPB& rowset_meta_pb);
static std::vector<std::string> get_binlog_filenames(OlapMeta* meta,
TabletUid tablet_uid,
std::string_view
binlog_version,
@@ -79,6 +77,8 @@ public:
static Status load_json_rowset_meta(OlapMeta* meta, const std::string&
rowset_meta_path);
private:
+ static Status _save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId&
rowset_id,
+ const RowsetMetaPB& rowset_meta_pb);
static Status _save_with_binlog(OlapMeta* meta, TabletUid tablet_uid,
const RowsetId& rowset_id,
const RowsetMetaPB& rowset_meta_pb);
static Status _get_rowset_binlog_metas(OlapMeta* meta, const TabletUid
tablet_uid,
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 7aa5c52534f..f1e8e6185fc 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -844,6 +844,10 @@ Status TabletManager::load_tablet_from_meta(DataDir*
data_dir, TTabletId tablet_
tablet_meta->set_tablet_state(TABLET_RUNNING);
}
+ if (tablet_meta->partition_id() == 0) {
+ LOG(WARNING) << "tablet=" << tablet_id << " load from meta but
partition id eq 0";
+ }
+
TabletSharedPtr tablet = Tablet::create_tablet_from_meta(tablet_meta,
data_dir);
if (tablet == nullptr) {
return Status::Error<TABLE_CREATE_FROM_HEADER_ERROR>(
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index daec22246e0..9d6ea16d6ff 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -38,6 +38,7 @@
#include "olap/olap_define.h"
#include "olap/tablet_meta_manager.h"
#include "olap/utils.h"
+#include "util/debug_points.h"
#include "util/string_util.h"
#include "util/time.h"
#include "util/uid_util.h"
@@ -471,6 +472,16 @@ Status TabletMeta::_save_meta(DataDir* data_dir) {
Status TabletMeta::serialize(string* meta_binary) {
TabletMetaPB tablet_meta_pb;
to_meta_pb(&tablet_meta_pb);
+ if (tablet_meta_pb.partition_id() <= 0) {
+ LOG(WARNING) << "invalid partition id " <<
tablet_meta_pb.partition_id() << " tablet "
+ << tablet_meta_pb.tablet_id();
+ }
+ DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", {
+ long partition_id = tablet_meta_pb.partition_id();
+ tablet_meta_pb.set_partition_id(0);
+ LOG(WARNING) << "set debug point
TabletMeta::serialize::zero_partition_id old="
+ << partition_id << " new=" <<
tablet_meta_pb.DebugString();
+ });
bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
if (!serialize_success) {
LOG(FATAL) << "failed to serialize meta " << full_name();
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index da465b3fbd0..441cf93d4e4 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -290,6 +290,13 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId
partition_id,
do {
// get tx
std::shared_lock rdlock(_get_txn_map_lock(transaction_id));
+ auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb();
+ // TODO(dx): remove log after fix partition id eq 0 bug
+ if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) {
+ rowset_ptr->rowset_meta()->set_partition_id(partition_id);
+ LOG(WARNING) << "cant get partition id from rs pb, get from func
arg partition_id="
+ << partition_id;
+ }
txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
auto it = txn_tablet_map.find(key);
if (it == txn_tablet_map.end()) {
@@ -335,15 +342,9 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId
partition_id,
// save meta need access disk, it maybe very slow, so that it is not in
global txn lock
// it is under a single txn lock
if (!is_recovery) {
- auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb();
- // TODO(dx): remove log after fix partition id eq 0 bug
- if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) {
- rs_pb.set_partition_id(partition_id);
- LOG(WARNING) << "cant get partition id from rs pb, get from func
arg partition_id="
- << partition_id;
- }
Status save_status =
- RowsetMetaManager::save(meta, tablet_uid,
rowset_ptr->rowset_id(), rs_pb);
+ RowsetMetaManager::save(meta, tablet_uid,
rowset_ptr->rowset_id(),
+
rowset_ptr->rowset_meta()->get_rowset_pb(), false);
DBUG_EXECUTE_IF("TxnManager.RowsetMetaManager.save_wait", {
if (auto wait = dp->param<int>("duration", 0); wait > 0) {
LOG_WARNING("TxnManager.RowsetMetaManager.save_wait").tag("wait ms", wait);
diff --git a/be/test/olap/rowset/rowset_meta_manager_test.cpp
b/be/test/olap/rowset/rowset_meta_manager_test.cpp
index a747d1fa2ca..5875ba424b1 100644
--- a/be/test/olap/rowset/rowset_meta_manager_test.cpp
+++ b/be/test/olap/rowset/rowset_meta_manager_test.cpp
@@ -103,7 +103,7 @@ TEST_F(RowsetMetaManagerTest, TestSaveAndGetAndRemove) {
EXPECT_EQ(rowset_meta.rowset_id(), rowset_id);
RowsetMetaPB rowset_meta_pb;
rowset_meta.to_rowset_pb(&rowset_meta_pb);
- Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id,
rowset_meta_pb);
+ Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id,
rowset_meta_pb, false);
EXPECT_TRUE(status == Status::OK());
EXPECT_TRUE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid,
rowset_id));
std::string json_rowset_meta_read;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]