This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new f3064a8e767 [Improve](Tablet Schema) Use deterministic way to
serialize protobuf (#30906) (#31947)
f3064a8e767 is described below
commit f3064a8e7670d7be5e705c7837ddfd8904aa5e10
Author: lihangyu <[email protected]>
AuthorDate: Thu Mar 7 20:02:57 2024 +0800
[Improve](Tablet Schema) Use deterministic way to serialize protobuf
(#30906) (#31947)
---
be/src/olap/rowset/rowset_meta.h | 4 ++--
be/src/olap/snapshot_manager.cpp | 4 ++--
be/src/olap/tablet_schema.cpp | 17 +++++++++++++++--
be/src/olap/tablet_schema.h | 24 ++++++++++++++++++++++++
4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 8c3bcb5958c..a03433865ec 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -56,7 +56,7 @@ public:
virtual bool init_from_pb(const RowsetMetaPB& rowset_meta_pb) {
if (rowset_meta_pb.has_tablet_schema()) {
_schema = TabletSchemaCache::instance()->insert(
- rowset_meta_pb.tablet_schema().SerializeAsString());
+
TabletSchema::deterministic_string_serialize(rowset_meta_pb.tablet_schema()));
}
// Release ownership of TabletSchemaPB from `rowset_meta_pb` and then
set it back to `rowset_meta_pb`,
// this won't break const semantics of `rowset_meta_pb`, because
`rowset_meta_pb` is not changed
@@ -379,7 +379,7 @@ private:
}
if (rowset_meta_pb.has_tablet_schema()) {
_schema = TabletSchemaCache::instance()->insert(
- rowset_meta_pb.tablet_schema().SerializeAsString());
+
TabletSchema::deterministic_string_serialize(rowset_meta_pb.tablet_schema()));
rowset_meta_pb.clear_tablet_schema();
}
_rowset_meta_pb = rowset_meta_pb;
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index e8135a42025..05e2c771aac 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -166,8 +166,8 @@ Status SnapshotManager::convert_rowset_ids(const
std::string& clone_dir, int64_t
}
new_tablet_meta_pb.set_schema_hash(schema_hash);
TabletSchemaSPtr tablet_schema;
- tablet_schema =
-
TabletSchemaCache::instance()->insert(new_tablet_meta_pb.schema().SerializeAsString());
+ tablet_schema = TabletSchemaCache::instance()->insert(
+
TabletSchema::deterministic_string_serialize(new_tablet_meta_pb.schema()));
std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map;
std::unordered_map<RowsetId, RowsetId, HashOfRowsetId> rowset_id_mapping;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index fefd291e3f7..8c810b528b9 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -20,12 +20,16 @@
#include <gen_cpp/Descriptors_types.h>
#include <gen_cpp/olap_file.pb.h>
#include <glog/logging.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include <algorithm>
#include <cctype>
// IWYU pragma: no_include <bits/std_abs.h>
#include <cmath> // IWYU pragma: keep
#include <ostream>
+#include <vector>
// IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
#include "common/compiler_util.h" // IWYU pragma: keep
@@ -609,7 +613,7 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
index->add_col_unique_id(col_unique_id);
}
index->set_index_type(_index_type);
- for (auto& kv : _properties) {
+ for (const auto& kv : _properties) {
(*index->mutable_properties())[kv.first] = kv.second;
}
}
@@ -726,7 +730,7 @@ void TabletSchema::copy_from(const TabletSchema&
tablet_schema) {
// Returns a string form of this schema: the schema is exported into a
// TabletSchemaPB with to_schema_pb() and that message is serialized.
// This commit switches the serialization from SerializeAsString() to
// TabletSchema::deterministic_string_serialize().
std::string TabletSchema::to_key() const {
TabletSchemaPB pb;
to_schema_pb(&pb);
- return pb.SerializeAsString();
+ return TabletSchema::deterministic_string_serialize(pb);
}
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t
version,
@@ -1097,4 +1101,13 @@ bool operator!=(const TabletSchema& a, const
TabletSchema& b) {
return !(a == b);
}
+// Serializes `schema_pb` into a byte string with protobuf's deterministic
+// serialization mode switched on for the output stream.
+//
+// @param schema_pb  the schema message to serialize.
+// @return           the serialized bytes of `schema_pb`.
+std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) {
+    std::string output;
+    {
+        // Keep both streams in an inner scope so they are destroyed before
+        // `output` is returned. CodedOutputStream flushes its pending bytes and
+        // hands unused buffer space back to the StringOutputStream (which then
+        // trims `output`) only on destruction. Returning while the streams are
+        // still alive yields the right bytes only if the compiler happens to
+        // apply NRVO, which the language does not guarantee.
+        google::protobuf::io::StringOutputStream string_output_stream(&output);
+        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
+        output_stream.SetSerializationDeterministic(true);
+        schema_pb.SerializeToCodedStream(&output_stream);
+    }
+    return output;
+}
+
} // namespace doris
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index abc75fe7a5b..11f87f29387 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -208,6 +208,9 @@ public:
// void create_from_pb(const TabletSchemaPB& schema, TabletSchema*
tablet_schema).
TabletSchema() = default;
void init_from_pb(const TabletSchemaPB& schema);
+ // Notice: Use a deterministic way to serialize protobuf,
+ // since serializing a Map in protobuf is non-deterministic by default.
+ static std::string deterministic_string_serialize(const TabletSchemaPB&
schema_pb);
void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
void append_column(TabletColumn column, bool is_dropped_column = false);
void append_index(TabletIndex index);
@@ -310,6 +313,27 @@ public:
str += "]";
return str;
}
+
+    // Dump the column layout as "[(name, type, is_nullable:<bool>), ...]".
+    // One "(...)" entry is written per element of _cols, in iteration order,
+    // and entries are separated by ", ". An empty _cols yields "[]".
+    string dump_structure() const {
+        string str = "[";
+        // Bind by const reference: iterating by value would copy every column
+        // object only to read three of its accessors.
+        for (const auto& p : _cols) {
+            // Anything beyond the opening "[" means an entry was already written.
+            if (str.size() > 1) {
+                str += ", ";
+            }
+            str += "(";
+            str += p.name();
+            str += ", ";
+            str += TabletColumn::get_string_by_field_type(p.type());
+            str += ", ";
+            str += "is_nullable:";
+            str += (p.is_nullable() ? "true" : "false");
+            str += ")";
+        }
+        str += "]";
+        return str;
+    }
+
vectorized::Block create_block_by_cids(const std::vector<uint32_t>& cids);
private:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]