This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new f3064a8e767 [Improve](Tablet Schema) Use deterministic way to 
serialize protobuf (#30906) (#31947)
f3064a8e767 is described below

commit f3064a8e7670d7be5e705c7837ddfd8904aa5e10
Author: lihangyu <[email protected]>
AuthorDate: Thu Mar 7 20:02:57 2024 +0800

    [Improve](Tablet Schema) Use deterministic way to serialize protobuf 
(#30906) (#31947)
---
 be/src/olap/rowset/rowset_meta.h |  4 ++--
 be/src/olap/snapshot_manager.cpp |  4 ++--
 be/src/olap/tablet_schema.cpp    | 17 +++++++++++++++--
 be/src/olap/tablet_schema.h      | 24 ++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 8c3bcb5958c..a03433865ec 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -56,7 +56,7 @@ public:
     virtual bool init_from_pb(const RowsetMetaPB& rowset_meta_pb) {
         if (rowset_meta_pb.has_tablet_schema()) {
             _schema = TabletSchemaCache::instance()->insert(
-                    rowset_meta_pb.tablet_schema().SerializeAsString());
+                    
TabletSchema::deterministic_string_serialize(rowset_meta_pb.tablet_schema()));
         }
         // Release ownership of TabletSchemaPB from `rowset_meta_pb` and then 
set it back to `rowset_meta_pb`,
         // this won't break const semantics of `rowset_meta_pb`, because 
`rowset_meta_pb` is not changed
@@ -379,7 +379,7 @@ private:
         }
         if (rowset_meta_pb.has_tablet_schema()) {
             _schema = TabletSchemaCache::instance()->insert(
-                    rowset_meta_pb.tablet_schema().SerializeAsString());
+                    
TabletSchema::deterministic_string_serialize(rowset_meta_pb.tablet_schema()));
             rowset_meta_pb.clear_tablet_schema();
         }
         _rowset_meta_pb = rowset_meta_pb;
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index e8135a42025..05e2c771aac 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -166,8 +166,8 @@ Status SnapshotManager::convert_rowset_ids(const 
std::string& clone_dir, int64_t
     }
     new_tablet_meta_pb.set_schema_hash(schema_hash);
     TabletSchemaSPtr tablet_schema;
-    tablet_schema =
-            
TabletSchemaCache::instance()->insert(new_tablet_meta_pb.schema().SerializeAsString());
+    tablet_schema = TabletSchemaCache::instance()->insert(
+            
TabletSchema::deterministic_string_serialize(new_tablet_meta_pb.schema()));
 
     std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map;
     std::unordered_map<RowsetId, RowsetId, HashOfRowsetId> rowset_id_mapping;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index fefd291e3f7..8c810b528b9 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -20,12 +20,16 @@
 #include <gen_cpp/Descriptors_types.h>
 #include <gen_cpp/olap_file.pb.h>
 #include <glog/logging.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
 
 #include <algorithm>
 #include <cctype>
 // IWYU pragma: no_include <bits/std_abs.h>
 #include <cmath> // IWYU pragma: keep
 #include <ostream>
+#include <vector>
 
 // IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
 #include "common/compiler_util.h" // IWYU pragma: keep
@@ -609,7 +613,7 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
         index->add_col_unique_id(col_unique_id);
     }
     index->set_index_type(_index_type);
-    for (auto& kv : _properties) {
+    for (const auto& kv : _properties) {
         (*index->mutable_properties())[kv.first] = kv.second;
     }
 }
@@ -726,7 +730,7 @@ void TabletSchema::copy_from(const TabletSchema& 
tablet_schema) {
 std::string TabletSchema::to_key() const {
     TabletSchemaPB pb;
     to_schema_pb(&pb);
-    return pb.SerializeAsString();
+    return TabletSchema::deterministic_string_serialize(pb);
 }
 
 void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t 
version,
@@ -1097,4 +1101,13 @@ bool operator!=(const TabletSchema& a, const 
TabletSchema& b) {
     return !(a == b);
 }
 
+// Serializes `schema_pb` to its binary wire format with protobuf's
+// deterministic serialization enabled. The result is used as the key for
+// TabletSchemaCache, so map fields must not make the bytes order-dependent.
+std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) {
+    std::string output;
+    {
+        // CodedOutputStream buffers internally and only flushes/trims into
+        // `output` when it is destroyed. Close both streams before returning
+        // so the result does not depend on NRVO making `output` the return
+        // object itself.
+        google::protobuf::io::StringOutputStream string_output_stream(&output);
+        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
+        output_stream.SetSerializationDeterministic(true);
+        schema_pb.SerializeToCodedStream(&output_stream);
+    }
+    return output;
+}
+
 } // namespace doris
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index abc75fe7a5b..11f87f29387 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -208,6 +208,9 @@ public:
     // void create_from_pb(const TabletSchemaPB& schema, TabletSchema* 
tablet_schema).
     TabletSchema() = default;
     void init_from_pb(const TabletSchemaPB& schema);
+    // Notice: Use a deterministic way to serialize protobuf,
+    // since serializing a Map in protobuf may be non-deterministic by default
+    static std::string deterministic_string_serialize(const TabletSchemaPB& 
schema_pb);
     void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
     void append_column(TabletColumn column, bool is_dropped_column = false);
     void append_index(TabletIndex index);
@@ -310,6 +313,27 @@ public:
         str += "]";
         return str;
     }
+
+    // Dump the column layout as "[(name, type, is_nullable:<bool>), ...]",
+    // with one tuple per entry of `_cols`, in iteration order.
+    string dump_structure() const {
+        string str = "[";
+        // Bind by const reference so columns are not copied while dumping.
+        for (const auto& p : _cols) {
+            // `str` is longer than the opening "[" once a column was written,
+            // so this inserts the separator before every tuple but the first.
+            if (str.size() > 1) {
+                str += ", ";
+            }
+            str += "(";
+            str += p.name();
+            str += ", ";
+            str += TabletColumn::get_string_by_field_type(p.type());
+            str += ", ";
+            str += "is_nullable:";
+            str += (p.is_nullable() ? "true" : "false");
+            str += ")";
+        }
+        str += "]";
+        return str;
+    }
+
     vectorized::Block create_block_by_cids(const std::vector<uint32_t>& cids);
 
 private:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to