eldenmoon commented on code in PR #33298:
URL: https://github.com/apache/doris/pull/33298#discussion_r1559086406


##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -413,10 +413,12 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
     // init by column path
     for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
         auto& column = _tablet_schema->column(ordinal);
-        if (!column.has_path_info()) {
+        if (!column.has_path_info() && !column.is_variant_type()) {

Review Comment:
   modified



##########
cloud/src/meta-service/meta_service_schema.cpp:
##########
@@ -119,4 +128,239 @@ bool parse_schema_value(const ValueBuf& buf, 
doris::TabletSchemaCloudPB* schema)
     return buf.to_pb(schema);
 }
 
+// Map item to dictionary key, and add key to rowset meta, if it is a new one, 
generate it and increase item id
+// Need to remove dynamic parts from original RowsetMeta's TabletSchema, to 
make fdb schema kv stable
+template<typename ItemPB>
+void process_dictionary(
+    SchemaCloudDictionary& dict,
+    const google::protobuf::Map<int32_t, ItemPB>& item_dict,
+    google::protobuf::RepeatedPtrField<ItemPB>* result,
+    RowsetMetaCloudPB* rowset_meta,
+    const google::protobuf::RepeatedPtrField<ItemPB>& items,
+    const std::function<bool(const ItemPB&)>& filter,
+    const std::function<void(int32_t)>& add_dict_key_fn) {
+    if (items.empty()) {
+        return;
+    }
+    // Use deterministic method to do serialization since structure like
+    // `google::protobuf::Map`'s serialization is unstable
+    auto serialize_fn = [](const ItemPB& item) -> std::string {
+        std::string output;
+        google::protobuf::io::StringOutputStream string_output_stream(&output);
+        google::protobuf::io::CodedOutputStream 
output_stream(&string_output_stream);
+        output_stream.SetSerializationDeterministic(true);
+        item.SerializeToCodedStream(&output_stream);
+        return output;
+    };
+
+    google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+    std::unordered_map<std::string, int> reversed_dict;
+    for (const auto& [key, val] : item_dict) {
+        reversed_dict[serialize_fn(val)] = key;
+    }
+
+    for (const auto& item : items) {
+        if (filter(item)) {
+            // Filter none extended items, mainly extended columns and 
extended indexes
+            *none_ext_items.Add() = item;
+            continue;
+        }
+        const std::string serialized_key = serialize_fn(item);
+        auto it = reversed_dict.find(serialized_key);
+        if (it != reversed_dict.end()) {
+            // Add existed dict key to related dict
+            add_dict_key_fn(it->second);
+        } else {
+            // Add new dictionary key-value pair and update 
current_xxx_dict_id.
+            int64_t current_dict_id = 0;
+            if constexpr (std::is_same_v<ItemPB, ColumnPB>) {
+                current_dict_id = dict.current_column_dict_id() + 1;
+                dict.set_current_column_dict_id(current_dict_id);
+                dict.mutable_column_dict()->emplace(current_dict_id, item);
+            }
+            if constexpr (std::is_same_v<ItemPB, doris::TabletIndexPB>) {
+                current_dict_id = dict.current_index_dict_id() + 1;
+                dict.set_current_index_dict_id(current_dict_id);
+                dict.mutable_index_dict()->emplace(current_dict_id, item);
+            }
+            add_dict_key_fn(current_dict_id);
+            reversed_dict[serialized_key] = current_dict_id;
+            // LOG(INFO) << "Add dict key = " << current_dict_id << " dict 
value = " << item.ShortDebugString();
+        }
+    }
+    if (result != nullptr) {
+        result->Swap(&none_ext_items);

Review Comment:
   clear extended columns/indexes to prevent writing them to fdb kv



##########
cloud/src/meta-service/meta_service_schema.h:
##########
@@ -30,4 +30,17 @@ void put_schema_kv(MetaServiceCode& code, std::string& msg, 
Transaction* txn,
 // Return true if parse success
 [[nodiscard]] bool parse_schema_value(const ValueBuf& buf, 
doris::TabletSchemaCloudPB* schema);
 
+// Writes schema dictionary metadata to RowsetMetaCloudPB
+[[nodiscard]] std::pair<MetaServiceCode, std::string> write_schema_dict(

Review Comment:
   OK



##########
cloud/src/meta-service/meta_service.cpp:
##########
@@ -1080,14 +1081,11 @@ void 
MetaServiceImpl::commit_rowset(::google::protobuf::RpcController* controlle
         DCHECK(rowset_meta.tablet_schema().has_schema_version());
         DCHECK_GE(rowset_meta.tablet_schema().schema_version(), 0);
         
rowset_meta.set_schema_version(rowset_meta.tablet_schema().schema_version());
-        std::string schema_key;
-        if (rowset_meta.has_variant_type_in_schema()) {
-            // encodes schema in a seperate kv, since variant schema is 
volatile
-            schema_key = meta_rowset_schema_key(
-                    {instance_id, rowset_meta.tablet_id(), 
rowset_meta.rowset_id_v2()});
-        } else {
-            schema_key = meta_schema_key(
+        std::string schema_key = meta_schema_key(
                     {instance_id, rowset_meta.index_id(), 
rowset_meta.schema_version()});
+        if (rowset_meta.has_variant_type_in_schema()) {
+            std::tie(code, msg) = write_schema_dict(instance_id, txn.get(), 
&rowset_meta); 

Review Comment:
   no, they are in the same transaction, both using txn.get()



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to