github-actions[bot] commented on code in PR #33298:
URL: https://github.com/apache/doris/pull/33298#discussion_r1554811074


##########
cloud/src/meta-service/meta_service_schema.cpp:
##########
@@ -119,4 +128,170 @@ bool parse_schema_value(const ValueBuf& buf, doris::TabletSchemaCloudPB* schema)
     return buf.to_pb(schema);
 }
 
+// Map item to dictionary key, and add key to rowset meta, if it is a new one, generate it and increase item id
+// Need to remove dynamic parts from original RowsetMeta's TabletSchema, to make fdb schema kv stable
+template<typename ItemPB>
+void process_dictionary(
+    SchemaCloudDictionary& dict,
+    const google::protobuf::Map<int32_t, ItemPB>& item_dict,
+    google::protobuf::RepeatedPtrField<ItemPB>* result,
+    RowsetMetaCloudPB* rowset_meta,
+    const google::protobuf::RepeatedPtrField<ItemPB>& items,
+    const std::function<bool(const ItemPB&)>& filter,
+    const std::function<void(int32_t)>& add_dict_key_fn) {
+    if (items.empty()) {
+        return;
+    }
+    // Use deterministic method to do serialization since structure like
+    // `google::protobuf::Map`'s serialization is unstable
+    auto serialize_fn = [](const ItemPB& item) -> std::string {
+        std::string output;
+        google::protobuf::io::StringOutputStream string_output_stream(&output);
+        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
+        output_stream.SetSerializationDeterministic(true);
+        item.SerializeToCodedStream(&output_stream);
+        return output;
+    };
+
+    google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+    std::unordered_map<std::string, int> reversed_dict;
+    for (const auto& [key, val] : item_dict) {
+        reversed_dict[serialize_fn(val)] = key;
+    }
+
+    for (const auto& item : items) {
+        if (filter(item)) {
+            // Filter none extended items, mainly extended columns and extended indexes
+            *none_ext_items.Add() = item;
+            continue;
+        }
+        const std::string serialized_key = serialize_fn(item);
+        auto it = reversed_dict.find(serialized_key);
+        if (it != reversed_dict.end()) {
+            // Add existed dict key to related dict
+            add_dict_key_fn(it->second);
+        } else {
+            // Add new dictionary key-value pair and update current_xxx_dict_id.
+            int64_t current_dict_id = 0;
+            if constexpr (std::is_same_v<ItemPB, ColumnPB>) {
+                current_dict_id = dict.current_column_dict_id() + 1;
+                dict.set_current_column_dict_id(current_dict_id);
+                dict.mutable_column_dict()->emplace(current_dict_id, item);
+            }
+            if constexpr (std::is_same_v<ItemPB, doris::TabletIndexPB>) {
+                current_dict_id = dict.current_index_dict_id() + 1;
+                dict.set_current_index_dict_id(current_dict_id);
+                dict.mutable_index_dict()->emplace(current_dict_id, item);
+            }
+            add_dict_key_fn(current_dict_id);
+            reversed_dict[serialized_key] = current_dict_id;
+            // LOG(INFO) << "Add dict key = " << current_dict_id << " dict value = " << item.ShortDebugString();
+        }
+    }
+    if (result != nullptr) {
+        result->Swap(&none_ext_items);
+    }
+}
+
+// Writes schema dictionary metadata to RowsetMetaCloudPB.
+// Schema was extended in BE side, we need to reset schema to original frontend schema and store
+// such restored schema in fdb. And also add extra dict key info to RowsetMetaCloudPB.
+std::pair<MetaServiceCode, std::string> write_schema_dict(

Review Comment:
   warning: function 'write_schema_dict' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
   std::pair<MetaServiceCode, std::string> write_schema_dict(
                                           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **cloud/src/meta-service/meta_service_schema.cpp:198:** 93 lines including whitespace and comments (threshold 80)
   ```cpp
   std::pair<MetaServiceCode, std::string> write_schema_dict(
                                           ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to