github-actions[bot] commented on code in PR #33298: URL: https://github.com/apache/doris/pull/33298#discussion_r1554811074
##########
cloud/src/meta-service/meta_service_schema.cpp:
##########
@@ -119,4 +128,170 @@ bool parse_schema_value(const ValueBuf& buf, doris::TabletSchemaCloudPB* schema)
     return buf.to_pb(schema);
 }
 
+// Map item to dictionary key, and add key to rowset meta, if it is a new one, generate it and increase item id
+// Need to remove dynamic parts from original RowsetMeta's TabletSchema, to make fdb schema kv stable
+template<typename ItemPB>
+void process_dictionary(
+    SchemaCloudDictionary& dict,
+    const google::protobuf::Map<int32_t, ItemPB>& item_dict,
+    google::protobuf::RepeatedPtrField<ItemPB>* result,
+    RowsetMetaCloudPB* rowset_meta,
+    const google::protobuf::RepeatedPtrField<ItemPB>& items,
+    const std::function<bool(const ItemPB&)>& filter,
+    const std::function<void(int32_t)>& add_dict_key_fn) {
+    if (items.empty()) {
+        return;
+    }
+    // Use deterministic method to do serialization since structure like
+    // `google::protobuf::Map`'s serialization is unstable
+    auto serialize_fn = [](const ItemPB& item) -> std::string {
+        std::string output;
+        google::protobuf::io::StringOutputStream string_output_stream(&output);
+        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
+        output_stream.SetSerializationDeterministic(true);
+        item.SerializeToCodedStream(&output_stream);
+        return output;
+    };
+
+    google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+    std::unordered_map<std::string, int> reversed_dict;
+    for (const auto& [key, val] : item_dict) {
+        reversed_dict[serialize_fn(val)] = key;
+    }
+
+    for (const auto& item : items) {
+        if (filter(item)) {
+            // Filter none extended items, mainly extended columns and extended indexes
+            *none_ext_items.Add() = item;
+            continue;
+        }
+        const std::string serialized_key = serialize_fn(item);
+        auto it = reversed_dict.find(serialized_key);
+        if (it != reversed_dict.end()) {
+            // Add existed dict key to related dict
+            add_dict_key_fn(it->second);
+        } else {
+            // Add new dictionary key-value pair and update current_xxx_dict_id.
+            int64_t current_dict_id = 0;
+            if constexpr (std::is_same_v<ItemPB, ColumnPB>) {
+                current_dict_id = dict.current_column_dict_id() + 1;
+                dict.set_current_column_dict_id(current_dict_id);
+                dict.mutable_column_dict()->emplace(current_dict_id, item);
+            }
+            if constexpr (std::is_same_v<ItemPB, doris::TabletIndexPB>) {
+                current_dict_id = dict.current_index_dict_id() + 1;
+                dict.set_current_index_dict_id(current_dict_id);
+                dict.mutable_index_dict()->emplace(current_dict_id, item);
+            }
+            add_dict_key_fn(current_dict_id);
+            reversed_dict[serialized_key] = current_dict_id;
+            // LOG(INFO) << "Add dict key = " << current_dict_id << " dict value = " << item.ShortDebugString();
+        }
+    }
+    if (result != nullptr) {
+        result->Swap(&none_ext_items);
+    }
+}
+
+// Writes schema dictionary metadata to RowsetMetaCloudPB.
+// Schema was extended in BE side, we need to reset schema to original frontend schema and store
+// such restored schema in fdb. And also add extra dict key info to RowsetMetaCloudPB.
+std::pair<MetaServiceCode, std::string> write_schema_dict(

Review Comment:
warning: function 'write_schema_dict' exceeds recommended size/complexity thresholds [readability-function-size]
```cpp
std::pair<MetaServiceCode, std::string> write_schema_dict(
                                        ^
```
<details>
<summary>Additional context</summary>

**cloud/src/meta-service/meta_service_schema.cpp:198:** 93 lines including whitespace and comments (threshold 80)
```cpp
std::pair<MetaServiceCode, std::string> write_schema_dict(
                                        ^
```

</details>

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org