github-actions[bot] commented on code in PR #33298:
URL: https://github.com/apache/doris/pull/33298#discussion_r1559180788
##########
cloud/src/meta-service/meta_service_schema.cpp:
##########
@@ -118,5 +130,247 @@ bool parse_schema_value(const ValueBuf& buf,
doris::TabletSchemaCloudPB* schema)
// TODO(plat1ko): Apply decompression based on value version
return buf.to_pb(schema);
}
+/**
+ * Processes dictionary items, mapping them to a dictionary key and adding the
key to rowset meta.
+ * If it's a new item, generates a new key and increments the item ID. This
function is also responsible
+ * for removing dynamic parts from the original RowsetMeta's TabletSchema to
ensure the stability of
+ * FDB schema key-value pairs.
+ *
+ * @param dict The schema cloud dictionary reference, used for storing and
managing schema dictionary data.
+ * @param item_dict A mapping from item unique identifiers to their protobuf
representations, used to find
+ * and process specific item data.
+ * @param result Pointer to the collection of result items. Stores filtered or
transformed items. Can be nullptr
+ * if collecting results is not required.
+ * @param items The collection of items to be processed. These items are
filtered and potentially added to the dictionary.
+ * @param filter A predicate selecting items that bypass the dictionary. If
it returns true, the item is copied to the result unchanged; otherwise it is mapped to a dictionary key.
+ * @param add_dict_key_fn A function to handle the logic when a new item is
added to the dictionary, such as updating metadata.
+ */
+template <typename ItemPB>
+void process_dictionary(SchemaCloudDictionary& dict,
+ const google::protobuf::Map<int32_t, ItemPB>&
item_dict,
+ google::protobuf::RepeatedPtrField<ItemPB>* result,
+ const google::protobuf::RepeatedPtrField<ItemPB>&
items,
+ const std::function<bool(const ItemPB&)>& filter,
+ const std::function<void(int32_t)>& add_dict_key_fn) {
+ if (items.empty()) {
+ return;
+ }
+ // Use deterministic method to do serialization since structure like
+ // `google::protobuf::Map`'s serialization is unstable
+ auto serialize_fn = [](const ItemPB& item) -> std::string {
+ std::string output;
+ google::protobuf::io::StringOutputStream string_output_stream(&output);
+ google::protobuf::io::CodedOutputStream
output_stream(&string_output_stream);
+ output_stream.SetSerializationDeterministic(true);
+ item.SerializeToCodedStream(&output_stream);
+ return output;
+ };
+
+ google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+ std::unordered_map<std::string, int> reversed_dict;
+ for (const auto& [key, val] : item_dict) {
+ reversed_dict[serialize_fn(val)] = key;
+ }
+
+ for (const auto& item : items) {
+ if (filter(item)) {
+ // Keep non-extended items as-is; only extended columns and
extended indexes continue to the dictionary handling below
+ *none_ext_items.Add() = item;
+ continue;
+ }
+ const std::string serialized_key = serialize_fn(item);
+ auto it = reversed_dict.find(serialized_key);
+ if (it != reversed_dict.end()) {
+ // Add the existing dict key to the related dict
+ add_dict_key_fn(it->second);
+ } else {
+ // Add new dictionary key-value pair and update
current_xxx_dict_id.
+ int64_t current_dict_id = 0;
+ if constexpr (std::is_same_v<ItemPB, ColumnPB>) {
+ current_dict_id = dict.current_column_dict_id() + 1;
+ dict.set_current_column_dict_id(current_dict_id);
+ dict.mutable_column_dict()->emplace(current_dict_id, item);
+ }
+ if constexpr (std::is_same_v<ItemPB, doris::TabletIndexPB>) {
+ current_dict_id = dict.current_index_dict_id() + 1;
+ dict.set_current_index_dict_id(current_dict_id);
+ dict.mutable_index_dict()->emplace(current_dict_id, item);
+ }
+ add_dict_key_fn(current_dict_id);
+ reversed_dict[serialized_key] = current_dict_id;
+ // LOG(INFO) << "Add dict key = " << current_dict_id << " dict
value = " << item.ShortDebugString();
+ }
+ }
+ // clear extended items to prevent writing them to fdb
+ if (result != nullptr) {
+ result->Swap(&none_ext_items);
+ }
+}
+
+// Writes schema dictionary metadata to RowsetMetaCloudPB.
+// The schema was extended on the BE side, so we need to reset it to the original
frontend schema and store
+// the restored schema in fdb, and also add extra dict key info to
RowsetMetaCloudPB.
+void write_schema_dict(MetaServiceCode& code, std::string& msg, const
std::string& instance_id,
Review Comment:
warning: function 'write_schema_dict' exceeds recommended size/complexity
thresholds [readability-function-size]
```cpp
void write_schema_dict(
^
```
<details>
<summary>Additional context</summary>
**cloud/src/meta-service/meta_service_schema.cpp:212:** 90 lines including
whitespace and comments (threshold 80)
```cpp
void write_schema_dict(
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]