eldenmoon commented on code in PR #56977:
URL: https://github.com/apache/doris/pull/56977#discussion_r2434919243


##########
be/src/vec/columns/column_variant.cpp:
##########
@@ -2503,6 +2477,41 @@ size_t 
ColumnVariant::find_path_lower_bound_in_sparse_data(StringRef path,
     return it.index;
 }
 
+void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const 
ColumnString* value,
+                                                              size_t row) {
+    const auto& data_ref = value->get_data_at(row);
+    const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
+    const PrimitiveType type =
+            
TabletColumn::get_primitive_type_by_field_type(static_cast<FieldType>(*start_data));

Review Comment:
   The PrimitiveType can be cached in the subcolumn to avoid calling
get_primitive_type_by_field_type each time.



##########
be/src/vec/columns/column_variant.cpp:
##########
@@ -2503,6 +2477,41 @@ size_t 
ColumnVariant::find_path_lower_bound_in_sparse_data(StringRef path,
     return it.index;
 }
 
+void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const 
ColumnString* value,
+                                                              size_t row) {
+    const auto& data_ref = value->get_data_at(row);
+    const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
+    const PrimitiveType type =
+            
TabletColumn::get_primitive_type_by_field_type(static_cast<FieldType>(*start_data));
+    auto check_end = [&](const uint8_t* end_ptr) {
+        CHECK_EQ(end_ptr - reinterpret_cast<const uint8_t*>(data_ref.data), 
data_ref.size);
+    };
+
+    bool need_field_deser = type != least_common_type.get_type_id();

Review Comment:
   need_field_deser 名字不太好理解 (The name `need_field_deser` is hard to understand.)



##########
be/src/vec/data_types/serde/data_type_serde.cpp:
##########
@@ -134,5 +138,230 @@ void DataTypeSerDe::to_string(const IColumn& column, 
size_t row_num, BufferWrita
 const std::string DataTypeSerDe::NULL_IN_COMPLEX_TYPE = "null";
 const std::string DataTypeSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N";
 
+const uint8_t* DataTypeSerDe::deserialize_binary_to_column(const uint8_t* 
data, IColumn& column) {
+    auto& nullable_column = assert_cast<ColumnNullable&, 
TypeCheckOnRelease::DISABLE>(column);
+    const FieldType type = static_cast<FieldType>(*data++);
+    const uint8_t* end = data;
+    switch (type) {
+    case FieldType::OLAP_FIELD_TYPE_STRING: {
+        end = DataTypeStringSerDe::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_TINYINT: {
+        end = DataTypeNumberSerDe<TYPE_TINYINT>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_SMALLINT: {
+        end = DataTypeNumberSerDe<TYPE_SMALLINT>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_INT: {
+        end = DataTypeNumberSerDe<TYPE_INT>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_BIGINT: {
+        end = DataTypeNumberSerDe<TYPE_BIGINT>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_LARGEINT: {
+        end = DataTypeNumberSerDe<TYPE_LARGEINT>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_FLOAT: {
+        end = DataTypeNumberSerDe<TYPE_FLOAT>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DOUBLE: {
+        end = DataTypeNumberSerDe<TYPE_DOUBLE>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_JSONB: {
+        end = DataTypeJsonbSerDe::deserialize_binary_to_column(data,
+                                                               
nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_ARRAY: {
+        end = DataTypeArraySerDe::deserialize_binary_to_column(data,
+                                                               
nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_IPV4: {
+        end = DataTypeNumberSerDe<TYPE_IPV4>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_IPV6: {
+        end = DataTypeNumberSerDe<TYPE_IPV6>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DATEV2: {
+        end = DataTypeNumberSerDe<TYPE_DATEV2>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
+        end = 
DataTypeNumberSerDe<TYPE_DATETIMEV2>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL32: {
+        end = 
DataTypeDecimalSerDe<TYPE_DECIMAL32>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL64: {
+        end = 
DataTypeDecimalSerDe<TYPE_DECIMAL64>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: {
+        end = 
DataTypeDecimalSerDe<TYPE_DECIMAL128I>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256: {
+        end = 
DataTypeDecimalSerDe<TYPE_DECIMAL256>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_BOOL: {
+        end = DataTypeNumberSerDe<TYPE_BOOLEAN>::deserialize_binary_to_column(
+                data, nullable_column.get_nested_column());
+        nullable_column.push_false_to_nullmap(1);
+        break;
+    }
+    case FieldType::OLAP_FIELD_TYPE_NONE: {
+        end = data;
+        nullable_column.insert_default();
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Type ({}) for deserialize_binary_to_column is 
invalid", type);
+    }
+    return end;
+}
+
+const uint8_t* DataTypeSerDe::deserialize_binary_to_field(const uint8_t* data, 
Field& field,

Review Comment:
   直接在Subcolumn缓存serde?是否能避免这一堆switch case (Could the serde be cached directly in the Subcolumn? Would that avoid this pile of switch cases?)
   
   
   



##########
be/src/vec/data_types/serde/data_type_decimal_serde.cpp:
##########
@@ -654,6 +654,65 @@ void 
DataTypeDecimalSerDe<T>::write_one_cell_to_binary(const IColumn& src_column
            data_ref.data, data_ref.size);
 }
 
+template <PrimitiveType T>
+const uint8_t* DataTypeDecimalSerDe<T>::deserialize_binary_to_column(const 
uint8_t* data,
+                                                                     IColumn& 
column) {
+    auto& col = assert_cast<ColumnDecimal<T>&>(column);
+    data += sizeof(uint8_t);
+    data += sizeof(uint8_t);
+    if constexpr (T == TYPE_DECIMAL32) {
+        col.insert_value(unaligned_load<Int32>(data));
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_DECIMAL64) {
+        col.insert_value(unaligned_load<Int64>(data));
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_DECIMAL128I) {
+        col.insert_value(unaligned_load<Int128>(data));
+        data += sizeof(Int128);
+    } else if constexpr (T == TYPE_DECIMAL256) {
+        col.insert_value(Decimal256(unaligned_load<wide::Int256>(data)));
+        data += sizeof(wide::Int256);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_column with type " + 
column.get_name());
+    }
+    return data;
+}
+
+template <PrimitiveType T>
+const uint8_t* DataTypeDecimalSerDe<T>::deserialize_binary_to_field(const 
uint8_t* data,
+                                                                    Field& 
field, FieldInfo& info) {
+    const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
+    data += sizeof(uint8_t);
+    const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
+    data += sizeof(uint8_t);
+    info.precision = static_cast<int>(precision);
+    info.scale = static_cast<int>(scale);
+    if constexpr (T == TYPE_DECIMAL32) {
+        Int32 v = unaligned_load<Int32>(data);
+        field = Field::create_field<TYPE_DECIMAL32>(Decimal32(v));
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_DECIMAL64) {
+        Int64 v = unaligned_load<Int64>(data);
+        field = Field::create_field<TYPE_DECIMAL64>(Decimal64(v));
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_DECIMAL128I) {
+        PackedInt128 pack;

Review Comment:
   add comment for `PackedInt128`



##########
be/src/vec/columns/column_variant.cpp:
##########
@@ -2503,6 +2477,41 @@ size_t 
ColumnVariant::find_path_lower_bound_in_sparse_data(StringRef path,
     return it.index;
 }
 
+void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const 
ColumnString* value,
+                                                              size_t row) {
+    const auto& data_ref = value->get_data_at(row);
+    const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
+    const PrimitiveType type =
+            
TabletColumn::get_primitive_type_by_field_type(static_cast<FieldType>(*start_data));
+    auto check_end = [&](const uint8_t* end_ptr) {
+        CHECK_EQ(end_ptr - reinterpret_cast<const uint8_t*>(data_ref.data), 
data_ref.size);

Review Comment:
   Use DCHECK_EQ here instead of CHECK_EQ.



##########
be/src/vec/data_types/serde/data_type_array_serde.h:
##########
@@ -117,8 +117,32 @@ class DataTypeArraySerDe : public DataTypeSerDe {
     void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column);
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info);
+
     void to_string(const IColumn& column, size_t row_num, BufferWritable& bw) 
const override;
 
+    //     static std::pair<Field, FieldInfo> deserialize_from_binary(const 
ColumnString* value, size_t row) {

Review Comment:
   Remove this commented-out code.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to