This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a6f0f91c3f [opt](variant) deserialize sparse binary data to variant 
subcolumn (#56977)
8a6f0f91c3f is described below

commit 8a6f0f91c3f2005f979ef7f5ff872c83dabe275a
Author: Sun Chenyang <[email protected]>
AuthorDate: Mon Oct 27 11:35:10 2025 +0800

    [opt](variant) deserialize sparse binary data to variant subcolumn (#56977)
---
 .../variant/hierarchical_data_iterator.cpp         |  13 +-
 be/src/olap/tablet_schema.cpp                      | 115 +++----
 be/src/vec/columns/column_variant.cpp              | 265 +++------------
 be/src/vec/columns/column_variant.h                |  21 +-
 .../vec/data_types/serde/data_type_array_serde.cpp |  40 +++
 .../vec/data_types/serde/data_type_array_serde.h   |   5 +
 .../data_types/serde/data_type_decimal_serde.cpp   |  62 ++++
 .../vec/data_types/serde/data_type_decimal_serde.h |   5 +
 .../vec/data_types/serde/data_type_jsonb_serde.cpp |  20 ++
 .../vec/data_types/serde/data_type_jsonb_serde.h   |   5 +
 .../data_types/serde/data_type_number_serde.cpp    | 111 ++++++
 .../vec/data_types/serde/data_type_number_serde.h  |   6 +
 be/src/vec/data_types/serde/data_type_serde.cpp    | 128 +++++++
 be/src/vec/data_types/serde/data_type_serde.h      |  37 ++
 .../vec/data_types/serde/data_type_string_serde.h  |  19 ++
 be/src/vec/functions/function_variant_element.cpp  |   5 +-
 .../deserialize_from_sparse_column_test.bin        | Bin 0 -> 410 bytes
 be/test/vec/columns/column_variant_test.cpp        | 114 -------
 .../vec/data_types/serde/data_type_serde_test.cpp  | 376 +++++++++++++++++++++
 19 files changed, 914 insertions(+), 433 deletions(-)

diff --git 
a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp 
b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
index b0af87641f6..b44aa627ff2 100644
--- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
@@ -381,18 +381,16 @@ Status HierarchicalDataIterator::_process_sparse_column(
                         // Case 1: subcolumn already created, append this 
row's value into it.
                         if (auto it = 
subcolumns_from_sparse_column.find(sub_path);
                             it != subcolumns_from_sparse_column.end()) {
-                            const auto& data = 
ColumnVariant::deserialize_from_sparse_column(
-                                    &src_sparse_data_values, 
lower_bound_index);
-                            it->second.insert(data.first, data.second);
+                            
it->second.deserialize_from_sparse_column(&src_sparse_data_values,
+                                                                      
lower_bound_index);
                         }
                         // Case 2: subcolumn not created yet and we still have 
quota → create it and insert.
                         else if (subcolumns_from_sparse_column.size() < count) 
{
                             // Initialize subcolumn with current logical row 
index i to align sizes.
                             ColumnVariant::Subcolumn subcolumn(/*size*/ i, 
/*is_nullable*/ true,
                                                                false);
-                            const auto& data = 
ColumnVariant::deserialize_from_sparse_column(
-                                    &src_sparse_data_values, 
lower_bound_index);
-                            subcolumn.insert(data.first, data.second);
+                            
subcolumn.deserialize_from_sparse_column(&src_sparse_data_values,
+                                                                     
lower_bound_index);
                             subcolumns_from_sparse_column.emplace(sub_path, 
std::move(subcolumn));
                         }
                         // Case 3: quota exhausted → keep the key/value in 
container's sparse column.
@@ -416,9 +414,8 @@ Status HierarchicalDataIterator::_process_sparse_column(
                             //     return Status::InternalError("Failed to add 
subcolumn for sparse column");
                             // }
                         }
-                        const auto& data = 
ColumnVariant::deserialize_from_sparse_column(
+                        
container_variant.get_subcolumn({})->deserialize_from_sparse_column(
                                 &src_sparse_data_values, lower_bound_index);
-                        
container_variant.get_subcolumn({})->insert(data.first, data.second);
                     }
                 }
                 // if root was created, and not seen in sparse data, insert 
default
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index e86ab940f56..bbeb0312dde 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -135,76 +135,51 @@ FieldType 
TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
 }
 
 PrimitiveType TabletColumn::get_primitive_type_by_field_type(FieldType type) {
-    switch (type) {
-    case FieldType::OLAP_FIELD_TYPE_UNKNOWN:
-        return PrimitiveType::INVALID_TYPE;
-    case FieldType::OLAP_FIELD_TYPE_NONE:
-        return PrimitiveType::TYPE_NULL;
-    case FieldType::OLAP_FIELD_TYPE_BOOL:
-        return PrimitiveType::TYPE_BOOLEAN;
-    case FieldType::OLAP_FIELD_TYPE_TINYINT:
-        return PrimitiveType::TYPE_TINYINT;
-    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
-        return PrimitiveType::TYPE_SMALLINT;
-    case FieldType::OLAP_FIELD_TYPE_INT:
-        return PrimitiveType::TYPE_INT;
-    case FieldType::OLAP_FIELD_TYPE_BIGINT:
-        return PrimitiveType::TYPE_BIGINT;
-    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
-        return PrimitiveType::TYPE_LARGEINT;
-    case FieldType::OLAP_FIELD_TYPE_FLOAT:
-        return PrimitiveType::TYPE_FLOAT;
-    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
-        return PrimitiveType::TYPE_DOUBLE;
-    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
-        return PrimitiveType::TYPE_VARCHAR;
-    case FieldType::OLAP_FIELD_TYPE_STRING:
-        return PrimitiveType::TYPE_STRING;
-    case FieldType::OLAP_FIELD_TYPE_DATE:
-        return PrimitiveType::TYPE_DATE;
-    case FieldType::OLAP_FIELD_TYPE_DATETIME:
-        return PrimitiveType::TYPE_DATETIME;
-    case FieldType::OLAP_FIELD_TYPE_CHAR:
-        return PrimitiveType::TYPE_CHAR;
-    case FieldType::OLAP_FIELD_TYPE_STRUCT:
-        return PrimitiveType::TYPE_STRUCT;
-    case FieldType::OLAP_FIELD_TYPE_ARRAY:
-        return PrimitiveType::TYPE_ARRAY;
-    case FieldType::OLAP_FIELD_TYPE_MAP:
-        return PrimitiveType::TYPE_MAP;
-    case FieldType::OLAP_FIELD_TYPE_HLL:
-        return PrimitiveType::TYPE_HLL;
-    case FieldType::OLAP_FIELD_TYPE_BITMAP:
-        return PrimitiveType::TYPE_BITMAP;
-    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
-        return PrimitiveType::TYPE_QUANTILE_STATE;
-    case FieldType::OLAP_FIELD_TYPE_DATEV2:
-        return PrimitiveType::TYPE_DATEV2;
-    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
-        return PrimitiveType::TYPE_DATETIMEV2;
-    case FieldType::OLAP_FIELD_TYPE_TIMEV2:
-        return PrimitiveType::TYPE_TIMEV2;
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
-        return PrimitiveType::TYPE_DECIMAL32;
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
-        return PrimitiveType::TYPE_DECIMAL64;
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
-        return PrimitiveType::TYPE_DECIMAL128I;
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
-        return PrimitiveType::TYPE_DECIMAL256;
-    case FieldType::OLAP_FIELD_TYPE_IPV4:
-        return PrimitiveType::TYPE_IPV4;
-    case FieldType::OLAP_FIELD_TYPE_IPV6:
-        return PrimitiveType::TYPE_IPV6;
-    case FieldType::OLAP_FIELD_TYPE_JSONB:
-        return PrimitiveType::TYPE_JSONB;
-    case FieldType::OLAP_FIELD_TYPE_VARIANT:
-        return PrimitiveType::TYPE_VARIANT;
-    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
-        return PrimitiveType::TYPE_AGG_STATE;
-    default:
-        return PrimitiveType::INVALID_TYPE;
-    }
+    static const PrimitiveType mapping[] = {
+            /*  0 */ PrimitiveType::INVALID_TYPE,
+            /*  1 OLAP_FIELD_TYPE_TINYINT           */ PrimitiveType::TYPE_TINYINT,
+            /*  2 OLAP_FIELD_TYPE_UNSIGNED_TINYINT  */ PrimitiveType::INVALID_TYPE,
+            /*  3 OLAP_FIELD_TYPE_SMALLINT          */ PrimitiveType::TYPE_SMALLINT,
+            /*  4 OLAP_FIELD_TYPE_UNSIGNED_SMALLINT */ PrimitiveType::INVALID_TYPE,
+            /*  5 OLAP_FIELD_TYPE_INT               */ PrimitiveType::TYPE_INT,
+            /*  6 OLAP_FIELD_TYPE_UNSIGNED_INT      */ PrimitiveType::INVALID_TYPE,
+            /*  7 OLAP_FIELD_TYPE_BIGINT            */ PrimitiveType::TYPE_BIGINT,
+            /*  8 OLAP_FIELD_TYPE_UNSIGNED_BIGINT   */ PrimitiveType::INVALID_TYPE,
+            /*  9 OLAP_FIELD_TYPE_LARGEINT          */ PrimitiveType::TYPE_LARGEINT,
+            /* 10 OLAP_FIELD_TYPE_FLOAT             */ PrimitiveType::TYPE_FLOAT,
+            /* 11 OLAP_FIELD_TYPE_DOUBLE            */ PrimitiveType::TYPE_DOUBLE,
+            /* 12 OLAP_FIELD_TYPE_DISCRETE_DOUBLE   */ PrimitiveType::INVALID_TYPE,
+            /* 13 OLAP_FIELD_TYPE_CHAR              */ PrimitiveType::TYPE_CHAR,
+            /* 14 OLAP_FIELD_TYPE_DATE              */ PrimitiveType::TYPE_DATE,
+            /* 15 OLAP_FIELD_TYPE_DATETIME          */ PrimitiveType::TYPE_DATETIME,
+            /* 16 OLAP_FIELD_TYPE_DECIMAL           */ PrimitiveType::INVALID_TYPE,
+            /* 17 OLAP_FIELD_TYPE_VARCHAR           */ PrimitiveType::TYPE_VARCHAR,
+            /* 18 OLAP_FIELD_TYPE_STRUCT            */ PrimitiveType::TYPE_STRUCT,
+            /* 19 OLAP_FIELD_TYPE_ARRAY             */ PrimitiveType::TYPE_ARRAY,
+            /* 20 OLAP_FIELD_TYPE_MAP               */ PrimitiveType::TYPE_MAP,
+            /* 21 OLAP_FIELD_TYPE_UNKNOWN           */ PrimitiveType::INVALID_TYPE,
+            /* 22 OLAP_FIELD_TYPE_NONE              */ PrimitiveType::TYPE_NULL,
+            /* 23 OLAP_FIELD_TYPE_HLL               */ PrimitiveType::TYPE_HLL,
+            /* 24 OLAP_FIELD_TYPE_BOOL              */ PrimitiveType::TYPE_BOOLEAN,
+            /* 25 OLAP_FIELD_TYPE_BITMAP            */ PrimitiveType::TYPE_BITMAP,
+            /* 26 OLAP_FIELD_TYPE_STRING            */ PrimitiveType::TYPE_STRING,
+            /* 27 OLAP_FIELD_TYPE_QUANTILE_STATE    */ PrimitiveType::TYPE_QUANTILE_STATE,
+            /* 28 OLAP_FIELD_TYPE_DATEV2            */ PrimitiveType::TYPE_DATEV2,
+            /* 29 OLAP_FIELD_TYPE_DATETIMEV2        */ PrimitiveType::TYPE_DATETIMEV2,
+            /* 30 OLAP_FIELD_TYPE_TIMEV2            */ PrimitiveType::TYPE_TIMEV2,
+            /* 31 OLAP_FIELD_TYPE_DECIMAL32         */ PrimitiveType::TYPE_DECIMAL32,
+            /* 32 OLAP_FIELD_TYPE_DECIMAL64         */ PrimitiveType::TYPE_DECIMAL64,
+            /* 33 OLAP_FIELD_TYPE_DECIMAL128I       */ PrimitiveType::TYPE_DECIMAL128I,
+            /* 34 OLAP_FIELD_TYPE_JSONB             */ PrimitiveType::TYPE_JSONB,
+            /* 35 OLAP_FIELD_TYPE_VARIANT           */ PrimitiveType::TYPE_VARIANT,
+            /* 36 OLAP_FIELD_TYPE_AGG_STATE         */ PrimitiveType::TYPE_AGG_STATE,
+            /* 37 OLAP_FIELD_TYPE_DECIMAL256        */ PrimitiveType::TYPE_DECIMAL256,
+            /* 38 OLAP_FIELD_TYPE_IPV4              */ PrimitiveType::TYPE_IPV4,
+            /* 39 OLAP_FIELD_TYPE_IPV6              */ PrimitiveType::TYPE_IPV6,
+    };
+    // Codes with no table entry (negative or past the end) yield INVALID_TYPE, not an OOB read.
+    const auto idx = static_cast<size_t>(type);
+    return idx < sizeof(mapping) / sizeof(mapping[0]) ? mapping[idx] : PrimitiveType::INVALID_TYPE;
 }
 
 FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
diff --git a/be/src/vec/columns/column_variant.cpp 
b/be/src/vec/columns/column_variant.cpp
index 6f7f43da9cd..a8af33605e0 100644
--- a/be/src/vec/columns/column_variant.cpp
+++ b/be/src/vec/columns/column_variant.cpp
@@ -851,217 +851,14 @@ void 
ColumnVariant::Subcolumn::serialize_to_sparse_column(ColumnString* key, std
                            "Index ({}) for serialize to sparse column is out 
of range", row);
 }
 
-struct PackedUInt128 {
-    // PackedInt128() : value(0) {}
-    PackedUInt128() = default;
-
-    PackedUInt128(const unsigned __int128& value_) { value = value_; }
-    PackedUInt128& operator=(const unsigned __int128& value_) {
-        value = value_;
-        return *this;
-    }
-    PackedUInt128& operator=(const PackedUInt128& rhs) = default;
-
-    uint128_t value;
-} __attribute__((packed));
-
-const NO_SANITIZE_UNDEFINED char* parse_binary_from_sparse_column(FieldType 
type, const char* data,
-                                                                  Field& res, 
FieldInfo& info_res) {
-    info_res.scalar_type_id = 
TabletColumn::get_primitive_type_by_field_type(type);
-    const char* end = data;
-    switch (type) {
-    case FieldType::OLAP_FIELD_TYPE_STRING: {
-        size_t size = unaligned_load<size_t>(data);
-        data += sizeof(size_t);
-        res = Field::create_field<TYPE_STRING>(String(data, size));
-        end = data + size;
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_TINYINT: {
-        Int8 v = unaligned_load<Int8>(data);
-        res = Field::create_field<TYPE_TINYINT>(v);
-        end = data + sizeof(Int8);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_SMALLINT: {
-        Int16 v = unaligned_load<Int16>(data);
-        res = Field::create_field<TYPE_SMALLINT>(v);
-        end = data + sizeof(Int16);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_INT: {
-        Int32 v = unaligned_load<Int32>(data);
-        res = Field::create_field<TYPE_INT>(v);
-        end = data + sizeof(Int32);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_BIGINT: {
-        Int64 v = unaligned_load<Int64>(data);
-        res = Field::create_field<TYPE_BIGINT>(v);
-        end = data + sizeof(Int64);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_LARGEINT: {
-        PackedInt128 pack;
-        memcpy(&pack, data, sizeof(PackedInt128));
-        res = Field::create_field<TYPE_LARGEINT>(Int128(pack.value));
-        end = data + sizeof(PackedInt128);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_FLOAT: {
-        Float32 v = unaligned_load<Float32>(data);
-        res = Field::create_field<TYPE_FLOAT>(v);
-        end = data + sizeof(Float32);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DOUBLE: {
-        Float64 v = unaligned_load<Float64>(data);
-        res = Field::create_field<TYPE_DOUBLE>(v);
-        end = data + sizeof(Float64);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_JSONB: {
-        size_t size = unaligned_load<size_t>(data);
-        data += sizeof(size_t);
-        res = Field::create_field<TYPE_JSONB>(JsonbField(data, size));
-        end = data + size;
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_ARRAY: {
-        const size_t size = unaligned_load<size_t>(data);
-        data += sizeof(size_t);
-        res = Field::create_field<TYPE_ARRAY>(Array(size));
-        auto& array = res.get<Array>();
-        info_res.num_dimensions++;
-        FieldType nested_filed_type = FieldType::OLAP_FIELD_TYPE_NONE;
-        for (size_t i = 0; i < size; ++i) {
-            Field nested_field;
-            const auto nested_type =
-                    static_cast<FieldType>(*reinterpret_cast<const 
uint8_t*>(data++));
-            data = parse_binary_from_sparse_column(nested_type, data, 
nested_field, info_res);
-            array[i] = std::move(nested_field);
-            if (nested_type != FieldType::OLAP_FIELD_TYPE_NONE) {
-                nested_filed_type = nested_type;
-            }
-        }
-        info_res.scalar_type_id = 
TabletColumn::get_primitive_type_by_field_type(nested_filed_type);
-        end = data;
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_IPV4: {
-        IPv4 v = unaligned_load<IPv4>(data);
-        res = Field::create_field<TYPE_IPV4>(v);
-        end = data + sizeof(IPv4);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_IPV6: {
-        PackedUInt128 pack;
-        memcpy(&pack, data, sizeof(PackedUInt128));
-        auto v = pack.value;
-        res = Field::create_field<TYPE_IPV6>(v);
-        end = data + sizeof(PackedUInt128);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DATEV2: {
-        UInt32 v = unaligned_load<UInt32>(data);
-        res = Field::create_field<TYPE_DATEV2>(v);
-        end = data + sizeof(UInt32);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
-        const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        UInt64 v = unaligned_load<UInt64>(data);
-        res = Field::create_field<TYPE_DATETIMEV2>(v);
-        info_res.precision = -1;
-        info_res.scale = static_cast<int>(scale);
-        end = data + sizeof(UInt64);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL32: {
-        const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        Int32 v = unaligned_load<Int32>(data);
-        res = Field::create_field<TYPE_DECIMAL32>(Decimal32(v));
-        info_res.precision = static_cast<int>(precision);
-        info_res.scale = static_cast<int>(scale);
-        end = data + sizeof(Int32);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL64: {
-        const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        Int64 v = unaligned_load<Int64>(data);
-        res = Field::create_field<TYPE_DECIMAL64>(Decimal64(v));
-        info_res.precision = static_cast<int>(precision);
-        info_res.scale = static_cast<int>(scale);
-        end = data + sizeof(Int64);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: {
-        const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        PackedInt128 pack;
-        memcpy(&pack, data, sizeof(PackedInt128));
-        res = Field::create_field<TYPE_DECIMAL128I>(Decimal128V3(pack.value));
-        info_res.precision = static_cast<int>(precision);
-        info_res.scale = static_cast<int>(scale);
-        end = data + sizeof(PackedInt128);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_DECIMAL256: {
-        const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
-        data += sizeof(uint8_t);
-        wide::Int256 v;
-        memcpy(&v, data, sizeof(wide::Int256));
-        res = Field::create_field<TYPE_DECIMAL256>(Decimal256(v));
-        info_res.precision = static_cast<int>(precision);
-        info_res.scale = static_cast<int>(scale);
-        end = data + sizeof(wide::Int256);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_BOOL: {
-        res = Field::create_field<TYPE_BOOLEAN>(*reinterpret_cast<const 
uint8_t*>(data));
-        end = data + sizeof(uint8_t);
-        break;
-    }
-    case FieldType::OLAP_FIELD_TYPE_NONE: {
-        res = Field();
-        end = data;
-        break;
-    }
-    default:
-        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
-                               "Type ({}) for deserialize_from_sparse_column 
is invalid", type);
-    }
-    return end;
-}
-
 std::pair<Field, FieldInfo> 
ColumnVariant::deserialize_from_sparse_column(const ColumnString* value,
                                                                           
size_t row) {
     const auto& data_ref = value->get_data_at(row);
-    const char* data = data_ref.data;
-    DCHECK(data_ref.size > 1);
-    const FieldType type = static_cast<FieldType>(*reinterpret_cast<const 
uint8_t*>(data++));
+    const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
     Field res;
-    FieldInfo info_res = {
-            .scalar_type_id = 
TabletColumn::get_primitive_type_by_field_type(type),
-            .have_nulls = false,
-            .need_convert = false,
-            .num_dimensions = 0,
-    };
-    const char* end = parse_binary_from_sparse_column(type, data, res, 
info_res);
-    DCHECK_EQ(end - data_ref.data, data_ref.size)
-            << "FieldType: " << (int)type << " data_ref.size: " << 
data_ref.size << " end: " << end
-            << " data: " << data;
+    FieldInfo info_res; // handed to deserialize_binary_to_field below together with res
+    const uint8_t* end = 
DataTypeSerDe::deserialize_binary_to_field(start_data, res, info_res);
+    CHECK_EQ(end - start_data, data_ref.size); // decoding must stop exactly at the end of the cell
     return {std::move(res), std::move(info_res)};
 }
 
@@ -1296,9 +1093,7 @@ void 
ColumnVariant::insert_from_sparse_column_and_fill_remaing_dense_column(
             const PathInData column_path(src_sparse_path);
             if (auto* subcolumn = get_subcolumn(column_path); subcolumn != 
nullptr) {
                 // Deserialize binary value into subcolumn from src serialized 
sparse column data.
-                const auto& data =
-                        
ColumnVariant::deserialize_from_sparse_column(src_sparse_column_values, i);
-                subcolumn->insert(data.first, data.second);
+                
subcolumn->deserialize_from_sparse_column(src_sparse_column_values, i);
             } else {
                 // Before inserting this path into sparse column check if we 
need to
                 // insert subcolumns from 
sorted_src_subcolumn_for_sparse_column before.
@@ -1811,9 +1606,8 @@ void 
ColumnVariant::serialize_one_row_to_json_format(int64_t row_num, BufferWrit
         } else {
             // To serialize value stored in shared data we should first 
deserialize it from binary format.
             Subcolumn tmp_subcolumn(0, true);
-            const auto& data = ColumnVariant::deserialize_from_sparse_column(
-                    sparse_data_values, index_in_sparse_data_values++);
-            tmp_subcolumn.insert(data.first, data.second);
+            tmp_subcolumn.deserialize_from_sparse_column(sparse_data_values,
+                                                         
index_in_sparse_data_values++);
             DataTypeSerDe::FormatOptions options;
             options.escape_char = '\\';
             tmp_subcolumn.serialize_text_json(0, output, options);
@@ -2508,6 +2302,44 @@ size_t 
ColumnVariant::find_path_lower_bound_in_sparse_data(StringRef path,
     return it.index;
 }
 
+void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const ColumnString* value,
+                                                              size_t row) {
+    const auto& data_ref = value->get_data_at(row);
+    const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
+    const PrimitiveType type =
+            TabletColumn::get_primitive_type_by_field_type(static_cast<FieldType>(*start_data));
+    auto check_end = [&](const uint8_t* end_ptr) {
+        DCHECK_EQ(end_ptr - reinterpret_cast<const uint8_t*>(data_ref.data), data_ref.size);
+    };
+
+    // A value whose type differs from the least common type is decoded to a Field and inserted
+    // via insert(); a value of exactly that type is decoded straight into data.back().
+    bool needs_field_insert = type != least_common_type.get_type_id();
+    // Arrays must also match on element type; the first element tag sits after the array tag
+    // and the size_t count. An empty array has no element tag, so NONE sends it to insert().
+    if (!needs_field_insert && type == PrimitiveType::TYPE_ARRAY) {
+        const bool is_empty = unaligned_load<size_t>(start_data + 1) == 0;
+        const auto elem = is_empty ? FieldType::OLAP_FIELD_TYPE_NONE
+                                   : static_cast<FieldType>(start_data[1 + sizeof(size_t)]);
+        needs_field_insert = TabletColumn::get_primitive_type_by_field_type(elem) !=
+                             least_common_type.get_base_type_id();
+    }
+
+    if (needs_field_insert) {
+        Field res;
+        FieldInfo info;
+        const uint8_t* end_data = DataTypeSerDe::deserialize_binary_to_field(start_data, res, info);
+        check_end(end_data);
+        insert(std::move(res), std::move(info));
+    } else {
+        CHECK(data.size() > 0);
+        const uint8_t* end_data =
+                DataTypeSerDe::deserialize_binary_to_column(start_data, *data.back());
+        check_end(end_data);
+        ++num_rows;
+    }
+}
+
 void ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, 
NullMap* null_map,
                                                       StringRef path,
                                                       const ColumnPtr& 
sparse_data_column,
@@ -2535,12 +2367,7 @@ void 
ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null
         bool is_null = false;
         if (lower_bound_path_index != paths_end &&
             sparse_data_paths.get_data_at(lower_bound_path_index) == path) {
-            // auto value_data = 
sparse_data_values.get_data_at(lower_bound_path_index);
-            // ReadBufferFromMemory buf(value_data.data, value_data.size);
-            // dynamic_serialization->deserializeBinary(path_column, buf, 
getFormatSettings());
-            const auto& data = ColumnVariant::deserialize_from_sparse_column(
-                    &sparse_data_values, lower_bound_path_index);
-            subcolumn.insert(data.first, data.second);
+            subcolumn.deserialize_from_sparse_column(&sparse_data_values, 
lower_bound_path_index);
             is_null = false;
         } else {
             subcolumn.insert_default();
diff --git a/be/src/vec/columns/column_variant.h 
b/be/src/vec/columns/column_variant.h
index 74258fb0b15..9c38d5441a3 100644
--- a/be/src/vec/columns/column_variant.h
+++ b/be/src/vec/columns/column_variant.h
@@ -69,25 +69,6 @@ namespace doris::vectorized {
 #define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
 #endif
 
-/// Info that represents a scalar or array field in a decomposed view.
-/// It allows to recreate field with different number
-/// of dimensions or nullability.
-struct FieldInfo {
-    /// The common type id of of all scalars in field.
-    PrimitiveType scalar_type_id = PrimitiveType::INVALID_TYPE;
-    /// Do we have NULL scalar in field.
-    bool have_nulls = false;
-    /// If true then we have scalars with different types in array and
-    /// we need to convert scalars to the common type.
-    bool need_convert = false;
-    /// Number of dimension in array. 0 if field is scalar.
-    size_t num_dimensions = 0;
-
-    // decimal info
-    int scale = 0;
-    int precision = 0;
-};
-
 /** A column that represents object with dynamic set of subcolumns.
  *  Subcolumns are identified by paths in document and are stored in
  *  a trie-like structure. ColumnVariant is not suitable for writing into 
tables
@@ -195,6 +176,8 @@ public:
         /// Returns last inserted field.
         Field get_last_field() const;
 
+        void deserialize_from_sparse_column(const ColumnString* value, size_t 
row);
+
         /// Returns single column if subcolumn in finalizes.
         /// Otherwise -- undefined behaviour.
         IColumn& get_finalized_column();
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp 
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 267e0a3d2db..97571257504 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -527,6 +527,46 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const 
IColumn& src_column,
     }
 }
 
+const uint8_t* DataTypeArraySerDe::deserialize_binary_to_column(const uint8_t* data,
+                                                                IColumn& column) {
+    // Input starts with a size_t element count followed by that many serialized elements.
+    // Each element is handed to DataTypeSerDe::deserialize_binary_to_column together with the
+    // nested column; one offset is then pushed for the array row. Returns the advanced pointer.
+    auto& array_col = assert_cast<ColumnArray&, TypeCheckOnRelease::DISABLE>(column);
+    auto& row_offsets = array_col.get_offsets();
+    auto& elements = array_col.get_data();
+    const size_t count = unaligned_load<size_t>(data);
+    const uint8_t* cursor = data + sizeof(size_t);
+
+    // A zero count skips the loop, so the pushed offset repeats the previous one.
+    for (size_t remaining = count; remaining != 0; --remaining) {
+        cursor = DataTypeSerDe::deserialize_binary_to_column(cursor, elements);
+    }
+    row_offsets.push_back(row_offsets.back() + count);
+    return cursor;
+}
+
+const uint8_t* DataTypeArraySerDe::deserialize_binary_to_field(const uint8_t* data, Field& field,
+                                                               FieldInfo& info) {
+    // Reads a size_t element count, then decodes that many elements into an Array field.
+    const size_t count = unaligned_load<size_t>(data);
+    const uint8_t* cursor = data + sizeof(size_t);
+    field = Field::create_field<TYPE_ARRAY>(Array(count));
+    ++info.num_dimensions;
+    auto& elements = field.get<Array>();
+    PrimitiveType element_type = PrimitiveType::TYPE_NULL; // last non-null type reported
+    for (size_t idx = 0; idx != count; ++idx) {
+        Field element;
+        cursor = DataTypeSerDe::deserialize_binary_to_field(cursor, element, info);
+        elements[idx] = std::move(element);
+        element_type = info.scalar_type_id == PrimitiveType::TYPE_NULL ? element_type
+                                                                       : info.scalar_type_id;
+    }
+    info.scalar_type_id = element_type;
+    return cursor;
+}
+
 void DataTypeArraySerDe::to_string(const IColumn& column, size_t row_num,
                                    BufferWritable& bw) const {
     const auto& data_column = assert_cast<const ColumnArray&>(column);
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h 
b/be/src/vec/data_types/serde/data_type_array_serde.h
index b53e2da4e55..eeedf502b5f 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -117,6 +117,11 @@ public:
     void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column);
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info);
+
     void to_string(const IColumn& column, size_t row_num, BufferWritable& bw) 
const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index 2014c2258b3..e9f3f5d5dba 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -654,6 +654,68 @@ void 
DataTypeDecimalSerDe<T>::write_one_cell_to_binary(const IColumn& src_column
            data_ref.data, data_ref.size);
 }
 
+template <PrimitiveType T>
+const uint8_t* DataTypeDecimalSerDe<T>::deserialize_binary_to_column(const 
uint8_t* data,
+                                                                     IColumn& 
column) {
+    auto& col = assert_cast<ColumnDecimal<T>&, 
TypeCheckOnRelease::DISABLE>(column);
+    data += sizeof(uint8_t); // skip 1-byte precision (only deserialize_binary_to_field reads it)
+    data += sizeof(uint8_t); // skip 1-byte scale; only the raw value is appended to the column
+    if constexpr (T == TYPE_DECIMAL32) {
+        col.insert_value(unaligned_load<Int32>(data));
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_DECIMAL64) {
+        col.insert_value(unaligned_load<Int64>(data));
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_DECIMAL128I) {
+        col.insert_value(unaligned_load<Int128>(data));
+        data += sizeof(Int128);
+    } else if constexpr (T == TYPE_DECIMAL256) {
+        col.insert_value(Decimal256(unaligned_load<wide::Int256>(data)));
+        data += sizeof(wide::Int256);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_column with type " + 
column.get_name());
+    }
+    return data; // first byte after the consumed value
+}
+
+template <PrimitiveType T>
+const uint8_t* DataTypeDecimalSerDe<T>::deserialize_binary_to_field(const 
uint8_t* data,
+                                                                    Field& 
field, FieldInfo& info) {
+    const uint8_t precision = *reinterpret_cast<const uint8_t*>(data); // layout: precision(1B), scale(1B), value
+    data += sizeof(uint8_t);
+    const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
+    data += sizeof(uint8_t);
+    info.precision = static_cast<int>(precision); // precision/scale are handed back through info
+    info.scale = static_cast<int>(scale);
+    if constexpr (T == TYPE_DECIMAL32) {
+        Int32 v = unaligned_load<Int32>(data);
+        field = Field::create_field<TYPE_DECIMAL32>(Decimal32(v));
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_DECIMAL64) {
+        Int64 v = unaligned_load<Int64>(data);
+        field = Field::create_field<TYPE_DECIMAL64>(Decimal64(v));
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_DECIMAL128I) {
+        // Because __int128 in memory is not aligned, but GCC7 will generate 
SSE instruction
+        // for __int128 load/store. This will cause segment fault.
+        PackedInt128 pack;
+        // use memcpy to avoid unaligned access
+        memcpy(&pack, data, sizeof(PackedInt128));
+        field = 
Field::create_field<TYPE_DECIMAL128I>(Decimal128V3(pack.value));
+        data += sizeof(PackedInt128);
+    } else if constexpr (T == TYPE_DECIMAL256) {
+        wide::Int256 v;
+        memcpy(&v, data, sizeof(wide::Int256));
+        field = Field::create_field<TYPE_DECIMAL256>(Decimal256(v));
+        data += sizeof(wide::Int256);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_field with type " + 
type_to_string(T));
+    }
+    return data; // first byte after the consumed value
+}
+
 template class DataTypeDecimalSerDe<TYPE_DECIMAL32>;
 template class DataTypeDecimalSerDe<TYPE_DECIMAL64>;
 template class DataTypeDecimalSerDe<TYPE_DECIMAL128I>;
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h 
b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 7e0cf461314..277a0309abd 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -132,6 +132,11 @@ public:
 
     void to_string_batch(const IColumn& column, ColumnString& column_to) const 
override;
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column);
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info);
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp 
b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index f29f5368b95..072611f362e 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -330,6 +330,26 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const 
IColumn& src_column,
     memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), 
data_ref.data, data_size);
 }
 
+const uint8_t* DataTypeJsonbSerDe::deserialize_binary_to_column(const uint8_t* 
data,
+                                                                IColumn& 
column) {
+    auto& col = assert_cast<ColumnString&, 
TypeCheckOnRelease::DISABLE>(column);
+    const size_t data_size = unaligned_load<size_t>(data); // size_t length prefix
+    data += sizeof(size_t);
+    col.insert_data(reinterpret_cast<const char*>(data), data_size); // payload follows the prefix
+    data += data_size;
+    return data; // first byte after the payload
+}
+
+const uint8_t* DataTypeJsonbSerDe::deserialize_binary_to_field(const uint8_t* 
data, Field& field,
+                                                               FieldInfo& 
info) {
+    const size_t data_size = unaligned_load<size_t>(data); // size_t length prefix; info is not written here
+    data += sizeof(size_t);
+    field = Field::create_field<TYPE_JSONB>(
+            JsonbField(reinterpret_cast<const char*>(data), data_size));
+    data += data_size;
+    return data; // first byte after the payload
+}
+
 void DataTypeJsonbSerDe::to_string(const IColumn& column, size_t row_num,
                                    BufferWritable& bw) const {
     const auto& col = assert_cast<const ColumnString&, 
TypeCheckOnRelease::DISABLE>(column);
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h 
b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index ce1ce81a437..1e0a1065b2c 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -80,6 +80,11 @@ public:
     void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column);
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info);
+
     void to_string(const IColumn& column, size_t row_num, BufferWritable& bw) 
const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 1a15a02c7b3..0201a3a2ac9 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -770,6 +770,117 @@ void 
DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column,
     memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, 
data_ref.size);
 }
 
+template <PrimitiveType T>
+const uint8_t* DataTypeNumberSerDe<T>::deserialize_binary_to_column(const 
uint8_t* data,
+                                                                    IColumn& 
column) {
+    auto& col = assert_cast<ColumnType&, TypeCheckOnRelease::DISABLE>(column);
+    if constexpr (T == TYPE_BOOLEAN) {
+        col.insert_value(unaligned_load<UInt8>(data));
+        data += sizeof(UInt8);
+    } else if constexpr (T == TYPE_TINYINT) {
+        col.insert_value(unaligned_load<Int8>(data));
+        data += sizeof(Int8);
+    } else if constexpr (T == TYPE_SMALLINT) {
+        col.insert_value(unaligned_load<Int16>(data));
+        data += sizeof(Int16);
+    } else if constexpr (T == TYPE_INT) {
+        col.insert_value(unaligned_load<Int32>(data));
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_BIGINT) {
+        col.insert_value(unaligned_load<Int64>(data));
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_LARGEINT) {
+        col.insert_value(unaligned_load<Int128>(data));
+        data += sizeof(Int128);
+    } else if constexpr (T == TYPE_FLOAT) {
+        col.insert_value(unaligned_load<Float32>(data));
+        data += sizeof(Float32);
+    } else if constexpr (T == TYPE_DOUBLE) {
+        col.insert_value(unaligned_load<Float64>(data));
+        data += sizeof(Float64);
+    } else if constexpr (T == TYPE_IPV4) {
+        col.insert_value(unaligned_load<UInt32>(data));
+        data += sizeof(UInt32);
+    } else if constexpr (T == TYPE_IPV6) {
+        col.insert_value(unaligned_load<Int128>(data));
+        data += sizeof(Int128);
+    } else if constexpr (T == TYPE_DATEV2) {
+        col.insert_value(unaligned_load<UInt32>(data));
+        data += sizeof(UInt32);
+    } else if constexpr (T == TYPE_DATETIMEV2) {
+        data += sizeof(uint8_t); // skip the 1-byte scale that precedes the 8-byte value
+        col.insert_value(unaligned_load<UInt64>(data));
+        data += sizeof(UInt64);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_column with type '{}'", 
type_to_string(T));
+    }
+    return data; // first byte after the consumed value
+}
+
+template <PrimitiveType T>
+const uint8_t* DataTypeNumberSerDe<T>::deserialize_binary_to_field(const 
uint8_t* data,
+                                                                   Field& 
field, FieldInfo& info) {
+    if constexpr (T == TYPE_BOOLEAN) {
+        field = Field::create_field<TYPE_BOOLEAN>(unaligned_load<UInt8>(data));
+        data += sizeof(UInt8);
+    } else if constexpr (T == TYPE_TINYINT) {
+        Int8 v = unaligned_load<Int8>(data);
+        field = Field::create_field<TYPE_TINYINT>(v);
+        data += sizeof(Int8);
+    } else if constexpr (T == TYPE_SMALLINT) {
+        Int16 v = unaligned_load<Int16>(data);
+        field = Field::create_field<TYPE_SMALLINT>(v);
+        data += sizeof(Int16);
+    } else if constexpr (T == TYPE_INT) {
+        Int32 v = unaligned_load<Int32>(data);
+        field = Field::create_field<TYPE_INT>(v);
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_BIGINT) {
+        Int64 v = unaligned_load<Int64>(data);
+        field = Field::create_field<TYPE_BIGINT>(v);
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_LARGEINT) {
+        PackedInt128 pack;
+        memcpy(&pack, data, sizeof(PackedInt128)); // memcpy into a packed struct: no aligned 128-bit load
+        field = Field::create_field<TYPE_LARGEINT>(Int128(pack.value));
+        data += sizeof(PackedInt128);
+    } else if constexpr (T == TYPE_FLOAT) {
+        Float32 v = unaligned_load<Float32>(data);
+        field = Field::create_field<TYPE_FLOAT>(v);
+        data += sizeof(Float32);
+    } else if constexpr (T == TYPE_DOUBLE) {
+        Float64 v = unaligned_load<Float64>(data);
+        field = Field::create_field<TYPE_DOUBLE>(v);
+        data += sizeof(Float64);
+    } else if constexpr (T == TYPE_IPV4) {
+        IPv4 v = unaligned_load<IPv4>(data);
+        field = Field::create_field<TYPE_IPV4>(v);
+        data += sizeof(IPv4);
+    } else if constexpr (T == TYPE_IPV6) {
+        PackedUInt128 pack;
+        memcpy(&pack, data, sizeof(PackedUInt128));
+        auto v = pack.value;
+        field = Field::create_field<TYPE_IPV6>(v);
+        data += sizeof(PackedUInt128);
+    } else if constexpr (T == TYPE_DATEV2) {
+        UInt32 v = unaligned_load<UInt32>(data);
+        field = Field::create_field<TYPE_DATEV2>(v);
+        data += sizeof(UInt32);
+    } else if constexpr (T == TYPE_DATETIMEV2) {
+        const uint8_t scale = *reinterpret_cast<const uint8_t*>(data); // 1-byte scale precedes the value
+        data += sizeof(uint8_t);
+        UInt64 v = unaligned_load<UInt64>(data);
+        info.precision = -1;
+        info.scale = static_cast<int>(scale); // scale is handed back through info
+        field = Field::create_field<TYPE_DATETIMEV2>(v);
+        data += sizeof(UInt64);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_field with type '{}'", 
type_to_string(T));
+    }
+    return data; // first byte after the consumed value
+}
 template <PrimitiveType T>
 void value_to_string(const typename PrimitiveTypeTraits<T>::ColumnItemType 
value,
                      BufferWritable& bw, int scale) {
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h 
b/be/src/vec/data_types/serde/data_type_number_serde.h
index e38b54a7bb5..783c7fc9bfc 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -129,12 +129,18 @@ public:
 
     void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
+
     void to_string(const IColumn& column, size_t row_num, BufferWritable& bw) 
const override;
 
     void to_string_batch(const IColumn& column, ColumnString& column_to) const 
override;
     // will override in DateTime and Time
     virtual int get_scale() const { return 0; }
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column);
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info);
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp 
b/be/src/vec/data_types/serde/data_type_serde.cpp
index 683fdfbded8..0b1288895fa 100644
--- a/be/src/vec/data_types/serde/data_type_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_serde.cpp
@@ -25,7 +25,11 @@
 #include "vec/columns/column.h"
 #include "vec/core/field.h"
 #include "vec/data_types/data_type.h"
+#include "vec/data_types/serde/data_type_array_serde.h"
+#include "vec/data_types/serde/data_type_decimal_serde.h"
 #include "vec/data_types/serde/data_type_jsonb_serde.h"
+#include "vec/data_types/serde/data_type_number_serde.h"
+#include "vec/data_types/serde/data_type_string_serde.h"
 #include "vec/functions/cast/cast_base.h"
 namespace doris {
 namespace vectorized {
@@ -134,5 +138,129 @@ void DataTypeSerDe::to_string(const IColumn& column, 
size_t row_num, BufferWrita
 const std::string DataTypeSerDe::NULL_IN_COMPLEX_TYPE = "null";
 const std::string DataTypeSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N";
 
+const uint8_t* DataTypeSerDe::deserialize_binary_to_column(const uint8_t* 
data, IColumn& column) {
+    auto& nullable_column = assert_cast<ColumnNullable&, 
TypeCheckOnRelease::DISABLE>(column); // column is cast to ColumnNullable
+    const FieldType type = static_cast<FieldType>(*data++); // leading 1-byte FieldType tag
+    const uint8_t* end = data;
+    switch (type) {
+#define HANDLE_SIMPLE_SERDE(FT, SERDE)                                         
               \
+    case FieldType::FT: {                                                      
               \
+        end = SERDE::deserialize_binary_to_column(data, 
nullable_column.get_nested_column()); \
+        nullable_column.push_false_to_nullmap(1);                              
               \
+        break;                                                                 
               \
+    }
+
+#define HANDLE_T_NUM_SERDE(FT, TYPEID)                                   \
+    case FieldType::FT: {                                                \
+        end = DataTypeNumberSerDe<TYPEID>::deserialize_binary_to_column( \
+                data, nullable_column.get_nested_column());              \
+        nullable_column.push_false_to_nullmap(1);                        \
+        break;                                                           \
+    }
+
+#define HANDLE_T_DEC_SERDE(FT, TYPEID)                                    \
+    case FieldType::FT: {                                                 \
+        end = DataTypeDecimalSerDe<TYPEID>::deserialize_binary_to_column( \
+                data, nullable_column.get_nested_column());               \
+        nullable_column.push_false_to_nullmap(1);                         \
+        break;                                                            \
+    }
+
+        HANDLE_SIMPLE_SERDE(OLAP_FIELD_TYPE_STRING, DataTypeStringSerDe)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_INT, TYPE_INT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE)
+        HANDLE_SIMPLE_SERDE(OLAP_FIELD_TYPE_JSONB, DataTypeJsonbSerDe)
+        HANDLE_SIMPLE_SERDE(OLAP_FIELD_TYPE_ARRAY, DataTypeArraySerDe)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN)
+
+    case FieldType::OLAP_FIELD_TYPE_NONE: {
+        end = data; // tag only, no payload bytes are consumed
+        nullable_column.insert_default(); // append one default row to the nullable column
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Type ({}) for deserialize_binary_to_column is 
invalid", type);
+    }
+
+#undef HANDLE_T_DEC_SERDE
+#undef HANDLE_T_NUM_SERDE
+#undef HANDLE_SIMPLE_SERDE
+
+    return end; // first byte after the tag and its payload
+}
+
+const uint8_t* DataTypeSerDe::deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                          FieldInfo& info) {
+    const FieldType type = static_cast<FieldType>(*data++); // leading 1-byte FieldType tag
+    info.scalar_type_id = TabletColumn::get_primitive_type_by_field_type(type); // tag's primitive type, reported via info
+    const uint8_t* end = data;
+    switch (type) {
+#define HANDLE_SIMPLE_SERDE(FT, SERDE)                               \
+    case FieldType::FT: {                                            \
+        end = SERDE::deserialize_binary_to_field(data, field, info); \
+        break;                                                       \
+    }
+
+#define HANDLE_T_NUM_SERDE(FT, TYPEID)                                         
            \
+    case FieldType::FT: {                                                      
            \
+        end = DataTypeNumberSerDe<TYPEID>::deserialize_binary_to_field(data, 
field, info); \
+        break;                                                                 
            \
+    }
+
+#define HANDLE_T_DEC_SERDE(FT, TYPEID)                                         
             \
+    case FieldType::FT: {                                                      
             \
+        end = DataTypeDecimalSerDe<TYPEID>::deserialize_binary_to_field(data, 
field, info); \
+        break;                                                                 
             \
+    }
+
+        HANDLE_SIMPLE_SERDE(OLAP_FIELD_TYPE_STRING, DataTypeStringSerDe)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_INT, TYPE_INT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE)
+        HANDLE_SIMPLE_SERDE(OLAP_FIELD_TYPE_JSONB, DataTypeJsonbSerDe)
+        HANDLE_SIMPLE_SERDE(OLAP_FIELD_TYPE_ARRAY, DataTypeArraySerDe)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I)
+        HANDLE_T_DEC_SERDE(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256)
+        HANDLE_T_NUM_SERDE(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN)
+
+    case FieldType::OLAP_FIELD_TYPE_NONE: {
+        end = data; // tag only, no payload; field is not assigned in this branch
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Type ({}) for deserialize_binary_to_field is 
invalid", type);
+    }
+
+#undef HANDLE_T_DEC_SERDE
+#undef HANDLE_T_NUM_SERDE
+#undef HANDLE_SIMPLE_SERDE
+    return end; // first byte after the tag and its payload
+}
+
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index a12357ba9f1..e4914acd739 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -94,6 +94,38 @@ class DataTypeSerDe;
 using DataTypeSerDeSPtr = std::shared_ptr<DataTypeSerDe>;
 using DataTypeSerDeSPtrs = std::vector<DataTypeSerDeSPtr>;
 
+/// Describes a scalar or array field in a decomposed view.
+/// It allows the field to be recreated with a different number
+/// of dimensions or a different nullability.
+struct FieldInfo {
+    /// The common type id of all scalars in the field.
+    PrimitiveType scalar_type_id = PrimitiveType::INVALID_TYPE;
+    /// Whether the field contains a NULL scalar.
+    bool have_nulls = false;
+    /// If true, the array holds scalars of different types and
+    /// they need to be converted to the common type.
+    bool need_convert = false;
+    /// Number of array dimensions; 0 if the field is a scalar.
+    size_t num_dimensions = 0;
+
+    // Decimal scale/precision; the DATETIMEV2 deserializer also fills scale (precision = -1).
+    int scale = 0;
+    int precision = 0;
+};
+struct PackedUInt128 {
+    // Defaulted rather than value(0): default-initialization leaves `value` unset.
+    PackedUInt128() = default;
+
+    PackedUInt128(const unsigned __int128& value_) { value = value_; }
+    PackedUInt128& operator=(const unsigned __int128& value_) {
+        value = value_;
+        return *this;
+    }
+    PackedUInt128& operator=(const PackedUInt128& rhs) = default;
+
+    uint128_t value;
+} __attribute__((packed));
+
 // Deserialize means read from different file format or memory format,
 // for example read from arrow, read from parquet.
 // Serialize means write the column cell or the total column into another
@@ -421,6 +453,11 @@ public:
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, 
"write_one_cell_to_binary");
     }
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column);
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info);
+
 protected:
     bool _return_object_as_string = false;
     // This parameter indicates what level the serde belongs to and is mainly 
used for complex types
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h 
b/be/src/vec/data_types/serde/data_type_string_serde.h
index 879f2c1dce4..12bc12e6f12 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -233,6 +233,25 @@ public:
                data_size);
     }
 
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, 
IColumn& column) {
+        auto& col = assert_cast<ColumnString&, 
TypeCheckOnRelease::DISABLE>(column);
+        const size_t data_size = unaligned_load<size_t>(data); // size_t length prefix
+        data += sizeof(size_t);
+        col.insert_data(reinterpret_cast<const char*>(data), data_size); // payload follows the prefix
+        data += data_size;
+        return data; // first byte after the payload
+    }
+
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, 
Field& field,
+                                                      FieldInfo& info) {
+        const size_t data_size = unaligned_load<size_t>(data); // size_t length prefix; info is not written here
+        data += sizeof(size_t);
+        field = Field::create_field<TYPE_STRING>(
+                String(reinterpret_cast<const char*>(data), data_size));
+        data += data_size;
+        return data; // first byte after the payload
+    }
+
     void to_string(const IColumn& column, size_t row_num, BufferWritable& bw) 
const override;
 
 private:
diff --git a/be/src/vec/functions/function_variant_element.cpp 
b/be/src/vec/functions/function_variant_element.cpp
index fd0fd491b8f..aa2d082d156 100644
--- a/be/src/vec/functions/function_variant_element.cpp
+++ b/be/src/vec/functions/function_variant_element.cpp
@@ -217,9 +217,8 @@ private:
                             // {"b" : {"c" : 456}}
                             // b maybe in sparse column, and b.c is in 
subolumn, put `b` into root column to distinguish
                             // from "" which is empty path and root
-                            const auto& data = 
ColumnVariant::deserialize_from_sparse_column(
-                                    &src_sparse_data_values, 
lower_bound_index);
-                            root.insert(data.first, data.second);
+                            
root.deserialize_from_sparse_column(&src_sparse_data_values,
+                                                                
lower_bound_index);
                         }
                     }
                     if (root.size() == sparse_data_offsets.size()) {
diff --git a/be/test/util/test_data/deserialize_from_sparse_column_test.bin 
b/be/test/util/test_data/deserialize_from_sparse_column_test.bin
new file mode 100644
index 00000000000..663fd5aaf33
Binary files /dev/null and 
b/be/test/util/test_data/deserialize_from_sparse_column_test.bin differ
diff --git a/be/test/vec/columns/column_variant_test.cpp 
b/be/test/vec/columns/column_variant_test.cpp
index e777634f6e0..de3d31200dc 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -3130,120 +3130,6 @@ TEST_F(ColumnVariantTest, 
subcolumn_operations_coverage) {
         
dst_subcolumn->insert_range_from(src_column2->get_subcolumns().get_root()->data,
 0, 1);
     }
 
-    // Test parse_binary_from_sparse_column
-    {
-        auto column = VariantUtil::construct_basic_varint_column();
-        vectorized::Field res;
-        FieldInfo field_info;
-
-        // Test String type
-        {
-            std::string test_str = "test_data";
-            std::vector<char> binary_data;
-            size_t str_size = test_str.size();
-            binary_data.resize(sizeof(size_t) + test_str.size());
-            memcpy(binary_data.data(), &str_size, sizeof(size_t));
-            memcpy(binary_data.data() + sizeof(size_t), test_str.data(), 
test_str.size());
-            const char* data = binary_data.data();
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_STRING, 
data, res,
-                                            field_info);
-            EXPECT_EQ(res.get<String>(), "test_data");
-        }
-
-        // Test integer types
-        {
-            Int8 int8_val = 42;
-            const char* data = reinterpret_cast<const char*>(&int8_val);
-            
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_TINYINT, data, res,
-                                            field_info);
-            EXPECT_EQ(res.get<Int8>(), 42);
-        }
-
-        {
-            Int16 int16_val = 12345;
-            const char* data = reinterpret_cast<const char*>(&int16_val);
-            
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_SMALLINT, data, res,
-                                            field_info);
-            EXPECT_EQ(res.get<Int16>(), 12345);
-        }
-
-        {
-            Int32 int32_val = 123456789;
-            const char* data = reinterpret_cast<const char*>(&int32_val);
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_INT, 
data, res, field_info);
-            EXPECT_EQ(res.get<Int32>(), 123456789);
-        }
-
-        {
-            Int64 int64_val = 1234567890123456789LL;
-            const char* data = reinterpret_cast<const char*>(&int64_val);
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_BIGINT, 
data, res,
-                                            field_info);
-            EXPECT_EQ(res.get<Int64>(), 1234567890123456789LL);
-        }
-
-        // Test floating point types
-        {
-            Float32 float32_val = 3.1415901f;
-            const char* data = reinterpret_cast<const char*>(&float32_val);
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_FLOAT, 
data, res,
-                                            field_info);
-            EXPECT_FLOAT_EQ(res.get<Float32>(), 0);
-        }
-
-        {
-            Float64 float64_val = 3.141592653589793;
-            const char* data = reinterpret_cast<const char*>(&float64_val);
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_DOUBLE, 
data, res,
-                                            field_info);
-            EXPECT_DOUBLE_EQ(res.get<Float64>(), 3.141592653589793);
-        }
-
-        // Test JSONB type
-        {
-            std::string json_str = "{\"key\": \"value\"}";
-            std::vector<char> binary_data;
-            size_t json_size = json_str.size();
-            binary_data.resize(sizeof(size_t) + json_str.size());
-            memcpy(binary_data.data(), &json_size, sizeof(size_t));
-            memcpy(binary_data.data() + sizeof(size_t), json_str.data(), 
json_str.size());
-            const char* data = binary_data.data();
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_JSONB, 
data, res,
-                                            field_info);
-        }
-
-        // Test Array type
-        {
-            std::vector<char> binary_data;
-            size_t array_size = 2;
-            binary_data.resize(sizeof(size_t) + 2 * (sizeof(uint8_t) + 
sizeof(Int32)));
-            char* data_ptr = binary_data.data();
-
-            // Write array size
-            memcpy(data_ptr, &array_size, sizeof(size_t));
-            data_ptr += sizeof(size_t);
-
-            // Write first element (Int32)
-            *data_ptr++ = static_cast<uint8_t>(PrimitiveType::TYPE_INT);
-            Int32 val1 = 42;
-            memcpy(data_ptr, &val1, sizeof(Int32));
-            data_ptr += sizeof(Int32);
-
-            // Write second element (Int32)
-            *data_ptr++ = static_cast<uint8_t>(PrimitiveType::TYPE_INT);
-            Int32 val2 = 43;
-            memcpy(data_ptr, &val2, sizeof(Int32));
-
-            const char* data = binary_data.data();
-            parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_ARRAY, 
data, res,
-                                            field_info);
-            const Array& array = res.get<Array>();
-            EXPECT_EQ(array.size(), 2);
-            EXPECT_EQ(array[0].get<Int32>(), 42);
-            EXPECT_EQ(array[1].get<Int32>(), 43);
-        }
-    }
-
     // Test add_sub_column
     {
         auto column = VariantUtil::construct_basic_varint_column();
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp 
b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index f4e36d3ab47..62e4a65f760 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -25,6 +25,7 @@
 #include <stdlib.h>
 #include <time.h>
 
+#include <fstream>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -41,6 +42,7 @@
 #include "vec/columns/column_decimal.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
+#include "vec/columns/column_variant.h"
 #include "vec/columns/column_vector.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
@@ -271,4 +273,378 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
     }
 }
 
+// Loads a pre-serialized sparse column (a map column whose values are binary-encoded variant
+// entries) from a fixture file and checks that
+// ColumnVariant::Subcolumn::deserialize_from_sparse_column() restores the expected type and value
+// for each of its 15 entries. Every entry is deserialized twice into the same Subcolumn to check
+// that a repeated insert keeps a single data part (data.size() == 1) and the same type/value.
+TEST(DataTypeSerDeTest, DeserializeFromSparseColumnTest) {
+    auto sparse_column = ColumnVariant::create_sparse_column_fn();
+    auto& column_map = assert_cast<ColumnMap&>(*sparse_column);
+    // auto& key = assert_cast<ColumnString&>(column_map.get_keys());
+    auto& value = assert_cast<ColumnString&>(column_map.get_values());
+    // auto& offsets = column_map.get_offsets();
+    auto data_type = ColumnVariant::get_sparse_column_type();
+
+    // getenv() returns nullptr when the variable is unset; constructing a std::string from a
+    // null pointer is undefined behaviour, so fail the test with a readable message instead.
+    const char* root_dir = getenv("ROOT");
+    ASSERT_TRUE(root_dir != nullptr) << "ROOT environment variable must be set";
+    const std::string file_path =
+            std::string(root_dir) +
+            "/be/test/util/test_data/deserialize_from_sparse_column_test.bin";
+
+    // The commented-out code below is what wrote the fixture file: it serializes one entry per
+    // key "a".."o" (rows 0..14 of `value`) and dumps the column to `file_path`. It is kept as a
+    // record of the fixture contents; re-enabling it also requires the `key` and `offsets`
+    // declarations commented out above.
+
+    // Field string_field = Field::create_field<TYPE_STRING>("123");
+    // FieldInfo info = {PrimitiveType::TYPE_STRING, false, false, 0};
+    // ColumnVariant::Subcolumn string_subcolumn = {0, true, true};
+    // string_subcolumn.insert(string_field, info);
+    // string_subcolumn.serialize_to_sparse_column(&key, "a", &value, 0);
+
+    // Field int_field = Field::create_field<TYPE_INT>(123);
+    // info.scalar_type_id = PrimitiveType::TYPE_INT;
+    // ColumnVariant::Subcolumn int_subcolumn = {0, true, true};
+    // int_subcolumn.insert(int_field, info);
+    // int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 0);
+
+    // Field largeint_field = Field::create_field<TYPE_LARGEINT>(__int128_t(123));
+    // info.scalar_type_id = PrimitiveType::TYPE_LARGEINT;
+    // ColumnVariant::Subcolumn largeint_subcolumn = {0, true, true};
+    // largeint_subcolumn.insert(largeint_field, info);
+    // largeint_subcolumn.serialize_to_sparse_column(&key, "c", &value, 0);
+
+    // Field double_field = Field::create_field<TYPE_DOUBLE>(123.456);
+    // info.scalar_type_id = PrimitiveType::TYPE_DOUBLE;
+    // ColumnVariant::Subcolumn double_subcolumn = {0, true, true};
+    // double_subcolumn.insert(double_field, info);
+    // double_subcolumn.serialize_to_sparse_column(&key, "d", &value, 0);
+
+    // Field bool_field = Field::create_field<TYPE_BOOLEAN>(true);
+    // info.scalar_type_id = PrimitiveType::TYPE_BOOLEAN;
+    // ColumnVariant::Subcolumn bool_subcolumn = {0, true, true};
+    // bool_subcolumn.insert(bool_field, info);
+    // bool_subcolumn.serialize_to_sparse_column(&key, "e", &value, 0);
+
+    // Field datetime_field = Field::create_field<TYPE_DATETIMEV2>(23232323);
+    // info.scalar_type_id = PrimitiveType::TYPE_DATETIMEV2;
+    // info.scale = 3;
+    // ColumnVariant::Subcolumn datetime_subcolumn = {0, true, true};
+    // datetime_subcolumn.insert(datetime_field, info);
+    // datetime_subcolumn.serialize_to_sparse_column(&key, "f", &value, 0);
+
+    // Field date_field = Field::create_field<TYPE_DATEV2>(154543245);
+    // info.scalar_type_id = PrimitiveType::TYPE_DATEV2;
+    // info.scale = 3;
+    // ColumnVariant::Subcolumn date_subcolumn = {0, true, true};
+    // date_subcolumn.insert(date_field, info);
+    // date_subcolumn.serialize_to_sparse_column(&key, "g", &value, 0);
+
+    // Field ipv4_field = Field::create_field<TYPE_IPV4>(367357);
+    // info.scalar_type_id = PrimitiveType::TYPE_IPV4;
+    // ColumnVariant::Subcolumn ipv4_subcolumn = {0, true, true};
+    // ipv4_subcolumn.insert(ipv4_field, info);
+    // ipv4_subcolumn.serialize_to_sparse_column(&key, "h", &value, 0);
+
+    // Field ipv6_field = Field::create_field<TYPE_IPV6>(36534645);
+    // info.scalar_type_id = PrimitiveType::TYPE_IPV6;
+    // ColumnVariant::Subcolumn ipv6_subcolumn = {0, true, true};
+    // ipv6_subcolumn.insert(ipv6_field, info);
+    // ipv6_subcolumn.serialize_to_sparse_column(&key, "i", &value, 0);
+
+    // Field decimal32_field =
+    //         Field::create_field<TYPE_DECIMAL32>(DecimalField<Decimal32>(3456345634, 2));
+    // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL32;
+    // info.precision = 5;
+    // info.scale = 2;
+    // ColumnVariant::Subcolumn decimal32_subcolumn = {0, true, true};
+    // decimal32_subcolumn.insert(decimal32_field, info);
+    // decimal32_subcolumn.serialize_to_sparse_column(&key, "j", &value, 0);
+
+    // Field decimal64_field =
+    //         Field::create_field<TYPE_DECIMAL64>(DecimalField<Decimal64>(13452435, 6));
+    // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL64;
+    // info.precision = 12;
+    // info.scale = 6;
+    // ColumnVariant::Subcolumn decimal64_subcolumn = {0, true, true};
+    // decimal64_subcolumn.insert(decimal64_field, info);
+    // decimal64_subcolumn.serialize_to_sparse_column(&key, "k", &value, 0);
+
+    // Field decimal128i_field =
+    //         Field::create_field<TYPE_DECIMAL128I>(DecimalField<Decimal128V3>(2342345, 12));
+    // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL128I;
+    // info.precision = 32;
+    // info.scale = 12;
+    // ColumnVariant::Subcolumn decimal128i_subcolumn = {0, true, true};
+    // decimal128i_subcolumn.insert(decimal128i_field, info);
+    // decimal128i_subcolumn.serialize_to_sparse_column(&key, "l", &value, 0);
+
+    // Field decimal256_field =
+    //         Field::create_field<TYPE_DECIMAL256>(DecimalField<Decimal256>(Decimal256(2345243), 5));
+    // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL256;
+    // info.precision = 52;
+    // info.scale = 5;
+    // ColumnVariant::Subcolumn decimal256_subcolumn = {0, true, true};
+    // decimal256_subcolumn.insert(decimal256_field, info);
+    // decimal256_subcolumn.serialize_to_sparse_column(&key, "m", &value, 0);
+
+    // Field jsonb_field = Field::create_field<TYPE_JSONB>(JsonbField("abc", 3));
+    // info.scalar_type_id = PrimitiveType::TYPE_JSONB;
+    // ColumnVariant::Subcolumn jsonb_subcolumn = {0, true, true};
+    // jsonb_subcolumn.insert(jsonb_field, info);
+    // jsonb_subcolumn.serialize_to_sparse_column(&key, "n", &value, 0);
+
+    // Field array_field = Field::create_field<TYPE_ARRAY>(Array(3));
+    // info.scalar_type_id = PrimitiveType::TYPE_JSONB;
+    // info.num_dimensions = 1;
+    // auto& array = array_field.get<Array>();
+    // array[0] = jsonb_field;
+    // array[1] = Field();
+    // array[2] = jsonb_field;
+
+    // ColumnVariant::Subcolumn array_subcolumn = {0, true, true};
+    // array_subcolumn.insert(array_field, info);
+    // array_subcolumn.serialize_to_sparse_column(&key, "o", &value, 0);
+    // offsets.push_back(key.size());
+
+    // auto size = data_type->get_uncompressed_serialized_bytes(*sparse_column, 8);
+    // char* buf = new char[size];
+    // data_type->serialize(*sparse_column, buf, 8);
+    // {
+    //     std::ofstream ofs(file_path, std::ios::binary);
+    //     ASSERT_TRUE(ofs.is_open());
+    //     ofs.write(buf, static_cast<std::streamsize>(size));
+    //     ofs.close();
+    // }
+    // delete[] buf;
+
+    // Read the whole fixture into memory, failing fast on any I/O problem.
+    std::string read_data;
+    {
+        std::ifstream ifs(file_path, std::ios::binary);
+        ASSERT_TRUE(ifs.is_open()) << "cannot open fixture file: " << file_path;
+        ifs.seekg(0, std::ios::end);
+        const std::streamsize fsize = ifs.tellg();
+        // tellg() yields -1 on failure; casting that to size_t below would request a huge
+        // allocation, and an empty fixture cannot be deserialized either.
+        ASSERT_GT(fsize, 0) << "fixture file is empty or unreadable: " << file_path;
+        ifs.seekg(0, std::ios::beg);
+        read_data.resize(static_cast<size_t>(fsize));
+        ifs.read(read_data.data(), fsize);
+        // A short read would leave zero bytes at the end of the buffer.
+        ASSERT_EQ(ifs.gcount(), fsize) << "short read from fixture file: " << file_path;
+    }
+
+    sparse_column->clear();
+
+    data_type->deserialize(read_data.data(), &sparse_column, 8);
+
+    // The checks below index rows 0..14 of the values column; make sure they exist.
+    ASSERT_GE(value.size(), 15U);
+
+    // Row 0: string "123".
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 0);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_STRING);
+        EXPECT_EQ(subcolumn.get_last_field().get<String>(), "123");
+        subcolumn.deserialize_from_sparse_column(&value, 0);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_STRING);
+        EXPECT_EQ(subcolumn.get_last_field().get<String>(), "123");
+    }
+
+    // Row 1: int 123.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 1);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(), PrimitiveType::TYPE_INT);
+        EXPECT_EQ(subcolumn.get_last_field().get<Int32>(), 123);
+        subcolumn.deserialize_from_sparse_column(&value, 1);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(), PrimitiveType::TYPE_INT);
+        EXPECT_EQ(subcolumn.get_last_field().get<Int32>(), 123);
+    }
+
+    // Row 2: largeint 123. The value is a 128-bit integer, so read it back as Int128 rather
+    // than through a narrower Int64 view of the field storage.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 2);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_LARGEINT);
+        EXPECT_EQ(subcolumn.get_last_field().get<Int128>(), static_cast<Int128>(123));
+        subcolumn.deserialize_from_sparse_column(&value, 2);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_LARGEINT);
+        EXPECT_EQ(subcolumn.get_last_field().get<Int128>(), static_cast<Int128>(123));
+    }
+
+    // Row 3: double 123.456.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 3);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DOUBLE);
+        EXPECT_EQ(subcolumn.get_last_field().get<double>(), 123.456);
+        subcolumn.deserialize_from_sparse_column(&value, 3);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DOUBLE);
+        EXPECT_EQ(subcolumn.get_last_field().get<double>(), 123.456);
+    }
+
+    // Row 4: boolean true.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 4);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_BOOLEAN);
+        EXPECT_EQ(subcolumn.get_last_field().get<bool>(), true);
+        subcolumn.deserialize_from_sparse_column(&value, 4);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_BOOLEAN);
+        EXPECT_EQ(subcolumn.get_last_field().get<bool>(), true);
+    }
+
+    // Row 5: datetimev2 with raw value 23232323.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 5);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DATETIMEV2);
+        EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 23232323);
+        subcolumn.deserialize_from_sparse_column(&value, 5);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DATETIMEV2);
+        EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 23232323);
+    }
+
+    // Row 6: datev2 with raw value 154543245.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 6);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DATEV2);
+        EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 154543245);
+        subcolumn.deserialize_from_sparse_column(&value, 6);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DATEV2);
+        EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 154543245);
+    }
+
+    // Row 7: IPv4 with raw value 367357.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 7);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_IPV4);
+        EXPECT_EQ(subcolumn.get_last_field().get<IPv4>(), static_cast<IPv4>(367357));
+        subcolumn.deserialize_from_sparse_column(&value, 7);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_IPV4);
+        EXPECT_EQ(subcolumn.get_last_field().get<IPv4>(), static_cast<IPv4>(367357));
+    }
+
+    // Row 8: IPv6 with raw value 36534645.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 8);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_IPV6);
+        EXPECT_EQ(subcolumn.get_last_field().get<IPv6>(), static_cast<IPv6>(36534645));
+        subcolumn.deserialize_from_sparse_column(&value, 8);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_IPV6);
+        EXPECT_EQ(subcolumn.get_last_field().get<IPv6>(), static_cast<IPv6>(36534645));
+    }
+
+    // Row 9: decimal32. The literal 3456345634 does not fit in 32 bits, so both sides are
+    // compared after the same narrowing cast to Int32.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 9);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL32);
+        auto v = subcolumn.get_last_field().get<DecimalField<Decimal32>>();
+        EXPECT_EQ(static_cast<Int32>(v.get_value()), static_cast<Int32>(3456345634));
+        subcolumn.deserialize_from_sparse_column(&value, 9);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL32);
+        // Re-check the value after the second insert, as the other decimal widths do.
+        v = subcolumn.get_last_field().get<DecimalField<Decimal32>>();
+        EXPECT_EQ(static_cast<Int32>(v.get_value()), static_cast<Int32>(3456345634));
+    }
+
+    // Row 10: decimal64 with unscaled value 13452435.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 10);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL64);
+        auto v = subcolumn.get_last_field().get<DecimalField<Decimal64>>();
+        EXPECT_EQ(static_cast<Int64>(v.get_value()), static_cast<Int64>(13452435));
+        subcolumn.deserialize_from_sparse_column(&value, 10);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL64);
+        v = subcolumn.get_last_field().get<DecimalField<Decimal64>>();
+        EXPECT_EQ(static_cast<Int64>(v.get_value()), static_cast<Int64>(13452435));
+    }
+
+    // Row 11: decimal128 (V3) with unscaled value 2342345.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 11);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL128I);
+        auto v = subcolumn.get_last_field().get<DecimalField<Decimal128V3>>();
+        EXPECT_EQ(static_cast<Int128>(v.get_value()), static_cast<Int128>(2342345));
+        subcolumn.deserialize_from_sparse_column(&value, 11);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL128I);
+        v = subcolumn.get_last_field().get<DecimalField<Decimal128V3>>();
+        EXPECT_EQ(static_cast<Int128>(v.get_value()), static_cast<Int128>(2342345));
+    }
+
+    // Row 12: decimal256 with unscaled value 2345243.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 12);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL256);
+        auto v = subcolumn.get_last_field().get<DecimalField<Decimal256>>();
+        EXPECT_TRUE(v.get_value() == Decimal256(2345243));
+        subcolumn.deserialize_from_sparse_column(&value, 12);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_DECIMAL256);
+        v = subcolumn.get_last_field().get<DecimalField<Decimal256>>();
+        EXPECT_TRUE(v.get_value() == Decimal256(2345243));
+    }
+
+    // Row 13: jsonb. Only the resulting type is checked here, not the payload.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 13);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_JSONB);
+        subcolumn.deserialize_from_sparse_column(&value, 13);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_JSONB);
+    }
+
+    // Row 14: one-dimensional array of jsonb with three elements, the middle one null.
+    {
+        ColumnVariant::Subcolumn subcolumn = {0, true, true};
+        subcolumn.deserialize_from_sparse_column(&value, 14);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(subcolumn.get_dimensions(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_JSONB);
+        auto v = subcolumn.get_last_field();
+        {
+            // Scope the reference so it is never used after `v` is overwritten below.
+            auto& arr = v.get<Array>();
+            EXPECT_EQ(arr.size(), 3);
+            EXPECT_FALSE(arr[0].is_null());
+            EXPECT_TRUE(arr[1].is_null());
+            EXPECT_FALSE(arr[2].is_null());
+        }
+        subcolumn.deserialize_from_sparse_column(&value, 14);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(subcolumn.get_dimensions(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_JSONB);
+
+        v = subcolumn.get_last_field();
+        {
+            auto& arr = v.get<Array>();
+            EXPECT_EQ(arr.size(), 3);
+            EXPECT_FALSE(arr[0].is_null());
+            EXPECT_TRUE(arr[1].is_null());
+            EXPECT_FALSE(arr[2].is_null());
+        }
+    }
+}
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to