This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch revert-45667-seralize-variant
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f47a7297142b4ca2d7b959bb696a73bb4f1089b1
Author: lihangyu <[email protected]>
AuthorDate: Fri Dec 20 01:21:23 2024 +0800

    Revert "[fix](serialize) fix column serialize and deserialize (#45667)"
    
    This reverts commit c15122c043defa4c417663bf34f5e48d98dc2f42.
---
 be/src/vec/columns/column_object.cpp               | 13 +++-------
 .../vec/data_types/serde/data_type_array_serde.cpp |  7 +++---
 .../vec/data_types/serde/data_type_array_serde.h   |  2 +-
 .../vec/data_types/serde/data_type_jsonb_serde.cpp |  5 ++--
 .../vec/data_types/serde/data_type_jsonb_serde.h   |  2 +-
 .../data_types/serde/data_type_nullable_serde.cpp  | 29 +++++++++++-----------
 .../data_types/serde/data_type_nullable_serde.h    |  4 +--
 .../data_types/serde/data_type_number_serde.cpp    |  4 ++-
 .../vec/data_types/serde/data_type_number_serde.h  |  2 +-
 be/src/vec/data_types/serde/data_type_serde.h      |  2 +-
 .../vec/data_types/serde/data_type_string_serde.h  |  4 ++-
 11 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 17c02172da8..91a0936673f 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1053,10 +1053,8 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std:
                 auto& nullable_col = assert_cast<const ColumnNullable&>(*part);
 
                 // insert value
-                ColumnString::Chars& chars = value->get_chars();
                 nullable_serde->get_nested_serde()->write_one_cell_to_binary(
-                        nullable_col.get_nested_column(), chars, row);
-                value->get_offsets().push_back(chars.size());
+                        nullable_col.get_nested_column(), value, row);
             }
             return;
         }
@@ -1116,11 +1114,6 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi
         end = data + size;
         break;
     }
-    case TypeIndex::Nothing: {
-        res = Null();
-        end = data;
-        break;
-    }
     case TypeIndex::Array: {
         const size_t size = *reinterpret_cast<const size_t*>(data);
         data += sizeof(size_t);
@@ -1130,9 +1123,9 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi
         for (size_t i = 0; i < size; ++i) {
             Field nested_field;
             const auto nested_type =
-                    static_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++));
+                    assert_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++));
             data = parse_binary_from_sparse_column(nested_type, data, nested_field, info_res);
-            array[i] = std::move(nested_field);
+            array.emplace_back(std::move(nested_field));
         }
         end = data;
         break;
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 2c906ce4c49..57a43fbb381 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -436,9 +436,9 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a
 }
 
 void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
-                                                  ColumnString::Chars& chars,
-                                                  int64_t row_num) const {
+                                                  ColumnString* dst_column, int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeIndex::Array);
+    ColumnString::Chars& chars = dst_column->get_chars();
     const size_t old_size = chars.size();
     const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t);
     chars.resize(new_size);
@@ -453,8 +453,9 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
     memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&size),
            sizeof(size_t));
     for (size_t offset = start; offset != end; ++offset) {
-        nested_serde->write_one_cell_to_binary(nested_column, chars, offset);
+        nested_serde->write_one_cell_to_binary(nested_column, dst_column, offset);
     }
+    dst_column->get_offsets().push_back(chars.size());
 }
 
 } // namespace vectorized
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h
index 25da83f2cff..aaf1a425512 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -101,7 +101,7 @@ public:
         nested_serde->set_return_object_as_string(value);
     }
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
+    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
                                   int64_t row_num) const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index 69dbae7241c..7279a0fc4a6 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -279,13 +279,13 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn& column, const PValues& a
 }
 
 void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
-                                                  ColumnString::Chars& chars,
-                                                  int64_t row_num) const {
+                                                  ColumnString* dst_column, int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB);
     const auto& col = assert_cast<const ColumnString&>(src_column);
     const auto& data_ref = col.get_data_at(row_num);
     size_t data_size = data_ref.size;
 
+    ColumnString::Chars& chars = dst_column->get_chars();
     const size_t old_size = chars.size();
     const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
     chars.resize(new_size);
@@ -294,6 +294,7 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
     memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size),
            sizeof(size_t));
     memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size);
+    dst_column->get_offsets().push_back(new_size);
 }
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index 95e510516ed..d6d29cce556 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -71,7 +71,7 @@ public:
                               int64_t end) const override;
     Status read_column_from_pb(IColumn& column, const PValues& arg) const override;
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
+    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
                                   int64_t row_num) const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 9193a3b0100..b325ec88e9f 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -393,21 +393,20 @@ Status DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
     return Status::OK();
 }
 
-void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
-                                                     ColumnString::Chars& chars,
-                                                     int64_t row_num) const {
-    auto& col = assert_cast<const ColumnNullable&>(src_column);
-    if (col.is_null_at(row_num)) [[unlikely]] {
-        const uint8_t type = static_cast<uint8_t>(TypeIndex::Nothing);
-        const size_t old_size = chars.size();
-        const size_t new_size = old_size + sizeof(uint8_t);
-        chars.resize(new_size);
-        memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
-    } else {
-        auto& nested_col = col.get_nested_column();
-        nested_serde->write_one_cell_to_binary(nested_col, chars, row_num);
-    }
-}
+// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
+//                                                      ColumnString* dst_column,
+//                                                      int64_t row_num) const {
+//     auto& col = assert_cast<const ColumnNullable&>(src_column);
+//     uint8_t is_null = 0;
+//     if (col.is_null_at(row_num)) [[unlikely]] {
+//         is_null = 1;
+//         dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
+//     } else {
+//         dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
+//         auto& nested_col = col.get_nested_column();
+//         nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num);
+//     }
+// }
 
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 828c079244b..33cf86ab694 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -99,8 +99,8 @@ public:
                                   int64_t row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
-                                  int64_t row_num) const override;
+    //     void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
+    //                                   int64_t row_num) const override;
 
     DataTypeSerDeSPtr get_nested_serde() { return nested_serde; }
 
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index fcf72e6f992..c5f2994f6b2 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -395,17 +395,19 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const std::string& timezone,
 
 template <typename T>
 void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column,
-                                                      ColumnString::Chars& chars,
+                                                      ColumnString* dst_column,
                                                       int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeId<T>::value);
     const auto& data_ref = assert_cast<const ColumnType&>(src_column).get_data_at(row_num);
 
+    ColumnString::Chars& chars = dst_column->get_chars();
     const size_t old_size = chars.size();
     const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size;
     chars.resize(new_size);
 
     memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
     memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, data_ref.size);
+    dst_column->get_offsets().push_back(new_size);
 }
 
 /// Explicit template instantiations - to avoid code bloat in headers.
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h
index db4373e646c..c9073f5e868 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -107,7 +107,7 @@ public:
                                   int64_t row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
+    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
                                   int64_t row_num) const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index 8a879b5df26..38b2590b062 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -337,7 +337,7 @@ public:
                                           Arena& mem_pool, int64_t row_num) const;
     virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const;
 
-    virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
+    virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst,
                                           int64_t row_num) const {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_one_cell_to_binary");
     }
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h
index 39a623316a2..50acf28c6f2 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -366,13 +366,14 @@ public:
         return Status::OK();
     }
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
+    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
                                   int64_t row_num) const override {
         const uint8_t type = static_cast<uint8_t>(TypeIndex::String);
         const auto& col = assert_cast<const ColumnType&>(src_column);
         const auto& data_ref = col.get_data_at(row_num);
         const size_t data_size = data_ref.size;
 
+        ColumnString::Chars& chars = dst_column->get_chars();
         const size_t old_size = chars.size();
         const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
         chars.resize(new_size);
@@ -382,6 +383,7 @@ public:
                sizeof(size_t));
         memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data,
                data_size);
+        dst_column->get_offsets().push_back(chars.size());
     }
 
 private:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to