This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new c15122c043d [fix](serialize) fix column serialize and deserialize (#45667)
c15122c043d is described below

commit c15122c043defa4c417663bf34f5e48d98dc2f42
Author: Sun Chenyang <[email protected]>
AuthorDate: Fri Dec 20 01:01:55 2024 +0800

    [fix](serialize) fix column serialize and deserialize (#45667)
---
 be/src/vec/columns/column_object.cpp               | 13 +++++++---
 .../vec/data_types/serde/data_type_array_serde.cpp |  7 +++---
 .../vec/data_types/serde/data_type_array_serde.h   |  2 +-
 .../vec/data_types/serde/data_type_jsonb_serde.cpp |  5 ++--
 .../vec/data_types/serde/data_type_jsonb_serde.h   |  2 +-
 .../data_types/serde/data_type_nullable_serde.cpp  | 29 +++++++++++-----------
 .../data_types/serde/data_type_nullable_serde.h    |  4 +--
 .../data_types/serde/data_type_number_serde.cpp    |  4 +--
 .../vec/data_types/serde/data_type_number_serde.h  |  2 +-
 be/src/vec/data_types/serde/data_type_serde.h      |  2 +-
 .../vec/data_types/serde/data_type_string_serde.h  |  4 +--
 11 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 91a0936673f..17c02172da8 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1053,8 +1053,10 @@ void 
ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std:
                 auto& nullable_col = assert_cast<const ColumnNullable&>(*part);
 
                 // insert value
+                ColumnString::Chars& chars = value->get_chars();
                 nullable_serde->get_nested_serde()->write_one_cell_to_binary(
-                        nullable_col.get_nested_column(), value, row);
+                        nullable_col.get_nested_column(), chars, row);
+                value->get_offsets().push_back(chars.size());
             }
             return;
         }
@@ -1114,6 +1116,11 @@ const char* parse_binary_from_sparse_column(TypeIndex 
type, const char* data, Fi
         end = data + size;
         break;
     }
+    case TypeIndex::Nothing: {
+        res = Null();
+        end = data;
+        break;
+    }
     case TypeIndex::Array: {
         const size_t size = *reinterpret_cast<const size_t*>(data);
         data += sizeof(size_t);
@@ -1123,9 +1130,9 @@ const char* parse_binary_from_sparse_column(TypeIndex 
type, const char* data, Fi
         for (size_t i = 0; i < size; ++i) {
             Field nested_field;
             const auto nested_type =
-                    assert_cast<const TypeIndex>(*reinterpret_cast<const 
uint8_t*>(data++));
+                    static_cast<const TypeIndex>(*reinterpret_cast<const 
uint8_t*>(data++));
             data = parse_binary_from_sparse_column(nested_type, data, 
nested_field, info_res);
-            array.emplace_back(std::move(nested_field));
+            array[i] = std::move(nested_field);
         }
         end = data;
         break;
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 57a43fbb381..2c906ce4c49 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -436,9 +436,9 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& 
column, const PValues& a
 }
 
 void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
-                                                  ColumnString* dst_column, 
int64_t row_num) const {
+                                                  ColumnString::Chars& chars,
+                                                  int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeIndex::Array);
-    ColumnString::Chars& chars = dst_column->get_chars();
     const size_t old_size = chars.size();
     const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t);
     chars.resize(new_size);
@@ -453,9 +453,8 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const 
IColumn& src_column,
     memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const 
char*>(&size),
            sizeof(size_t));
     for (size_t offset = start; offset != end; ++offset) {
-        nested_serde->write_one_cell_to_binary(nested_column, dst_column, 
offset);
+        nested_serde->write_one_cell_to_binary(nested_column, chars, offset);
     }
-    dst_column->get_offsets().push_back(chars.size());
 }
 
 } // namespace vectorized
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h
index aaf1a425512..25da83f2cff 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -101,7 +101,7 @@ public:
         nested_serde->set_return_object_as_string(value);
     }
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* 
dst_column,
+    void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index 7279a0fc4a6..69dbae7241c 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -279,13 +279,13 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn& 
column, const PValues& a
 }
 
 void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
-                                                  ColumnString* dst_column, 
int64_t row_num) const {
+                                                  ColumnString::Chars& chars,
+                                                  int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB);
     const auto& col = assert_cast<const ColumnString&>(src_column);
     const auto& data_ref = col.get_data_at(row_num);
     size_t data_size = data_ref.size;
 
-    ColumnString::Chars& chars = dst_column->get_chars();
     const size_t old_size = chars.size();
     const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + 
data_ref.size;
     chars.resize(new_size);
@@ -294,7 +294,6 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const 
IColumn& src_column,
     memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const 
char*>(&data_size),
            sizeof(size_t));
     memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), 
data_ref.data, data_size);
-    dst_column->get_offsets().push_back(new_size);
 }
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index d6d29cce556..95e510516ed 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -71,7 +71,7 @@ public:
                               int64_t end) const override;
     Status read_column_from_pb(IColumn& column, const PValues& arg) const 
override;
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* 
dst_column,
+    void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index b325ec88e9f..9193a3b0100 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -393,20 +393,21 @@ Status 
DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
     return Status::OK();
 }
 
-// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& 
src_column,
-//                                                      ColumnString* 
dst_column,
-//                                                      int64_t row_num) const 
{
-//     auto& col = assert_cast<const ColumnNullable&>(src_column);
-//     uint8_t is_null = 0;
-//     if (col.is_null_at(row_num)) [[unlikely]] {
-//         is_null = 1;
-//         dst_column->insert_data(reinterpret_cast<const char*>(is_null), 
sizeof(uint8_t));
-//     } else {
-//         dst_column->insert_data(reinterpret_cast<const char*>(is_null), 
sizeof(uint8_t));
-//         auto& nested_col = col.get_nested_column();
-//         nested_serde->write_one_cell_to_binary(nested_col, dst_column, 
row_num);
-//     }
-// }
+void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
+                                                     ColumnString::Chars& 
chars,
+                                                     int64_t row_num) const {
+    auto& col = assert_cast<const ColumnNullable&>(src_column);
+    if (col.is_null_at(row_num)) [[unlikely]] {
+        const uint8_t type = static_cast<uint8_t>(TypeIndex::Nothing);
+        const size_t old_size = chars.size();
+        const size_t new_size = old_size + sizeof(uint8_t);
+        chars.resize(new_size);
+        memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), 
sizeof(uint8_t));
+    } else {
+        auto& nested_col = col.get_nested_column();
+        nested_serde->write_one_cell_to_binary(nested_col, chars, row_num);
+    }
+}
 
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 33cf86ab694..828c079244b 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -99,8 +99,8 @@ public:
                                   int64_t row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& 
result) const override;
 
-    //     void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString* dst_column,
-    //                                   int64_t row_num) const override;
+    void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
+                                  int64_t row_num) const override;
 
     DataTypeSerDeSPtr get_nested_serde() { return nested_serde; }
 
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index c5f2994f6b2..fcf72e6f992 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -395,19 +395,17 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const 
std::string& timezone,
 
 template <typename T>
 void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& 
src_column,
-                                                      ColumnString* dst_column,
+                                                      ColumnString::Chars& 
chars,
                                                       int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeId<T>::value);
     const auto& data_ref = assert_cast<const 
ColumnType&>(src_column).get_data_at(row_num);
 
-    ColumnString::Chars& chars = dst_column->get_chars();
     const size_t old_size = chars.size();
     const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size;
     chars.resize(new_size);
 
     memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), 
sizeof(uint8_t));
     memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, 
data_ref.size);
-    dst_column->get_offsets().push_back(new_size);
 }
 
 /// Explicit template instantiations - to avoid code bloat in headers.
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h
index c9073f5e868..db4373e646c 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -107,7 +107,7 @@ public:
                                   int64_t row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& 
result) const override;
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* 
dst_column,
+    void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override;
 
 private:
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index 38b2590b062..8a879b5df26 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -337,7 +337,7 @@ public:
                                           Arena& mem_pool, int64_t row_num) 
const;
     virtual Status read_one_cell_from_json(IColumn& column, const 
rapidjson::Value& result) const;
 
-    virtual void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString* dst,
+    virtual void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                           int64_t row_num) const {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, 
"write_one_cell_to_binary");
     }
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h
index 50acf28c6f2..39a623316a2 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -366,14 +366,13 @@ public:
         return Status::OK();
     }
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* 
dst_column,
+    void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
                                   int64_t row_num) const override {
         const uint8_t type = static_cast<uint8_t>(TypeIndex::String);
         const auto& col = assert_cast<const ColumnType&>(src_column);
         const auto& data_ref = col.get_data_at(row_num);
         const size_t data_size = data_ref.size;
 
-        ColumnString::Chars& chars = dst_column->get_chars();
         const size_t old_size = chars.size();
         const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + 
data_ref.size;
         chars.resize(new_size);
@@ -383,7 +382,6 @@ public:
                sizeof(size_t));
         memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), 
data_ref.data,
                data_size);
-        dst_column->get_offsets().push_back(chars.size());
     }
 
 private:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to