This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new c15122c043d [fix](serialize) fix column serialize and deserialize (#45667)
c15122c043d is described below
commit c15122c043defa4c417663bf34f5e48d98dc2f42
Author: Sun Chenyang <[email protected]>
AuthorDate: Fri Dec 20 01:01:55 2024 +0800
[fix](serialize) fix column serialize and deserialize (#45667)
---
be/src/vec/columns/column_object.cpp | 13 +++++++---
.../vec/data_types/serde/data_type_array_serde.cpp | 7 +++---
.../vec/data_types/serde/data_type_array_serde.h | 2 +-
.../vec/data_types/serde/data_type_jsonb_serde.cpp | 5 ++--
.../vec/data_types/serde/data_type_jsonb_serde.h | 2 +-
.../data_types/serde/data_type_nullable_serde.cpp | 29 +++++++++++-----------
.../data_types/serde/data_type_nullable_serde.h | 4 +--
.../data_types/serde/data_type_number_serde.cpp | 4 +--
.../vec/data_types/serde/data_type_number_serde.h | 2 +-
be/src/vec/data_types/serde/data_type_serde.h | 2 +-
.../vec/data_types/serde/data_type_string_serde.h | 4 +--
11 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 91a0936673f..17c02172da8 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1053,8 +1053,10 @@ void
ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std:
auto& nullable_col = assert_cast<const ColumnNullable&>(*part);
// insert value
+ ColumnString::Chars& chars = value->get_chars();
nullable_serde->get_nested_serde()->write_one_cell_to_binary(
- nullable_col.get_nested_column(), value, row);
+ nullable_col.get_nested_column(), chars, row);
+ value->get_offsets().push_back(chars.size());
}
return;
}
@@ -1114,6 +1116,11 @@ const char* parse_binary_from_sparse_column(TypeIndex
type, const char* data, Fi
end = data + size;
break;
}
+ case TypeIndex::Nothing: {
+ res = Null();
+ end = data;
+ break;
+ }
case TypeIndex::Array: {
const size_t size = *reinterpret_cast<const size_t*>(data);
data += sizeof(size_t);
@@ -1123,9 +1130,9 @@ const char* parse_binary_from_sparse_column(TypeIndex
type, const char* data, Fi
for (size_t i = 0; i < size; ++i) {
Field nested_field;
const auto nested_type =
- assert_cast<const TypeIndex>(*reinterpret_cast<const
uint8_t*>(data++));
+ static_cast<const TypeIndex>(*reinterpret_cast<const
uint8_t*>(data++));
data = parse_binary_from_sparse_column(nested_type, data,
nested_field, info_res);
- array.emplace_back(std::move(nested_field));
+ array[i] = std::move(nested_field);
}
end = data;
break;
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 57a43fbb381..2c906ce4c49 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -436,9 +436,9 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn&
column, const PValues& a
}
void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
- ColumnString* dst_column,
int64_t row_num) const {
+ ColumnString::Chars& chars,
+ int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeIndex::Array);
- ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t);
chars.resize(new_size);
@@ -453,9 +453,8 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const
char*>(&size),
sizeof(size_t));
for (size_t offset = start; offset != end; ++offset) {
- nested_serde->write_one_cell_to_binary(nested_column, dst_column,
offset);
+ nested_serde->write_one_cell_to_binary(nested_column, chars, offset);
}
- dst_column->get_offsets().push_back(chars.size());
}
} // namespace vectorized
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h
index aaf1a425512..25da83f2cff 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -101,7 +101,7 @@ public:
nested_serde->set_return_object_as_string(value);
}
- void write_one_cell_to_binary(const IColumn& src_column, ColumnString*
dst_column,
+ void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override;
private:
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index 7279a0fc4a6..69dbae7241c 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -279,13 +279,13 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn&
column, const PValues& a
}
void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
- ColumnString* dst_column,
int64_t row_num) const {
+ ColumnString::Chars& chars,
+ int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB);
const auto& col = assert_cast<const ColumnString&>(src_column);
const auto& data_ref = col.get_data_at(row_num);
size_t data_size = data_ref.size;
- ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) +
data_ref.size;
chars.resize(new_size);
@@ -294,7 +294,6 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const
char*>(&data_size),
sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t),
data_ref.data, data_size);
- dst_column->get_offsets().push_back(new_size);
}
} // namespace vectorized
} // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index d6d29cce556..95e510516ed 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -71,7 +71,7 @@ public:
int64_t end) const override;
Status read_column_from_pb(IColumn& column, const PValues& arg) const
override;
- void write_one_cell_to_binary(const IColumn& src_column, ColumnString*
dst_column,
+ void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override;
private:
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index b325ec88e9f..9193a3b0100 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -393,20 +393,21 @@ Status
DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
return Status::OK();
}
-// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn&
src_column,
-// ColumnString*
dst_column,
-// int64_t row_num) const
{
-// auto& col = assert_cast<const ColumnNullable&>(src_column);
-// uint8_t is_null = 0;
-// if (col.is_null_at(row_num)) [[unlikely]] {
-// is_null = 1;
-// dst_column->insert_data(reinterpret_cast<const char*>(is_null),
sizeof(uint8_t));
-// } else {
-// dst_column->insert_data(reinterpret_cast<const char*>(is_null),
sizeof(uint8_t));
-// auto& nested_col = col.get_nested_column();
-// nested_serde->write_one_cell_to_binary(nested_col, dst_column,
row_num);
-// }
-// }
+void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
+ ColumnString::Chars&
chars,
+ int64_t row_num) const {
+ auto& col = assert_cast<const ColumnNullable&>(src_column);
+ if (col.is_null_at(row_num)) [[unlikely]] {
+ const uint8_t type = static_cast<uint8_t>(TypeIndex::Nothing);
+ const size_t old_size = chars.size();
+ const size_t new_size = old_size + sizeof(uint8_t);
+ chars.resize(new_size);
+ memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type),
sizeof(uint8_t));
+ } else {
+ auto& nested_col = col.get_nested_column();
+ nested_serde->write_one_cell_to_binary(nested_col, chars, row_num);
+ }
+}
} // namespace vectorized
} // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 33cf86ab694..828c079244b 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -99,8 +99,8 @@ public:
int64_t row_num) const override;
Status read_one_cell_from_json(IColumn& column, const rapidjson::Value&
result) const override;
- // void write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column,
- // int64_t row_num) const override;
+ void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
+ int64_t row_num) const override;
DataTypeSerDeSPtr get_nested_serde() { return nested_serde; }
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index c5f2994f6b2..fcf72e6f992 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -395,19 +395,17 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const
std::string& timezone,
template <typename T>
void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn&
src_column,
- ColumnString* dst_column,
+ ColumnString::Chars&
chars,
int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeId<T>::value);
const auto& data_ref = assert_cast<const
ColumnType&>(src_column).get_data_at(row_num);
- ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size;
chars.resize(new_size);
memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type),
sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data,
data_ref.size);
- dst_column->get_offsets().push_back(new_size);
}
/// Explicit template instantiations - to avoid code bloat in headers.
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h
index c9073f5e868..db4373e646c 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -107,7 +107,7 @@ public:
int64_t row_num) const override;
Status read_one_cell_from_json(IColumn& column, const rapidjson::Value&
result) const override;
- void write_one_cell_to_binary(const IColumn& src_column, ColumnString*
dst_column,
+ void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override;
private:
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index 38b2590b062..8a879b5df26 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -337,7 +337,7 @@ public:
Arena& mem_pool, int64_t row_num)
const;
virtual Status read_one_cell_from_json(IColumn& column, const
rapidjson::Value& result) const;
- virtual void write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst,
+ virtual void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"write_one_cell_to_binary");
}
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h
index 50acf28c6f2..39a623316a2 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -366,14 +366,13 @@ public:
return Status::OK();
}
- void write_one_cell_to_binary(const IColumn& src_column, ColumnString*
dst_column,
+ void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override {
const uint8_t type = static_cast<uint8_t>(TypeIndex::String);
const auto& col = assert_cast<const ColumnType&>(src_column);
const auto& data_ref = col.get_data_at(row_num);
const size_t data_size = data_ref.size;
- ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) +
data_ref.size;
chars.resize(new_size);
@@ -383,7 +382,6 @@ public:
sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t),
data_ref.data,
data_size);
- dst_column->get_offsets().push_back(chars.size());
}
private:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]