This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8a6f0f91c3f [opt](variant) deserialize sparse binary data to variant
subcolumn (#56977)
8a6f0f91c3f is described below
commit 8a6f0f91c3f2005f979ef7f5ff872c83dabe275a
Author: Sun Chenyang <[email protected]>
AuthorDate: Mon Oct 27 11:35:10 2025 +0800
[opt](variant) deserialize sparse binary data to variant subcolumn (#56977)
---
.../variant/hierarchical_data_iterator.cpp | 13 +-
be/src/olap/tablet_schema.cpp | 115 +++----
be/src/vec/columns/column_variant.cpp | 265 +++------------
be/src/vec/columns/column_variant.h | 21 +-
.../vec/data_types/serde/data_type_array_serde.cpp | 40 +++
.../vec/data_types/serde/data_type_array_serde.h | 5 +
.../data_types/serde/data_type_decimal_serde.cpp | 62 ++++
.../vec/data_types/serde/data_type_decimal_serde.h | 5 +
.../vec/data_types/serde/data_type_jsonb_serde.cpp | 20 ++
.../vec/data_types/serde/data_type_jsonb_serde.h | 5 +
.../data_types/serde/data_type_number_serde.cpp | 111 ++++++
.../vec/data_types/serde/data_type_number_serde.h | 6 +
be/src/vec/data_types/serde/data_type_serde.cpp | 128 +++++++
be/src/vec/data_types/serde/data_type_serde.h | 37 ++
.../vec/data_types/serde/data_type_string_serde.h | 19 ++
be/src/vec/functions/function_variant_element.cpp | 5 +-
.../deserialize_from_sparse_column_test.bin | Bin 0 -> 410 bytes
be/test/vec/columns/column_variant_test.cpp | 114 -------
.../vec/data_types/serde/data_type_serde_test.cpp | 376 +++++++++++++++++++++
19 files changed, 914 insertions(+), 433 deletions(-)
diff --git
a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
index b0af87641f6..b44aa627ff2 100644
--- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
@@ -381,18 +381,16 @@ Status HierarchicalDataIterator::_process_sparse_column(
// Case 1: subcolumn already created, append this
row's value into it.
if (auto it =
subcolumns_from_sparse_column.find(sub_path);
it != subcolumns_from_sparse_column.end()) {
- const auto& data =
ColumnVariant::deserialize_from_sparse_column(
- &src_sparse_data_values,
lower_bound_index);
- it->second.insert(data.first, data.second);
+
it->second.deserialize_from_sparse_column(&src_sparse_data_values,
+
lower_bound_index);
}
// Case 2: subcolumn not created yet and we still have
quota → create it and insert.
else if (subcolumns_from_sparse_column.size() < count)
{
// Initialize subcolumn with current logical row
index i to align sizes.
ColumnVariant::Subcolumn subcolumn(/*size*/ i,
/*is_nullable*/ true,
false);
- const auto& data =
ColumnVariant::deserialize_from_sparse_column(
- &src_sparse_data_values,
lower_bound_index);
- subcolumn.insert(data.first, data.second);
+
subcolumn.deserialize_from_sparse_column(&src_sparse_data_values,
+
lower_bound_index);
subcolumns_from_sparse_column.emplace(sub_path,
std::move(subcolumn));
}
// Case 3: quota exhausted → keep the key/value in
container's sparse column.
@@ -416,9 +414,8 @@ Status HierarchicalDataIterator::_process_sparse_column(
// return Status::InternalError("Failed to add
subcolumn for sparse column");
// }
}
- const auto& data =
ColumnVariant::deserialize_from_sparse_column(
+
container_variant.get_subcolumn({})->deserialize_from_sparse_column(
&src_sparse_data_values, lower_bound_index);
-
container_variant.get_subcolumn({})->insert(data.first, data.second);
}
}
// if root was created, and not seen in sparse data, insert
default
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index e86ab940f56..bbeb0312dde 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -135,76 +135,51 @@ FieldType
TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
}
PrimitiveType TabletColumn::get_primitive_type_by_field_type(FieldType type) {
- switch (type) {
- case FieldType::OLAP_FIELD_TYPE_UNKNOWN:
- return PrimitiveType::INVALID_TYPE;
- case FieldType::OLAP_FIELD_TYPE_NONE:
- return PrimitiveType::TYPE_NULL;
- case FieldType::OLAP_FIELD_TYPE_BOOL:
- return PrimitiveType::TYPE_BOOLEAN;
- case FieldType::OLAP_FIELD_TYPE_TINYINT:
- return PrimitiveType::TYPE_TINYINT;
- case FieldType::OLAP_FIELD_TYPE_SMALLINT:
- return PrimitiveType::TYPE_SMALLINT;
- case FieldType::OLAP_FIELD_TYPE_INT:
- return PrimitiveType::TYPE_INT;
- case FieldType::OLAP_FIELD_TYPE_BIGINT:
- return PrimitiveType::TYPE_BIGINT;
- case FieldType::OLAP_FIELD_TYPE_LARGEINT:
- return PrimitiveType::TYPE_LARGEINT;
- case FieldType::OLAP_FIELD_TYPE_FLOAT:
- return PrimitiveType::TYPE_FLOAT;
- case FieldType::OLAP_FIELD_TYPE_DOUBLE:
- return PrimitiveType::TYPE_DOUBLE;
- case FieldType::OLAP_FIELD_TYPE_VARCHAR:
- return PrimitiveType::TYPE_VARCHAR;
- case FieldType::OLAP_FIELD_TYPE_STRING:
- return PrimitiveType::TYPE_STRING;
- case FieldType::OLAP_FIELD_TYPE_DATE:
- return PrimitiveType::TYPE_DATE;
- case FieldType::OLAP_FIELD_TYPE_DATETIME:
- return PrimitiveType::TYPE_DATETIME;
- case FieldType::OLAP_FIELD_TYPE_CHAR:
- return PrimitiveType::TYPE_CHAR;
- case FieldType::OLAP_FIELD_TYPE_STRUCT:
- return PrimitiveType::TYPE_STRUCT;
- case FieldType::OLAP_FIELD_TYPE_ARRAY:
- return PrimitiveType::TYPE_ARRAY;
- case FieldType::OLAP_FIELD_TYPE_MAP:
- return PrimitiveType::TYPE_MAP;
- case FieldType::OLAP_FIELD_TYPE_HLL:
- return PrimitiveType::TYPE_HLL;
- case FieldType::OLAP_FIELD_TYPE_BITMAP:
- return PrimitiveType::TYPE_BITMAP;
- case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
- return PrimitiveType::TYPE_QUANTILE_STATE;
- case FieldType::OLAP_FIELD_TYPE_DATEV2:
- return PrimitiveType::TYPE_DATEV2;
- case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
- return PrimitiveType::TYPE_DATETIMEV2;
- case FieldType::OLAP_FIELD_TYPE_TIMEV2:
- return PrimitiveType::TYPE_TIMEV2;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
- return PrimitiveType::TYPE_DECIMAL32;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
- return PrimitiveType::TYPE_DECIMAL64;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
- return PrimitiveType::TYPE_DECIMAL128I;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
- return PrimitiveType::TYPE_DECIMAL256;
- case FieldType::OLAP_FIELD_TYPE_IPV4:
- return PrimitiveType::TYPE_IPV4;
- case FieldType::OLAP_FIELD_TYPE_IPV6:
- return PrimitiveType::TYPE_IPV6;
- case FieldType::OLAP_FIELD_TYPE_JSONB:
- return PrimitiveType::TYPE_JSONB;
- case FieldType::OLAP_FIELD_TYPE_VARIANT:
- return PrimitiveType::TYPE_VARIANT;
- case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
- return PrimitiveType::TYPE_AGG_STATE;
- default:
- return PrimitiveType::INVALID_TYPE;
- }
+    // Lookup table indexed by the numeric value of FieldType. Field types that
+    // have no PrimitiveType counterpart map to INVALID_TYPE.
+    static constexpr PrimitiveType mapping[] = {
+            /*  0 (no field type) */ PrimitiveType::INVALID_TYPE,
+            /*  1 OLAP_FIELD_TYPE_TINYINT */ PrimitiveType::TYPE_TINYINT,
+            /*  2 OLAP_FIELD_TYPE_UNSIGNED_TINYINT */ PrimitiveType::INVALID_TYPE,
+            /*  3 OLAP_FIELD_TYPE_SMALLINT */ PrimitiveType::TYPE_SMALLINT,
+            /*  4 OLAP_FIELD_TYPE_UNSIGNED_SMALLINT */ PrimitiveType::INVALID_TYPE,
+            /*  5 OLAP_FIELD_TYPE_INT */ PrimitiveType::TYPE_INT,
+            /*  6 OLAP_FIELD_TYPE_UNSIGNED_INT */ PrimitiveType::INVALID_TYPE,
+            /*  7 OLAP_FIELD_TYPE_BIGINT */ PrimitiveType::TYPE_BIGINT,
+            /*  8 OLAP_FIELD_TYPE_UNSIGNED_BIGINT */ PrimitiveType::INVALID_TYPE,
+            /*  9 OLAP_FIELD_TYPE_LARGEINT */ PrimitiveType::TYPE_LARGEINT,
+            /* 10 OLAP_FIELD_TYPE_FLOAT */ PrimitiveType::TYPE_FLOAT,
+            /* 11 OLAP_FIELD_TYPE_DOUBLE */ PrimitiveType::TYPE_DOUBLE,
+            /* 12 OLAP_FIELD_TYPE_DISCRETE_DOUBLE */ PrimitiveType::INVALID_TYPE,
+            /* 13 OLAP_FIELD_TYPE_CHAR */ PrimitiveType::TYPE_CHAR,
+            /* 14 OLAP_FIELD_TYPE_DATE */ PrimitiveType::TYPE_DATE,
+            /* 15 OLAP_FIELD_TYPE_DATETIME */ PrimitiveType::TYPE_DATETIME,
+            /* 16 OLAP_FIELD_TYPE_DECIMAL */ PrimitiveType::INVALID_TYPE,
+            /* 17 OLAP_FIELD_TYPE_VARCHAR */ PrimitiveType::TYPE_VARCHAR,
+            /* 18 OLAP_FIELD_TYPE_STRUCT */ PrimitiveType::TYPE_STRUCT,
+            /* 19 OLAP_FIELD_TYPE_ARRAY */ PrimitiveType::TYPE_ARRAY,
+            /* 20 OLAP_FIELD_TYPE_MAP */ PrimitiveType::TYPE_MAP,
+            /* 21 OLAP_FIELD_TYPE_UNKNOWN */ PrimitiveType::INVALID_TYPE,
+            /* 22 OLAP_FIELD_TYPE_NONE */ PrimitiveType::TYPE_NULL,
+            /* 23 OLAP_FIELD_TYPE_HLL */ PrimitiveType::TYPE_HLL,
+            /* 24 OLAP_FIELD_TYPE_BOOL */ PrimitiveType::TYPE_BOOLEAN,
+            /* 25 OLAP_FIELD_TYPE_BITMAP */ PrimitiveType::TYPE_BITMAP,
+            /* 26 OLAP_FIELD_TYPE_STRING */ PrimitiveType::TYPE_STRING,
+            /* 27 OLAP_FIELD_TYPE_QUANTILE_STATE */ PrimitiveType::TYPE_QUANTILE_STATE,
+            /* 28 OLAP_FIELD_TYPE_DATEV2 */ PrimitiveType::TYPE_DATEV2,
+            /* 29 OLAP_FIELD_TYPE_DATETIMEV2 */ PrimitiveType::TYPE_DATETIMEV2,
+            /* 30 OLAP_FIELD_TYPE_TIMEV2 */ PrimitiveType::TYPE_TIMEV2,
+            /* 31 OLAP_FIELD_TYPE_DECIMAL32 */ PrimitiveType::TYPE_DECIMAL32,
+            /* 32 OLAP_FIELD_TYPE_DECIMAL64 */ PrimitiveType::TYPE_DECIMAL64,
+            /* 33 OLAP_FIELD_TYPE_DECIMAL128I */ PrimitiveType::TYPE_DECIMAL128I,
+            /* 34 OLAP_FIELD_TYPE_JSONB */ PrimitiveType::TYPE_JSONB,
+            /* 35 OLAP_FIELD_TYPE_VARIANT */ PrimitiveType::TYPE_VARIANT,
+            /* 36 OLAP_FIELD_TYPE_AGG_STATE */ PrimitiveType::TYPE_AGG_STATE,
+            /* 37 OLAP_FIELD_TYPE_DECIMAL256 */ PrimitiveType::TYPE_DECIMAL256,
+            /* 38 OLAP_FIELD_TYPE_IPV4 */ PrimitiveType::TYPE_IPV4,
+            /* 39 OLAP_FIELD_TYPE_IPV6 */ PrimitiveType::TYPE_IPV6,
+    };
+    constexpr int mapping_size = static_cast<int>(sizeof(mapping) / sizeof(mapping[0]));
+
+    // `type` can be produced by casting a raw tag byte taken from serialized
+    // data, so it is not guaranteed to be a known enumerator. Anything outside
+    // the table falls back to INVALID_TYPE, exactly as the `default:` branch of
+    // the previous switch did, instead of indexing past the end of the array.
+    const int idx = static_cast<int>(type);
+    if (idx < 0 || idx >= mapping_size) {
+        return PrimitiveType::INVALID_TYPE;
+    }
+    return mapping[idx];
}
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
diff --git a/be/src/vec/columns/column_variant.cpp
b/be/src/vec/columns/column_variant.cpp
index 6f7f43da9cd..a8af33605e0 100644
--- a/be/src/vec/columns/column_variant.cpp
+++ b/be/src/vec/columns/column_variant.cpp
@@ -851,217 +851,14 @@ void
ColumnVariant::Subcolumn::serialize_to_sparse_column(ColumnString* key, std
"Index ({}) for serialize to sparse column is out
of range", row);
}
-struct PackedUInt128 {
- // PackedInt128() : value(0) {}
- PackedUInt128() = default;
-
- PackedUInt128(const unsigned __int128& value_) { value = value_; }
- PackedUInt128& operator=(const unsigned __int128& value_) {
- value = value_;
- return *this;
- }
- PackedUInt128& operator=(const PackedUInt128& rhs) = default;
-
- uint128_t value;
-} __attribute__((packed));
-
-const NO_SANITIZE_UNDEFINED char* parse_binary_from_sparse_column(FieldType
type, const char* data,
- Field& res,
FieldInfo& info_res) {
- info_res.scalar_type_id =
TabletColumn::get_primitive_type_by_field_type(type);
- const char* end = data;
- switch (type) {
- case FieldType::OLAP_FIELD_TYPE_STRING: {
- size_t size = unaligned_load<size_t>(data);
- data += sizeof(size_t);
- res = Field::create_field<TYPE_STRING>(String(data, size));
- end = data + size;
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_TINYINT: {
- Int8 v = unaligned_load<Int8>(data);
- res = Field::create_field<TYPE_TINYINT>(v);
- end = data + sizeof(Int8);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_SMALLINT: {
- Int16 v = unaligned_load<Int16>(data);
- res = Field::create_field<TYPE_SMALLINT>(v);
- end = data + sizeof(Int16);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_INT: {
- Int32 v = unaligned_load<Int32>(data);
- res = Field::create_field<TYPE_INT>(v);
- end = data + sizeof(Int32);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_BIGINT: {
- Int64 v = unaligned_load<Int64>(data);
- res = Field::create_field<TYPE_BIGINT>(v);
- end = data + sizeof(Int64);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_LARGEINT: {
- PackedInt128 pack;
- memcpy(&pack, data, sizeof(PackedInt128));
- res = Field::create_field<TYPE_LARGEINT>(Int128(pack.value));
- end = data + sizeof(PackedInt128);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_FLOAT: {
- Float32 v = unaligned_load<Float32>(data);
- res = Field::create_field<TYPE_FLOAT>(v);
- end = data + sizeof(Float32);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DOUBLE: {
- Float64 v = unaligned_load<Float64>(data);
- res = Field::create_field<TYPE_DOUBLE>(v);
- end = data + sizeof(Float64);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_JSONB: {
- size_t size = unaligned_load<size_t>(data);
- data += sizeof(size_t);
- res = Field::create_field<TYPE_JSONB>(JsonbField(data, size));
- end = data + size;
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_ARRAY: {
- const size_t size = unaligned_load<size_t>(data);
- data += sizeof(size_t);
- res = Field::create_field<TYPE_ARRAY>(Array(size));
- auto& array = res.get<Array>();
- info_res.num_dimensions++;
- FieldType nested_filed_type = FieldType::OLAP_FIELD_TYPE_NONE;
- for (size_t i = 0; i < size; ++i) {
- Field nested_field;
- const auto nested_type =
- static_cast<FieldType>(*reinterpret_cast<const
uint8_t*>(data++));
- data = parse_binary_from_sparse_column(nested_type, data,
nested_field, info_res);
- array[i] = std::move(nested_field);
- if (nested_type != FieldType::OLAP_FIELD_TYPE_NONE) {
- nested_filed_type = nested_type;
- }
- }
- info_res.scalar_type_id =
TabletColumn::get_primitive_type_by_field_type(nested_filed_type);
- end = data;
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_IPV4: {
- IPv4 v = unaligned_load<IPv4>(data);
- res = Field::create_field<TYPE_IPV4>(v);
- end = data + sizeof(IPv4);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_IPV6: {
- PackedUInt128 pack;
- memcpy(&pack, data, sizeof(PackedUInt128));
- auto v = pack.value;
- res = Field::create_field<TYPE_IPV6>(v);
- end = data + sizeof(PackedUInt128);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATEV2: {
- UInt32 v = unaligned_load<UInt32>(data);
- res = Field::create_field<TYPE_DATEV2>(v);
- end = data + sizeof(UInt32);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
- const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- UInt64 v = unaligned_load<UInt64>(data);
- res = Field::create_field<TYPE_DATETIMEV2>(v);
- info_res.precision = -1;
- info_res.scale = static_cast<int>(scale);
- end = data + sizeof(UInt64);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL32: {
- const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- Int32 v = unaligned_load<Int32>(data);
- res = Field::create_field<TYPE_DECIMAL32>(Decimal32(v));
- info_res.precision = static_cast<int>(precision);
- info_res.scale = static_cast<int>(scale);
- end = data + sizeof(Int32);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL64: {
- const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- Int64 v = unaligned_load<Int64>(data);
- res = Field::create_field<TYPE_DECIMAL64>(Decimal64(v));
- info_res.precision = static_cast<int>(precision);
- info_res.scale = static_cast<int>(scale);
- end = data + sizeof(Int64);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: {
- const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- PackedInt128 pack;
- memcpy(&pack, data, sizeof(PackedInt128));
- res = Field::create_field<TYPE_DECIMAL128I>(Decimal128V3(pack.value));
- info_res.precision = static_cast<int>(precision);
- info_res.scale = static_cast<int>(scale);
- end = data + sizeof(PackedInt128);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL256: {
- const uint8_t precision = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- const uint8_t scale = *reinterpret_cast<const uint8_t*>(data);
- data += sizeof(uint8_t);
- wide::Int256 v;
- memcpy(&v, data, sizeof(wide::Int256));
- res = Field::create_field<TYPE_DECIMAL256>(Decimal256(v));
- info_res.precision = static_cast<int>(precision);
- info_res.scale = static_cast<int>(scale);
- end = data + sizeof(wide::Int256);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_BOOL: {
- res = Field::create_field<TYPE_BOOLEAN>(*reinterpret_cast<const
uint8_t*>(data));
- end = data + sizeof(uint8_t);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_NONE: {
- res = Field();
- end = data;
- break;
- }
- default:
- throw doris::Exception(ErrorCode::OUT_OF_BOUND,
- "Type ({}) for deserialize_from_sparse_column
is invalid", type);
- }
- return end;
-}
-
std::pair<Field, FieldInfo>
ColumnVariant::deserialize_from_sparse_column(const ColumnString* value,
size_t row) {
const auto& data_ref = value->get_data_at(row);
- const char* data = data_ref.data;
- DCHECK(data_ref.size > 1);
- const FieldType type = static_cast<FieldType>(*reinterpret_cast<const
uint8_t*>(data++));
+ const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
Field res;
- FieldInfo info_res = {
- .scalar_type_id =
TabletColumn::get_primitive_type_by_field_type(type),
- .have_nulls = false,
- .need_convert = false,
- .num_dimensions = 0,
- };
- const char* end = parse_binary_from_sparse_column(type, data, res,
info_res);
- DCHECK_EQ(end - data_ref.data, data_ref.size)
- << "FieldType: " << (int)type << " data_ref.size: " <<
data_ref.size << " end: " << end
- << " data: " << data;
+ FieldInfo info_res;
+ const uint8_t* end =
DataTypeSerDe::deserialize_binary_to_field(start_data, res, info_res);
+ CHECK_EQ(end - start_data, data_ref.size);
return {std::move(res), std::move(info_res)};
}
@@ -1296,9 +1093,7 @@ void
ColumnVariant::insert_from_sparse_column_and_fill_remaing_dense_column(
const PathInData column_path(src_sparse_path);
if (auto* subcolumn = get_subcolumn(column_path); subcolumn !=
nullptr) {
// Deserialize binary value into subcolumn from src serialized
sparse column data.
- const auto& data =
-
ColumnVariant::deserialize_from_sparse_column(src_sparse_column_values, i);
- subcolumn->insert(data.first, data.second);
+
subcolumn->deserialize_from_sparse_column(src_sparse_column_values, i);
} else {
// Before inserting this path into sparse column check if we
need to
// insert subcolumns from
sorted_src_subcolumn_for_sparse_column before.
@@ -1811,9 +1606,8 @@ void
ColumnVariant::serialize_one_row_to_json_format(int64_t row_num, BufferWrit
} else {
// To serialize value stored in shared data we should first
deserialize it from binary format.
Subcolumn tmp_subcolumn(0, true);
- const auto& data = ColumnVariant::deserialize_from_sparse_column(
- sparse_data_values, index_in_sparse_data_values++);
- tmp_subcolumn.insert(data.first, data.second);
+ tmp_subcolumn.deserialize_from_sparse_column(sparse_data_values,
+
index_in_sparse_data_values++);
DataTypeSerDe::FormatOptions options;
options.escape_char = '\\';
tmp_subcolumn.serialize_text_json(0, output, options);
@@ -2508,6 +2302,44 @@ size_t
ColumnVariant::find_path_lower_bound_in_sparse_data(StringRef path,
return it.index;
}
+// Decodes the binary cell stored at `row` of the sparse value column `value`
+// and appends it to this subcolumn.
+//
+// The cell begins with a one-byte FieldType tag. Two routes exist:
+//   * fast path: the tagged type equals the subcolumn's least common type, so
+//     the payload is decoded straight into the last column part (`data.back()`)
+//     and `num_rows` is bumped;
+//   * slow path: the payload is decoded into a Field + FieldInfo and handed to
+//     insert().
+void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const ColumnString* value,
+                                                              size_t row) {
+    const auto& data_ref = value->get_data_at(row);
+    const auto* start_data = reinterpret_cast<const uint8_t*>(data_ref.data);
+    const PrimitiveType type =
+            TabletColumn::get_primitive_type_by_field_type(static_cast<FieldType>(*start_data));
+    // Debug-only sanity check that the decoder consumed exactly the whole cell.
+    auto check_end = [&](const uint8_t* end_ptr) {
+        DCHECK_EQ(end_ptr - reinterpret_cast<const uint8_t*>(data_ref.data), data_ref.size);
+    };
+
+    // True when the serialized type differs from the least common type, i.e.
+    // when the value has to go through Field + insert() instead of being
+    // decoded directly into the column.
+    bool needs_field_conversion = type != least_common_type.get_type_id();
+
+    // For arrays the outer tag alone is not enough: the element type must also
+    // match the base type of the least common type.
+    if (!needs_field_conversion && type == PrimitiveType::TYPE_ARRAY) {
+        // Serialized array layout: [ARRAY tag][size_t element count][element 0 tag]...
+        // The element tag therefore sits after the count, not right after the
+        // ARRAY tag.
+        constexpr size_t array_header_size = sizeof(uint8_t) + sizeof(size_t);
+        if (data_ref.size <= array_header_size) {
+            // Empty (or truncated) array: there is no element tag to inspect,
+            // so take the generic Field route.
+            needs_field_conversion = true;
+        } else {
+            const size_t nested_size = unaligned_load<size_t>(start_data + sizeof(uint8_t));
+            if (nested_size == 0) {
+                needs_field_conversion = true;
+            } else {
+                const PrimitiveType nested_type = TabletColumn::get_primitive_type_by_field_type(
+                        static_cast<FieldType>(*(start_data + array_header_size)));
+                needs_field_conversion = (nested_type != least_common_type.get_base_type_id());
+            }
+        }
+    }
+
+    if (needs_field_conversion) {
+        Field res;
+        FieldInfo info;
+        const uint8_t* end_data = DataTypeSerDe::deserialize_binary_to_field(start_data, res, info);
+        check_end(end_data);
+        insert(std::move(res), std::move(info));
+    } else {
+        CHECK(data.size() > 0);
+        const uint8_t* end_data =
+                DataTypeSerDe::deserialize_binary_to_column(start_data, *data.back());
+        check_end(end_data);
+        ++num_rows;
+    }
+}
+
void ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn,
NullMap* null_map,
StringRef path,
const ColumnPtr&
sparse_data_column,
@@ -2535,12 +2367,7 @@ void
ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null
bool is_null = false;
if (lower_bound_path_index != paths_end &&
sparse_data_paths.get_data_at(lower_bound_path_index) == path) {
- // auto value_data =
sparse_data_values.get_data_at(lower_bound_path_index);
- // ReadBufferFromMemory buf(value_data.data, value_data.size);
- // dynamic_serialization->deserializeBinary(path_column, buf,
getFormatSettings());
- const auto& data = ColumnVariant::deserialize_from_sparse_column(
- &sparse_data_values, lower_bound_path_index);
- subcolumn.insert(data.first, data.second);
+ subcolumn.deserialize_from_sparse_column(&sparse_data_values,
lower_bound_path_index);
is_null = false;
} else {
subcolumn.insert_default();
diff --git a/be/src/vec/columns/column_variant.h
b/be/src/vec/columns/column_variant.h
index 74258fb0b15..9c38d5441a3 100644
--- a/be/src/vec/columns/column_variant.h
+++ b/be/src/vec/columns/column_variant.h
@@ -69,25 +69,6 @@ namespace doris::vectorized {
#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
#endif
-/// Info that represents a scalar or array field in a decomposed view.
-/// It allows to recreate field with different number
-/// of dimensions or nullability.
-struct FieldInfo {
- /// The common type id of of all scalars in field.
- PrimitiveType scalar_type_id = PrimitiveType::INVALID_TYPE;
- /// Do we have NULL scalar in field.
- bool have_nulls = false;
- /// If true then we have scalars with different types in array and
- /// we need to convert scalars to the common type.
- bool need_convert = false;
- /// Number of dimension in array. 0 if field is scalar.
- size_t num_dimensions = 0;
-
- // decimal info
- int scale = 0;
- int precision = 0;
-};
-
/** A column that represents object with dynamic set of subcolumns.
* Subcolumns are identified by paths in document and are stored in
* a trie-like structure. ColumnVariant is not suitable for writing into
tables
@@ -195,6 +176,8 @@ public:
/// Returns last inserted field.
Field get_last_field() const;
+ void deserialize_from_sparse_column(const ColumnString* value, size_t
row);
+
/// Returns single column if subcolumn in finalizes.
/// Otherwise -- undefined behaviour.
IColumn& get_finalized_column();
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 267e0a3d2db..97571257504 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -527,6 +527,46 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const
IColumn& src_column,
}
}
+// Appends one serialized array to `column` (which must be a ColumnArray).
+// `data` points at the size_t element count; the elements follow and are each
+// decoded by DataTypeSerDe::deserialize_binary_to_column into the nested column.
+// Returns the position just past the last byte consumed.
+const uint8_t* DataTypeArraySerDe::deserialize_binary_to_column(const uint8_t* data,
+                                                                IColumn& column) {
+    auto& dst = assert_cast<ColumnArray&, TypeCheckOnRelease::DISABLE>(column);
+    auto& elements = dst.get_data();
+    auto& row_offsets = dst.get_offsets();
+
+    const auto element_count = unaligned_load<size_t>(data);
+    const uint8_t* cursor = data + sizeof(size_t);
+
+    // An empty array simply skips the loop and repeats the previous offset.
+    for (size_t remaining = element_count; remaining > 0; --remaining) {
+        cursor = DataTypeSerDe::deserialize_binary_to_column(cursor, elements);
+    }
+
+    const auto previous_end = row_offsets.back();
+    row_offsets.push_back(previous_end + element_count);
+    return cursor;
+}
+
+// Decodes one serialized array into `field` as an Array of Fields.
+// `data` points at the size_t element count; every element is decoded through
+// DataTypeSerDe::deserialize_binary_to_field using the same `info` object.
+// On return `info.num_dimensions` has been incremented and `info.scalar_type_id`
+// holds the last non-NULL scalar type reported for an element (TYPE_NULL if
+// there was none). Returns the position just past the last byte consumed.
+const uint8_t* DataTypeArraySerDe::deserialize_binary_to_field(const uint8_t* data, Field& field,
+                                                               FieldInfo& info) {
+    const auto element_count = unaligned_load<size_t>(data);
+    data += sizeof(size_t);
+
+    field = Field::create_field<TYPE_ARRAY>(Array(element_count));
+    ++info.num_dimensions;
+
+    auto& elements = field.get<Array>();
+    PrimitiveType element_type = PrimitiveType::TYPE_NULL;
+    size_t pos = 0;
+    while (pos < element_count) {
+        Field element;
+        data = DataTypeSerDe::deserialize_binary_to_field(data, element, info);
+        elements[pos] = std::move(element);
+        ++pos;
+        if (info.scalar_type_id == PrimitiveType::TYPE_NULL) {
+            continue;
+        }
+        element_type = info.scalar_type_id;
+    }
+    info.scalar_type_id = element_type;
+    return data;
+}
+
void DataTypeArraySerDe::to_string(const IColumn& column, size_t row_num,
BufferWritable& bw) const {
const auto& data_column = assert_cast<const ColumnArray&>(column);
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h
b/be/src/vec/data_types/serde/data_type_array_serde.h
index b53e2da4e55..eeedf502b5f 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -117,6 +117,11 @@ public:
void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override;
+ static const uint8_t* deserialize_binary_to_column(const uint8_t* data,
IColumn& column);
+
+ static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
+ FieldInfo& info);
+
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw)
const override;
private:
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index 2014c2258b3..e9f3f5d5dba 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -654,6 +654,68 @@ void
DataTypeDecimalSerDe<T>::write_one_cell_to_binary(const IColumn& src_column
data_ref.data, data_ref.size);
}
+// Appends one serialized decimal to `column` (a ColumnDecimal<T>).
+// The first two bytes at `data` are skipped; the raw value of the width
+// matching T follows. Returns the position just past the value.
+template <PrimitiveType T>
+const uint8_t* DataTypeDecimalSerDe<T>::deserialize_binary_to_column(const uint8_t* data,
+                                                                     IColumn& column) {
+    auto& dst = assert_cast<ColumnDecimal<T>&, TypeCheckOnRelease::DISABLE>(column);
+    // Step over the two one-byte header fields preceding the value.
+    const uint8_t* payload = data + 2 * sizeof(uint8_t);
+    if constexpr (T == TYPE_DECIMAL32) {
+        dst.insert_value(unaligned_load<Int32>(payload));
+        return payload + sizeof(Int32);
+    } else if constexpr (T == TYPE_DECIMAL64) {
+        dst.insert_value(unaligned_load<Int64>(payload));
+        return payload + sizeof(Int64);
+    } else if constexpr (T == TYPE_DECIMAL128I) {
+        dst.insert_value(unaligned_load<Int128>(payload));
+        return payload + sizeof(Int128);
+    } else if constexpr (T == TYPE_DECIMAL256) {
+        dst.insert_value(Decimal256(unaligned_load<wide::Int256>(payload)));
+        return payload + sizeof(wide::Int256);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_column with type " + column.get_name());
+    }
+}
+
+// Decodes one serialized decimal into `field`.
+// Layout at `data`: one byte precision, one byte scale, then the raw value of
+// the width matching T. Precision and scale are stored into `info`.
+// Returns the position just past the value.
+template <PrimitiveType T>
+const uint8_t* DataTypeDecimalSerDe<T>::deserialize_binary_to_field(const uint8_t* data,
+                                                                    Field& field, FieldInfo& info) {
+    info.precision = static_cast<int>(data[0]);
+    info.scale = static_cast<int>(data[1]);
+    const uint8_t* payload = data + 2 * sizeof(uint8_t);
+
+    if constexpr (T == TYPE_DECIMAL32) {
+        const Int32 raw = unaligned_load<Int32>(payload);
+        field = Field::create_field<TYPE_DECIMAL32>(Decimal32(raw));
+        return payload + sizeof(Int32);
+    } else if constexpr (T == TYPE_DECIMAL64) {
+        const Int64 raw = unaligned_load<Int64>(payload);
+        field = Field::create_field<TYPE_DECIMAL64>(Decimal64(raw));
+        return payload + sizeof(Int64);
+    } else if constexpr (T == TYPE_DECIMAL128I) {
+        // The serialized __int128 is not aligned; it is copied byte-wise into a
+        // packed wrapper so that no aligned 128-bit load is issued on it.
+        PackedInt128 packed;
+        memcpy(&packed, payload, sizeof(PackedInt128));
+        field = Field::create_field<TYPE_DECIMAL128I>(Decimal128V3(packed.value));
+        return payload + sizeof(PackedInt128);
+    } else if constexpr (T == TYPE_DECIMAL256) {
+        wide::Int256 raw;
+        memcpy(&raw, payload, sizeof(wide::Int256));
+        field = Field::create_field<TYPE_DECIMAL256>(Decimal256(raw));
+        return payload + sizeof(wide::Int256);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_field with type " + type_to_string(T));
+    }
+}
+
template class DataTypeDecimalSerDe<TYPE_DECIMAL32>;
template class DataTypeDecimalSerDe<TYPE_DECIMAL64>;
template class DataTypeDecimalSerDe<TYPE_DECIMAL128I>;
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h
b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 7e0cf461314..277a0309abd 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -132,6 +132,11 @@ public:
void to_string_batch(const IColumn& column, ColumnString& column_to) const
override;
+ static const uint8_t* deserialize_binary_to_column(const uint8_t* data,
IColumn& column);
+
+ static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
+ FieldInfo& info);
+
private:
template <bool is_binary_format>
Status _write_column_to_mysql(const IColumn& column,
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index f29f5368b95..072611f362e 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -330,6 +330,26 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t),
data_ref.data, data_size);
}
+// Appends one serialized JSONB value to `column` (a ColumnString).
+// Layout at `data`: a size_t byte length followed by that many payload bytes.
+// Returns the position just past the payload.
+const uint8_t* DataTypeJsonbSerDe::deserialize_binary_to_column(const uint8_t* data,
+                                                                IColumn& column) {
+    auto& dst = assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(column);
+    const auto payload_size = unaligned_load<size_t>(data);
+    const uint8_t* payload = data + sizeof(size_t);
+    dst.insert_data(reinterpret_cast<const char*>(payload), payload_size);
+    return payload + payload_size;
+}
+
+// Decodes one serialized JSONB value into `field` as a JsonbField.
+// Layout at `data`: a size_t byte length followed by that many payload bytes.
+// `info` is not modified here. Returns the position just past the payload.
+const uint8_t* DataTypeJsonbSerDe::deserialize_binary_to_field(const uint8_t* data, Field& field,
+                                                               FieldInfo& info) {
+    const auto payload_size = unaligned_load<size_t>(data);
+    const uint8_t* payload = data + sizeof(size_t);
+    field = Field::create_field<TYPE_JSONB>(
+            JsonbField(reinterpret_cast<const char*>(payload), payload_size));
+    return payload + payload_size;
+}
+
void DataTypeJsonbSerDe::to_string(const IColumn& column, size_t row_num,
BufferWritable& bw) const {
const auto& col = assert_cast<const ColumnString&,
TypeCheckOnRelease::DISABLE>(column);
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index ce1ce81a437..1e0a1065b2c 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -80,6 +80,11 @@ public:
void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override;
+ static const uint8_t* deserialize_binary_to_column(const uint8_t* data,
IColumn& column);
+
+ static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
+ FieldInfo& info);
+
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw)
const override;
private:
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 1a15a02c7b3..0201a3a2ac9 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -770,6 +770,117 @@ void
DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data,
data_ref.size);
}
+/// Appends one binary-encoded value of primitive type `T` to `column`.
+/// DATETIMEV2 cells have one leading byte that is skipped before the 8-byte value;
+/// every other handled type is read as a bare fixed-width value.
+/// Returns the position just past the consumed bytes and throws
+/// NOT_IMPLEMENTED_ERROR for primitive types without a branch below.
+template <PrimitiveType T>
+const uint8_t* DataTypeNumberSerDe<T>::deserialize_binary_to_column(const uint8_t* data,
+                                                                     IColumn& column) {
+    auto& dst = assert_cast<ColumnType&, TypeCheckOnRelease::DISABLE>(column);
+    const uint8_t* cursor = data;
+    if constexpr (T == TYPE_BOOLEAN) {
+        const auto v = unaligned_load<UInt8>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(UInt8);
+    } else if constexpr (T == TYPE_TINYINT) {
+        const auto v = unaligned_load<Int8>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Int8);
+    } else if constexpr (T == TYPE_SMALLINT) {
+        const auto v = unaligned_load<Int16>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Int16);
+    } else if constexpr (T == TYPE_INT) {
+        const auto v = unaligned_load<Int32>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Int32);
+    } else if constexpr (T == TYPE_BIGINT) {
+        const auto v = unaligned_load<Int64>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Int64);
+    } else if constexpr (T == TYPE_LARGEINT) {
+        const auto v = unaligned_load<Int128>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Int128);
+    } else if constexpr (T == TYPE_FLOAT) {
+        const auto v = unaligned_load<Float32>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Float32);
+    } else if constexpr (T == TYPE_DOUBLE) {
+        const auto v = unaligned_load<Float64>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Float64);
+    } else if constexpr (T == TYPE_IPV4) {
+        const auto v = unaligned_load<UInt32>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(UInt32);
+    } else if constexpr (T == TYPE_IPV6) {
+        const auto v = unaligned_load<Int128>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(Int128);
+    } else if constexpr (T == TYPE_DATEV2) {
+        const auto v = unaligned_load<UInt32>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(UInt32);
+    } else if constexpr (T == TYPE_DATETIMEV2) {
+        // Step over the one-byte prefix that precedes the value.
+        cursor += sizeof(uint8_t);
+        const auto v = unaligned_load<UInt64>(cursor);
+        dst.insert_value(v);
+        cursor += sizeof(UInt64);
+    } else {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_column with type '{}'", type_to_string(T));
+    }
+    return cursor;
+}
+
+/// Decodes one binary-encoded value of primitive type `T` from `data` into `field`.
+///
+/// LARGEINT and IPV6 are copied with memcpy through their packed 128-bit wrappers;
+/// the remaining fixed-width types use unaligned_load. DATETIMEV2 cells start with a
+/// one-byte scale: it is stored in `info.scale` and `info.precision` is set to -1.
+/// No other branch writes to `info`.
+///
+/// Returns the position just past the consumed bytes and throws
+/// NOT_IMPLEMENTED_ERROR for primitive types without a branch below.
+template <PrimitiveType T>
+const uint8_t* DataTypeNumberSerDe<T>::deserialize_binary_to_field(const uint8_t* data,
+                                                                    Field& field, FieldInfo& info) {
+    if constexpr (T == TYPE_BOOLEAN) {
+        field = Field::create_field<TYPE_BOOLEAN>(unaligned_load<UInt8>(data));
+        data += sizeof(UInt8);
+    } else if constexpr (T == TYPE_TINYINT) {
+        Int8 v = unaligned_load<Int8>(data);
+        field = Field::create_field<TYPE_TINYINT>(v);
+        data += sizeof(Int8);
+    } else if constexpr (T == TYPE_SMALLINT) {
+        Int16 v = unaligned_load<Int16>(data);
+        field = Field::create_field<TYPE_SMALLINT>(v);
+        data += sizeof(Int16);
+    } else if constexpr (T == TYPE_INT) {
+        Int32 v = unaligned_load<Int32>(data);
+        field = Field::create_field<TYPE_INT>(v);
+        data += sizeof(Int32);
+    } else if constexpr (T == TYPE_BIGINT) {
+        Int64 v = unaligned_load<Int64>(data);
+        field = Field::create_field<TYPE_BIGINT>(v);
+        data += sizeof(Int64);
+    } else if constexpr (T == TYPE_LARGEINT) {
+        PackedInt128 pack;
+        memcpy(&pack, data, sizeof(PackedInt128));
+        field = Field::create_field<TYPE_LARGEINT>(Int128(pack.value));
+        data += sizeof(PackedInt128);
+    } else if constexpr (T == TYPE_FLOAT) {
+        Float32 v = unaligned_load<Float32>(data);
+        field = Field::create_field<TYPE_FLOAT>(v);
+        data += sizeof(Float32);
+    } else if constexpr (T == TYPE_DOUBLE) {
+        Float64 v = unaligned_load<Float64>(data);
+        field = Field::create_field<TYPE_DOUBLE>(v);
+        data += sizeof(Float64);
+    } else if constexpr (T == TYPE_IPV4) {
+        IPv4 v = unaligned_load<IPv4>(data);
+        field = Field::create_field<TYPE_IPV4>(v);
+        data += sizeof(IPv4);
+    } else if constexpr (T == TYPE_IPV6) {
+        PackedUInt128 pack;
+        memcpy(&pack, data, sizeof(PackedUInt128));
+        auto v = pack.value;
+        field = Field::create_field<TYPE_IPV6>(v);
+        data += sizeof(PackedUInt128);
+    } else if constexpr (T == TYPE_DATEV2) {
+        UInt32 v = unaligned_load<UInt32>(data);
+        field = Field::create_field<TYPE_DATEV2>(v);
+        data += sizeof(UInt32);
+    } else if constexpr (T == TYPE_DATETIMEV2) {
+        // `data` is already a `const uint8_t*`, so the scale byte is read directly.
+        const uint8_t scale = *data;
+        data += sizeof(uint8_t);
+        UInt64 v = unaligned_load<UInt64>(data);
+        info.precision = -1;
+        info.scale = static_cast<int>(scale);
+        field = Field::create_field<TYPE_DATETIMEV2>(v);
+        data += sizeof(UInt64);
+    } else {
+        // Name this function, not its column sibling, so the failing entry point is clear.
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "deserialize_binary_to_field with type '{}'", type_to_string(T));
+    }
+    return data;
+}
template <PrimitiveType T>
void value_to_string(const typename PrimitiveTypeTraits<T>::ColumnItemType
value,
BufferWritable& bw, int scale) {
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h
b/be/src/vec/data_types/serde/data_type_number_serde.h
index e38b54a7bb5..783c7fc9bfc 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -129,12 +129,18 @@ public:
void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const override;
+
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw)
const override;
void to_string_batch(const IColumn& column, ColumnString& column_to) const
override;
// will override in DateTime and Time
virtual int get_scale() const { return 0; }
+ static const uint8_t* deserialize_binary_to_column(const uint8_t* data,
IColumn& column);
+
+ static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
+ FieldInfo& info);
+
private:
template <bool is_binary_format>
Status _write_column_to_mysql(const IColumn& column,
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp
b/be/src/vec/data_types/serde/data_type_serde.cpp
index 683fdfbded8..0b1288895fa 100644
--- a/be/src/vec/data_types/serde/data_type_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_serde.cpp
@@ -25,7 +25,11 @@
#include "vec/columns/column.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type.h"
+#include "vec/data_types/serde/data_type_array_serde.h"
+#include "vec/data_types/serde/data_type_decimal_serde.h"
#include "vec/data_types/serde/data_type_jsonb_serde.h"
+#include "vec/data_types/serde/data_type_number_serde.h"
+#include "vec/data_types/serde/data_type_string_serde.h"
#include "vec/functions/cast/cast_base.h"
namespace doris {
namespace vectorized {
@@ -134,5 +138,129 @@ void DataTypeSerDe::to_string(const IColumn& column,
size_t row_num, BufferWrita
const std::string DataTypeSerDe::NULL_IN_COMPLEX_TYPE = "null";
const std::string DataTypeSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N";
+/// Decodes one type-tagged cell from `data` and appends it to the nullable `column`.
+/// The first byte is a FieldType tag. For a handled tag the matching serde decodes the
+/// payload into the nested column and push_false_to_nullmap(1) is called on the wrapper.
+/// OLAP_FIELD_TYPE_NONE consumes no payload and calls insert_default() instead.
+/// Returns the position after the consumed bytes; throws OUT_OF_BOUND for any other tag.
+const uint8_t* DataTypeSerDe::deserialize_binary_to_column(const uint8_t* data, IColumn& column) {
+    auto& nullable = assert_cast<ColumnNullable&, TypeCheckOnRelease::DISABLE>(column);
+    const auto tag = static_cast<FieldType>(*data++);
+    const uint8_t* next = data;
+    switch (tag) {
+// One `case` per tag: SERDE is the complete serde type whose static decoder is invoked.
+#define DESERIALIZE_CELL_WITH(FT, SERDE) \
+    case FieldType::FT: { \
+        next = SERDE::deserialize_binary_to_column(data, nullable.get_nested_column()); \
+        nullable.push_false_to_nullmap(1); \
+        break; \
+    }
+
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_STRING, DataTypeStringSerDe)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_TINYINT, DataTypeNumberSerDe<TYPE_TINYINT>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_SMALLINT, DataTypeNumberSerDe<TYPE_SMALLINT>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_INT, DataTypeNumberSerDe<TYPE_INT>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_BIGINT, DataTypeNumberSerDe<TYPE_BIGINT>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_LARGEINT, DataTypeNumberSerDe<TYPE_LARGEINT>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_FLOAT, DataTypeNumberSerDe<TYPE_FLOAT>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DOUBLE, DataTypeNumberSerDe<TYPE_DOUBLE>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_JSONB, DataTypeJsonbSerDe)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_ARRAY, DataTypeArraySerDe)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_IPV4, DataTypeNumberSerDe<TYPE_IPV4>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_IPV6, DataTypeNumberSerDe<TYPE_IPV6>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DATEV2, DataTypeNumberSerDe<TYPE_DATEV2>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DATETIMEV2, DataTypeNumberSerDe<TYPE_DATETIMEV2>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DECIMAL32, DataTypeDecimalSerDe<TYPE_DECIMAL32>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DECIMAL64, DataTypeDecimalSerDe<TYPE_DECIMAL64>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DECIMAL128I, DataTypeDecimalSerDe<TYPE_DECIMAL128I>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_DECIMAL256, DataTypeDecimalSerDe<TYPE_DECIMAL256>)
+        DESERIALIZE_CELL_WITH(OLAP_FIELD_TYPE_BOOL, DataTypeNumberSerDe<TYPE_BOOLEAN>)
+
+#undef DESERIALIZE_CELL_WITH
+
+    case FieldType::OLAP_FIELD_TYPE_NONE: {
+        // No payload follows the tag.
+        next = data;
+        nullable.insert_default();
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Type ({}) for deserialize_binary_to_column is invalid", tag);
+    }
+
+    return next;
+}
+
+/// Decodes one type-tagged cell from `data` into `field`, filling `info` along the way.
+/// The first byte is a FieldType tag; `info.scalar_type_id` is set from it before the
+/// payload is handed to the matching serde. OLAP_FIELD_TYPE_NONE consumes no payload and
+/// leaves `field` untouched. Returns the position after the consumed bytes; throws
+/// OUT_OF_BOUND for any other tag.
+const uint8_t* DataTypeSerDe::deserialize_binary_to_field(const uint8_t* data, Field& field,
+                                                           FieldInfo& info) {
+    const auto tag = static_cast<FieldType>(*data++);
+    info.scalar_type_id = TabletColumn::get_primitive_type_by_field_type(tag);
+    const uint8_t* next = data;
+    switch (tag) {
+// One `case` per tag: SERDE is the complete serde type whose static decoder is invoked.
+#define DESERIALIZE_FIELD_WITH(FT, SERDE) \
+    case FieldType::FT: { \
+        next = SERDE::deserialize_binary_to_field(data, field, info); \
+        break; \
+    }
+
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_STRING, DataTypeStringSerDe)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_TINYINT, DataTypeNumberSerDe<TYPE_TINYINT>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_SMALLINT, DataTypeNumberSerDe<TYPE_SMALLINT>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_INT, DataTypeNumberSerDe<TYPE_INT>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_BIGINT, DataTypeNumberSerDe<TYPE_BIGINT>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_LARGEINT, DataTypeNumberSerDe<TYPE_LARGEINT>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_FLOAT, DataTypeNumberSerDe<TYPE_FLOAT>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DOUBLE, DataTypeNumberSerDe<TYPE_DOUBLE>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_JSONB, DataTypeJsonbSerDe)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_ARRAY, DataTypeArraySerDe)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_IPV4, DataTypeNumberSerDe<TYPE_IPV4>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_IPV6, DataTypeNumberSerDe<TYPE_IPV6>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DATEV2, DataTypeNumberSerDe<TYPE_DATEV2>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DATETIMEV2, DataTypeNumberSerDe<TYPE_DATETIMEV2>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DECIMAL32, DataTypeDecimalSerDe<TYPE_DECIMAL32>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DECIMAL64, DataTypeDecimalSerDe<TYPE_DECIMAL64>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DECIMAL128I, DataTypeDecimalSerDe<TYPE_DECIMAL128I>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_DECIMAL256, DataTypeDecimalSerDe<TYPE_DECIMAL256>)
+        DESERIALIZE_FIELD_WITH(OLAP_FIELD_TYPE_BOOL, DataTypeNumberSerDe<TYPE_BOOLEAN>)
+
+#undef DESERIALIZE_FIELD_WITH
+
+    case FieldType::OLAP_FIELD_TYPE_NONE: {
+        // No payload follows the tag.
+        next = data;
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Type ({}) for deserialize_binary_to_field is invalid", tag);
+    }
+
+    return next;
+}
+
} // namespace vectorized
} // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_serde.h
b/be/src/vec/data_types/serde/data_type_serde.h
index a12357ba9f1..e4914acd739 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -94,6 +94,38 @@ class DataTypeSerDe;
using DataTypeSerDeSPtr = std::shared_ptr<DataTypeSerDe>;
using DataTypeSerDeSPtrs = std::vector<DataTypeSerDeSPtr>;
+/// Info that represents a scalar or array field in a decomposed view.
+/// It allows to recreate field with different number
+/// of dimensions or nullability.
+struct FieldInfo {
+ /// The common type id of all scalars in the field.
+ PrimitiveType scalar_type_id = PrimitiveType::INVALID_TYPE;
+ /// Whether the field contains a NULL scalar.
+ bool have_nulls = false;
+ /// If true, the array holds scalars of different types and
+ /// they need to be converted to the common type.
+ bool need_convert = false;
+ /// Number of dimensions in the array. 0 if the field is scalar.
+ size_t num_dimensions = 0;
+
+ // Decimal info: scale and precision of the value.
+ int scale = 0;
+ int precision = 0;
+};
+struct PackedUInt128 {
+ // Zero-initializing constructor is left disabled: PackedUInt128() : value(0) {}
+ PackedUInt128() = default;
+
+ PackedUInt128(const unsigned __int128& value_) { value = value_; }
+ PackedUInt128& operator=(const unsigned __int128& value_) {
+ value = value_;
+ return *this;
+ }
+ PackedUInt128& operator=(const PackedUInt128& rhs) = default;
+
+ uint128_t value;
+} __attribute__((packed));
+
// Deserialize means read from different file format or memory format,
// for example read from arrow, read from parquet.
// Serialize means write the column cell or the total column into another
@@ -421,6 +453,11 @@ public:
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"write_one_cell_to_binary");
}
+ static const uint8_t* deserialize_binary_to_column(const uint8_t* data,
IColumn& column);
+
+ static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
+ FieldInfo& info);
+
protected:
bool _return_object_as_string = false;
// This parameter indicates what level the serde belongs to and is mainly
used for complex types
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h
b/be/src/vec/data_types/serde/data_type_string_serde.h
index 879f2c1dce4..12bc12e6f12 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -233,6 +233,25 @@ public:
data_size);
}
+    /// Reads one length-prefixed string cell from `data` and appends it to `column`.
+    /// Bytes consumed: a `size_t` byte count followed by that many payload bytes.
+    /// Returns the address of the first byte after the payload.
+    static const uint8_t* deserialize_binary_to_column(const uint8_t* data, IColumn& column) {
+        auto& str_col = assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(column);
+        const auto payload_len = unaligned_load<size_t>(data);
+        const uint8_t* payload = data + sizeof(size_t);
+        str_col.insert_data(reinterpret_cast<const char*>(payload), payload_len);
+        return payload + payload_len;
+    }
+
+    /// Reads one length-prefixed string cell from `data` and stores it in `field`.
+    /// Bytes consumed: a `size_t` byte count followed by that many payload bytes.
+    /// `info` is not modified here. Returns the address just past the payload.
+    static const uint8_t* deserialize_binary_to_field(const uint8_t* data, Field& field,
+                                                      FieldInfo& info) {
+        const auto payload_len = unaligned_load<size_t>(data);
+        const uint8_t* payload = data + sizeof(size_t);
+        field = Field::create_field<TYPE_STRING>(
+                String(reinterpret_cast<const char*>(payload), payload_len));
+        return payload + payload_len;
+    }
+
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw)
const override;
private:
diff --git a/be/src/vec/functions/function_variant_element.cpp
b/be/src/vec/functions/function_variant_element.cpp
index fd0fd491b8f..aa2d082d156 100644
--- a/be/src/vec/functions/function_variant_element.cpp
+++ b/be/src/vec/functions/function_variant_element.cpp
@@ -217,9 +217,8 @@ private:
// {"b" : {"c" : 456}}
// b maybe in sparse column, and b.c is in
subolumn, put `b` into root column to distinguish
// from "" which is empty path and root
- const auto& data =
ColumnVariant::deserialize_from_sparse_column(
- &src_sparse_data_values,
lower_bound_index);
- root.insert(data.first, data.second);
+
root.deserialize_from_sparse_column(&src_sparse_data_values,
+
lower_bound_index);
}
}
if (root.size() == sparse_data_offsets.size()) {
diff --git a/be/test/util/test_data/deserialize_from_sparse_column_test.bin
b/be/test/util/test_data/deserialize_from_sparse_column_test.bin
new file mode 100644
index 00000000000..663fd5aaf33
Binary files /dev/null and
b/be/test/util/test_data/deserialize_from_sparse_column_test.bin differ
diff --git a/be/test/vec/columns/column_variant_test.cpp
b/be/test/vec/columns/column_variant_test.cpp
index e777634f6e0..de3d31200dc 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -3130,120 +3130,6 @@ TEST_F(ColumnVariantTest,
subcolumn_operations_coverage) {
dst_subcolumn->insert_range_from(src_column2->get_subcolumns().get_root()->data,
0, 1);
}
- // Test parse_binary_from_sparse_column
- {
- auto column = VariantUtil::construct_basic_varint_column();
- vectorized::Field res;
- FieldInfo field_info;
-
- // Test String type
- {
- std::string test_str = "test_data";
- std::vector<char> binary_data;
- size_t str_size = test_str.size();
- binary_data.resize(sizeof(size_t) + test_str.size());
- memcpy(binary_data.data(), &str_size, sizeof(size_t));
- memcpy(binary_data.data() + sizeof(size_t), test_str.data(),
test_str.size());
- const char* data = binary_data.data();
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_STRING,
data, res,
- field_info);
- EXPECT_EQ(res.get<String>(), "test_data");
- }
-
- // Test integer types
- {
- Int8 int8_val = 42;
- const char* data = reinterpret_cast<const char*>(&int8_val);
-
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_TINYINT, data, res,
- field_info);
- EXPECT_EQ(res.get<Int8>(), 42);
- }
-
- {
- Int16 int16_val = 12345;
- const char* data = reinterpret_cast<const char*>(&int16_val);
-
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_SMALLINT, data, res,
- field_info);
- EXPECT_EQ(res.get<Int16>(), 12345);
- }
-
- {
- Int32 int32_val = 123456789;
- const char* data = reinterpret_cast<const char*>(&int32_val);
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_INT,
data, res, field_info);
- EXPECT_EQ(res.get<Int32>(), 123456789);
- }
-
- {
- Int64 int64_val = 1234567890123456789LL;
- const char* data = reinterpret_cast<const char*>(&int64_val);
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_BIGINT,
data, res,
- field_info);
- EXPECT_EQ(res.get<Int64>(), 1234567890123456789LL);
- }
-
- // Test floating point types
- {
- Float32 float32_val = 3.1415901f;
- const char* data = reinterpret_cast<const char*>(&float32_val);
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_FLOAT,
data, res,
- field_info);
- EXPECT_FLOAT_EQ(res.get<Float32>(), 0);
- }
-
- {
- Float64 float64_val = 3.141592653589793;
- const char* data = reinterpret_cast<const char*>(&float64_val);
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_DOUBLE,
data, res,
- field_info);
- EXPECT_DOUBLE_EQ(res.get<Float64>(), 3.141592653589793);
- }
-
- // Test JSONB type
- {
- std::string json_str = "{\"key\": \"value\"}";
- std::vector<char> binary_data;
- size_t json_size = json_str.size();
- binary_data.resize(sizeof(size_t) + json_str.size());
- memcpy(binary_data.data(), &json_size, sizeof(size_t));
- memcpy(binary_data.data() + sizeof(size_t), json_str.data(),
json_str.size());
- const char* data = binary_data.data();
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_JSONB,
data, res,
- field_info);
- }
-
- // Test Array type
- {
- std::vector<char> binary_data;
- size_t array_size = 2;
- binary_data.resize(sizeof(size_t) + 2 * (sizeof(uint8_t) +
sizeof(Int32)));
- char* data_ptr = binary_data.data();
-
- // Write array size
- memcpy(data_ptr, &array_size, sizeof(size_t));
- data_ptr += sizeof(size_t);
-
- // Write first element (Int32)
- *data_ptr++ = static_cast<uint8_t>(PrimitiveType::TYPE_INT);
- Int32 val1 = 42;
- memcpy(data_ptr, &val1, sizeof(Int32));
- data_ptr += sizeof(Int32);
-
- // Write second element (Int32)
- *data_ptr++ = static_cast<uint8_t>(PrimitiveType::TYPE_INT);
- Int32 val2 = 43;
- memcpy(data_ptr, &val2, sizeof(Int32));
-
- const char* data = binary_data.data();
- parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_ARRAY,
data, res,
- field_info);
- const Array& array = res.get<Array>();
- EXPECT_EQ(array.size(), 2);
- EXPECT_EQ(array[0].get<Int32>(), 42);
- EXPECT_EQ(array[1].get<Int32>(), 43);
- }
- }
-
// Test add_sub_column
{
auto column = VariantUtil::construct_basic_varint_column();
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index f4e36d3ab47..62e4a65f760 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -25,6 +25,7 @@
#include <stdlib.h>
#include <time.h>
+#include <fstream>
#include <iostream>
#include <memory>
#include <string>
@@ -41,6 +42,7 @@
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
+#include "vec/columns/column_variant.h"
#include "vec/columns/column_vector.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
@@ -271,4 +273,378 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
}
}
+TEST(DataTypeSerDeTest, DeserializeFromSparseColumnTest) {
+ auto sparse_column = ColumnVariant::create_sparse_column_fn();
+ auto& column_map = assert_cast<ColumnMap&>(*sparse_column);
+ // auto& key = assert_cast<ColumnString&>(column_map.get_keys());
+ auto& value = assert_cast<ColumnString&>(column_map.get_values());
+ // auto& offsets = column_map.get_offsets();
+ auto data_type = ColumnVariant::get_sparse_column_type();
+ std::string file_path = std::string(getenv("ROOT")) +
+
"/be/test/util/test_data/deserialize_from_sparse_column_test.bin";
+
+ // Field string_field = Field::create_field<TYPE_STRING>("123");
+ // FieldInfo info = {PrimitiveType::TYPE_STRING, false, false, 0};
+ // ColumnVariant::Subcolumn string_subcolumn = {0, true, true};
+ // string_subcolumn.insert(string_field, info);
+ // string_subcolumn.serialize_to_sparse_column(&key, "a", &value, 0);
+
+ // Field int_field = Field::create_field<TYPE_INT>(123);
+ // info.scalar_type_id = PrimitiveType::TYPE_INT;
+ // ColumnVariant::Subcolumn int_subcolumn = {0, true, true};
+ // int_subcolumn.insert(int_field, info);
+ // int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 0);
+
+ // Field largeint_field =
Field::create_field<TYPE_LARGEINT>(__int128_t(123));
+ // info.scalar_type_id = PrimitiveType::TYPE_LARGEINT;
+ // ColumnVariant::Subcolumn largeint_subcolumn = {0, true, true};
+ // largeint_subcolumn.insert(largeint_field, info);
+ // largeint_subcolumn.serialize_to_sparse_column(&key, "c", &value, 0);
+
+ // Field double_field = Field::create_field<TYPE_DOUBLE>(123.456);
+ // info.scalar_type_id = PrimitiveType::TYPE_DOUBLE;
+ // ColumnVariant::Subcolumn double_subcolumn = {0, true, true};
+ // double_subcolumn.insert(double_field, info);
+ // double_subcolumn.serialize_to_sparse_column(&key, "d", &value, 0);
+
+ // Field bool_field = Field::create_field<TYPE_BOOLEAN>(true);
+ // info.scalar_type_id = PrimitiveType::TYPE_BOOLEAN;
+ // ColumnVariant::Subcolumn bool_subcolumn = {0, true, true};
+ // bool_subcolumn.insert(bool_field, info);
+ // bool_subcolumn.serialize_to_sparse_column(&key, "e", &value, 0);
+
+ // Field datetime_field = Field::create_field<TYPE_DATETIMEV2>(23232323);
+ // info.scalar_type_id = PrimitiveType::TYPE_DATETIMEV2;
+ // info.scale = 3;
+ // ColumnVariant::Subcolumn datetime_subcolumn = {0, true, true};
+ // datetime_subcolumn.insert(datetime_field, info);
+ // datetime_subcolumn.serialize_to_sparse_column(&key, "f", &value, 0);
+
+ // Field date_field = Field::create_field<TYPE_DATEV2>(154543245);
+ // info.scalar_type_id = PrimitiveType::TYPE_DATEV2;
+ // info.scale = 3;
+ // ColumnVariant::Subcolumn date_subcolumn = {0, true, true};
+ // date_subcolumn.insert(date_field, info);
+ // date_subcolumn.serialize_to_sparse_column(&key, "g", &value, 0);
+
+ // Field ipv4_field = Field::create_field<TYPE_IPV4>(367357);
+ // info.scalar_type_id = PrimitiveType::TYPE_IPV4;
+ // ColumnVariant::Subcolumn ipv4_subcolumn = {0, true, true};
+ // ipv4_subcolumn.insert(ipv4_field, info);
+ // ipv4_subcolumn.serialize_to_sparse_column(&key, "h", &value, 0);
+
+ // Field ipv6_field = Field::create_field<TYPE_IPV6>(36534645);
+ // info.scalar_type_id = PrimitiveType::TYPE_IPV6;
+ // ColumnVariant::Subcolumn ipv6_subcolumn = {0, true, true};
+ // ipv6_subcolumn.insert(ipv6_field, info);
+ // ipv6_subcolumn.serialize_to_sparse_column(&key, "i", &value, 0);
+
+ // Field decimal32_field =
+ //
Field::create_field<TYPE_DECIMAL32>(DecimalField<Decimal32>(3456345634, 2));
+ // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL32;
+ // info.precision = 5;
+ // info.scale = 2;
+ // ColumnVariant::Subcolumn decimal32_subcolumn = {0, true, true};
+ // decimal32_subcolumn.insert(decimal32_field, info);
+ // decimal32_subcolumn.serialize_to_sparse_column(&key, "j", &value, 0);
+
+ // Field decimal64_field =
+ //
Field::create_field<TYPE_DECIMAL64>(DecimalField<Decimal64>(13452435, 6));
+ // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL64;
+ // info.precision = 12;
+ // info.scale = 6;
+ // ColumnVariant::Subcolumn decimal64_subcolumn = {0, true, true};
+ // decimal64_subcolumn.insert(decimal64_field, info);
+ // decimal64_subcolumn.serialize_to_sparse_column(&key, "k", &value, 0);
+
+ // Field decimal128i_field =
+ //
Field::create_field<TYPE_DECIMAL128I>(DecimalField<Decimal128V3>(2342345, 12));
+ // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL128I;
+ // info.precision = 32;
+ // info.scale = 12;
+ // ColumnVariant::Subcolumn decimal128i_subcolumn = {0, true, true};
+ // decimal128i_subcolumn.insert(decimal128i_field, info);
+ // decimal128i_subcolumn.serialize_to_sparse_column(&key, "l", &value, 0);
+
+ // Field decimal256_field =
+ //
Field::create_field<TYPE_DECIMAL256>(DecimalField<Decimal256>(Decimal256(2345243),
5));
+ // info.scalar_type_id = PrimitiveType::TYPE_DECIMAL256;
+ // info.precision = 52;
+ // info.scale = 5;
+ // ColumnVariant::Subcolumn decimal256_subcolumn = {0, true, true};
+ // decimal256_subcolumn.insert(decimal256_field, info);
+ // decimal256_subcolumn.serialize_to_sparse_column(&key, "m", &value, 0);
+
+ // Field jsonb_field = Field::create_field<TYPE_JSONB>(JsonbField("abc",
3));
+ // info.scalar_type_id = PrimitiveType::TYPE_JSONB;
+ // ColumnVariant::Subcolumn jsonb_subcolumn = {0, true, true};
+ // jsonb_subcolumn.insert(jsonb_field, info);
+ // jsonb_subcolumn.serialize_to_sparse_column(&key, "n", &value, 0);
+
+ // Field array_field = Field::create_field<TYPE_ARRAY>(Array(3));
+ // info.scalar_type_id = PrimitiveType::TYPE_JSONB;
+ // info.num_dimensions = 1;
+ // auto& array = array_field.get<Array>();
+ // array[0] = jsonb_field;
+ // array[1] = Field();
+ // array[2] = jsonb_field;
+
+ // ColumnVariant::Subcolumn array_subcolumn = {0, true, true};
+ // array_subcolumn.insert(array_field, info);
+ // array_subcolumn.serialize_to_sparse_column(&key, "o", &value, 0);
+ // offsets.push_back(key.size());
+
+ // auto size =
data_type->get_uncompressed_serialized_bytes(*sparse_column, 8);
+ // char* buf = new char[size];
+ // data_type->serialize(*sparse_column, buf, 8);
+ // {
+ // std::ofstream ofs(file_path, std::ios::binary);
+ // ASSERT_TRUE(ofs.is_open());
+ // ofs.write(buf, static_cast<std::streamsize>(size));
+ // ofs.close();
+ // }
+ // delete[] buf;
+
+ std::string read_data;
+ {
+ std::ifstream ifs(file_path, std::ios::binary);
+ ASSERT_TRUE(ifs.is_open());
+ ifs.seekg(0, std::ios::end);
+ std::streamsize fsize = ifs.tellg();
+ ifs.seekg(0, std::ios::beg);
+ read_data.resize(static_cast<size_t>(fsize));
+ ifs.read(read_data.data(), fsize);
+ }
+
+ sparse_column->clear();
+
+ data_type->deserialize(read_data.data(), &sparse_column, 8);
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 0);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_STRING);
+ EXPECT_EQ(subcolumn.get_last_field().get<String>(), "123");
+ subcolumn.deserialize_from_sparse_column(&value, 0);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_STRING);
+ EXPECT_EQ(subcolumn.get_last_field().get<String>(), "123");
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 1);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
PrimitiveType::TYPE_INT);
+ EXPECT_EQ(subcolumn.get_last_field().get<Int32>(), 123);
+ subcolumn.deserialize_from_sparse_column(&value, 1);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
PrimitiveType::TYPE_INT);
+ EXPECT_EQ(subcolumn.get_last_field().get<Int32>(), 123);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 2);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_LARGEINT);
+ EXPECT_EQ(subcolumn.get_last_field().get<Int64>(), 123);
+ subcolumn.deserialize_from_sparse_column(&value, 2);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_LARGEINT);
+ EXPECT_EQ(subcolumn.get_last_field().get<Int64>(), 123);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 3);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DOUBLE);
+ EXPECT_EQ(subcolumn.get_last_field().get<double>(), 123.456);
+ subcolumn.deserialize_from_sparse_column(&value, 3);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DOUBLE);
+ EXPECT_EQ(subcolumn.get_last_field().get<double>(), 123.456);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 4);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_BOOLEAN);
+ EXPECT_EQ(subcolumn.get_last_field().get<bool>(), true);
+ subcolumn.deserialize_from_sparse_column(&value, 4);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_BOOLEAN);
+ EXPECT_EQ(subcolumn.get_last_field().get<bool>(), true);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 5);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DATETIMEV2);
+ EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 23232323);
+ subcolumn.deserialize_from_sparse_column(&value, 5);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DATETIMEV2);
+ EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 23232323);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 6);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DATEV2);
+ EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 154543245);
+ subcolumn.deserialize_from_sparse_column(&value, 6);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DATEV2);
+ EXPECT_EQ(subcolumn.get_last_field().get<UInt64>(), 154543245);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 7);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_IPV4);
+ EXPECT_EQ(subcolumn.get_last_field().get<IPv4>(),
static_cast<IPv4>(367357));
+ subcolumn.deserialize_from_sparse_column(&value, 7);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_IPV4);
+ EXPECT_EQ(subcolumn.get_last_field().get<IPv4>(),
static_cast<IPv4>(367357));
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 8);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_IPV6);
+ EXPECT_EQ(subcolumn.get_last_field().get<IPv6>(),
static_cast<IPv6>(36534645));
+ subcolumn.deserialize_from_sparse_column(&value, 8);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_IPV6);
+ EXPECT_EQ(subcolumn.get_last_field().get<IPv6>(),
static_cast<IPv6>(36534645));
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 9);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL32);
+ auto v = subcolumn.get_last_field().get<DecimalField<Decimal32>>();
+ EXPECT_EQ(static_cast<Int32>(v.get_value()),
static_cast<Int32>(3456345634));
+ subcolumn.deserialize_from_sparse_column(&value, 9);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL32);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 10);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL64);
+ auto v = subcolumn.get_last_field().get<DecimalField<Decimal64>>();
+ EXPECT_EQ(static_cast<Int64>(v.get_value()),
static_cast<Int64>(13452435));
+ subcolumn.deserialize_from_sparse_column(&value, 10);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL64);
+ v = subcolumn.get_last_field().get<DecimalField<Decimal64>>();
+ EXPECT_EQ(static_cast<Int64>(v.get_value()),
static_cast<Int64>(13452435));
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 11);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL128I);
+ auto v = subcolumn.get_last_field().get<DecimalField<Decimal128V3>>();
+ EXPECT_EQ(static_cast<Int128>(v.get_value()),
static_cast<Int128>(2342345));
+ subcolumn.deserialize_from_sparse_column(&value, 11);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL128I);
+ v = subcolumn.get_last_field().get<DecimalField<Decimal128V3>>();
+ EXPECT_EQ(static_cast<Int128>(v.get_value()),
static_cast<Int128>(2342345));
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 12);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL256);
+ auto v = subcolumn.get_last_field().get<DecimalField<Decimal256>>();
+ EXPECT_TRUE(v.get_value() == Decimal256(2345243));
+ subcolumn.deserialize_from_sparse_column(&value, 12);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_DECIMAL256);
+ v = subcolumn.get_last_field().get<DecimalField<Decimal256>>();
+ EXPECT_TRUE(v.get_value() == Decimal256(2345243));
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 13);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_JSONB);
+ subcolumn.deserialize_from_sparse_column(&value, 13);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_JSONB);
+ }
+
+ {
+ ColumnVariant::Subcolumn subcolumn = {0, true, true};
+ subcolumn.deserialize_from_sparse_column(&value, 14);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(subcolumn.get_dimensions(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_base_type_id(),
PrimitiveType::TYPE_JSONB);
+ auto v = subcolumn.get_last_field();
+ auto& arr = v.get<Array>();
+ EXPECT_EQ(arr.size(), 3);
+ EXPECT_FALSE(arr[0].is_null());
+ EXPECT_TRUE(arr[1].is_null());
+ EXPECT_FALSE(arr[2].is_null());
+ subcolumn.deserialize_from_sparse_column(&value, 14);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(subcolumn.get_dimensions(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_base_type_id(),
PrimitiveType::TYPE_JSONB);
+
+ v = subcolumn.get_last_field();
+ arr = v.get<Array>();
+ EXPECT_EQ(arr.size(), 3);
+ EXPECT_FALSE(arr[0].is_null());
+ EXPECT_TRUE(arr[1].is_null());
+ EXPECT_FALSE(arr[2].is_null());
+ }
+}
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]