This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new f7c19d49806 [feature](function) implement variant_type to get inner
types in variant columns (#49909)
f7c19d49806 is described below
commit f7c19d49806b63aa2b27168e054afa2d8523973d
Author: lihangyu <[email protected]>
AuthorDate: Thu Apr 10 10:49:24 2025 +0800
[feature](function) implement variant_type to get inner types in variant
columns (#49909)
---
be/src/vec/columns/column_object.cpp | 290 +++------------------
be/src/vec/columns/column_object.h | 1 -
be/src/vec/common/schema_util.cpp | 226 ++++++++++++++++
be/src/vec/common/schema_util.h | 2 +
be/src/vec/data_types/data_type_jsonb.h | 4 +-
be/src/vec/functions/function_variant_type.cpp | 105 ++++++++
be/src/vec/functions/simple_function_factory.h | 2 +
be/src/vec/json/parse2column.cpp | 2 +-
.../doris/catalog/BuiltinScalarFunctions.java | 4 +-
.../functions/scalar/GetVariantType.java | 69 +++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
regression-test/data/variant_p0/predefine/load.out | Bin 7454 -> 7526 bytes
.../suites/variant_p0/predefine/load.groovy | 22 +-
13 files changed, 466 insertions(+), 266 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index 18ff9033cf9..1b607a7e87e 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -137,265 +137,8 @@ size_t get_number_of_dimensions(const IDataType& type) {
}
return num_dimensions;
}
-
-/// Calculates number of dimensions in array field.
-/// Returns 0 for scalar fields.
-class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
-public:
- size_t operator()(const Array& x) const {
- const size_t size = x.size();
- size_t dimensions = 0;
- for (size_t i = 0; i < size; ++i) {
- size_t element_dimensions = apply_visitor(*this, x[i]);
- dimensions = std::max(dimensions, element_dimensions);
- }
- return 1 + dimensions;
- }
- size_t operator()(const VariantField& x) { return apply_visitor(*this,
x.get_field()); }
- template <typename T>
- size_t operator()(const T&) const {
- return 0;
- }
-};
-
-// Visitor that allows to get type of scalar field
-// but exclude fields contain complex field.This is a faster version
-// for FieldVisitorToScalarType which does not support complex field.
-class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
-public:
- size_t operator()(const Array& x) {
- throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not
supported");
- }
- size_t operator()(const UInt64& x) {
- if (x <= std::numeric_limits<Int8>::max()) {
- type = TypeIndex::Int8;
- } else if (x <= std::numeric_limits<Int16>::max()) {
- type = TypeIndex::Int16;
- } else if (x <= std::numeric_limits<Int32>::max()) {
- type = TypeIndex::Int32;
- } else {
- type = TypeIndex::Int64;
- }
- return 1;
- }
- size_t operator()(const Int64& x) {
- if (x <= std::numeric_limits<Int8>::max() && x >=
std::numeric_limits<Int8>::min()) {
- type = TypeIndex::Int8;
- } else if (x <= std::numeric_limits<Int16>::max() &&
- x >= std::numeric_limits<Int16>::min()) {
- type = TypeIndex::Int16;
- } else if (x <= std::numeric_limits<Int32>::max() &&
- x >= std::numeric_limits<Int32>::min()) {
- type = TypeIndex::Int32;
- } else {
- type = TypeIndex::Int64;
- }
- return 1;
- }
- size_t operator()(const JsonbField& x) {
- type = TypeIndex::JSONB;
- return 1;
- }
- size_t operator()(const Null&) {
- have_nulls = true;
- return 1;
- }
- size_t operator()(const VariantMap&) {
- type = TypeIndex::VARIANT;
- return 1;
- }
- size_t operator()(const VariantField& x) {
- typed_field_info =
- FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(),
x.get_precision()};
- return 1;
- }
- template <typename T>
- size_t operator()(const T&) {
- type = TypeId<NearestFieldType<T>>::value;
- return 1;
- }
- void get_scalar_type(TypeIndex* data_type, int* precision, int* scale)
const {
- if (typed_field_info.has_value()) {
- *data_type = typed_field_info->scalar_type_id;
- *precision = typed_field_info->precision;
- *scale = typed_field_info->scale;
- return;
- }
- *data_type = type;
- }
- bool contain_nulls() const { return have_nulls; }
-
- bool need_convert_field() const { return false; }
-
-private:
- // initialized when operator()(const VariantField& x)
- std::optional<FieldInfo> typed_field_info;
- TypeIndex type = TypeIndex::Nothing;
- bool have_nulls = false;
-};
-
-/// Visitor that allows to get type of scalar field
-/// or least common type of scalars in array.
-/// More optimized version of FieldToDataType.
-class FieldVisitorToScalarType : public StaticVisitor<size_t> {
-public:
- using FieldType = Field::Types::Which;
- size_t operator()(const Array& x) {
- size_t size = x.size();
- for (size_t i = 0; i < size; ++i) {
- apply_visitor(*this, x[i]);
- }
- return 0;
- }
- // TODO doris not support unsigned integers for now
- // treat as signed integers
- size_t operator()(const UInt64& x) {
- field_types.insert(FieldType::UInt64);
- if (x <= std::numeric_limits<Int8>::max()) {
- type_indexes.insert(TypeIndex::Int8);
- } else if (x <= std::numeric_limits<Int16>::max()) {
- type_indexes.insert(TypeIndex::Int16);
- } else if (x <= std::numeric_limits<Int32>::max()) {
- type_indexes.insert(TypeIndex::Int32);
- } else {
- type_indexes.insert(TypeIndex::Int64);
- }
- return 0;
- }
- size_t operator()(const Int64& x) {
- field_types.insert(FieldType::Int64);
- if (x <= std::numeric_limits<Int8>::max() && x >=
std::numeric_limits<Int8>::min()) {
- type_indexes.insert(TypeIndex::Int8);
- } else if (x <= std::numeric_limits<Int16>::max() &&
- x >= std::numeric_limits<Int16>::min()) {
- type_indexes.insert(TypeIndex::Int16);
- } else if (x <= std::numeric_limits<Int32>::max() &&
- x >= std::numeric_limits<Int32>::min()) {
- type_indexes.insert(TypeIndex::Int32);
- } else {
- type_indexes.insert(TypeIndex::Int64);
- }
- return 0;
- }
- size_t operator()(const JsonbField& x) {
- field_types.insert(FieldType::JSONB);
- type_indexes.insert(TypeIndex::JSONB);
- return 0;
- }
- size_t operator()(const VariantMap&) {
- field_types.insert(FieldType::VariantMap);
- type_indexes.insert(TypeIndex::VARIANT);
- return 0;
- }
- size_t operator()(const VariantField& x) {
- if (x.get_type_id() == TypeIndex::Array) {
- apply_visitor(*this, x.get_field());
- } else {
- typed_field_info =
- FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(),
x.get_precision()};
- }
- return 0;
- }
- size_t operator()(const Null&) {
- have_nulls = true;
- return 0;
- }
- template <typename T>
- size_t operator()(const T&) {
- Field::EnumToType<Field::Types::Array>::Type a;
- field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value);
- type_indexes.insert(TypeId<NearestFieldType<T>>::value);
- return 0;
- }
- void get_scalar_type(TypeIndex* type, int* precision, int* scale) const {
- if (typed_field_info.has_value()) {
- // fast path
- *type = typed_field_info->scalar_type_id;
- *precision = typed_field_info->precision;
- *scale = typed_field_info->scale;
- return;
- }
- DataTypePtr data_type;
- get_least_supertype_jsonb(type_indexes, &data_type);
- *type = data_type->get_type_id();
- }
- bool contain_nulls() const { return have_nulls; }
- bool need_convert_field() const { return field_types.size() > 1; }
-
-private:
- // initialized when operator()(const VariantField& x)
- std::optional<FieldInfo> typed_field_info;
- phmap::flat_hash_set<TypeIndex> type_indexes;
- phmap::flat_hash_set<FieldType> field_types;
- bool have_nulls = false;
-};
-
-/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
-/// and replaces all scalars or nested arrays to @replacement at that level.
-class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
-public:
- FieldVisitorReplaceScalars(const Field& replacement_, size_t
num_dimensions_to_keep_)
- : replacement(replacement_),
num_dimensions_to_keep(num_dimensions_to_keep_) {}
-
- Field operator()(const Array& x) const {
- if (num_dimensions_to_keep == 0) {
- return replacement;
- }
-
- const size_t size = x.size();
- Array res(size);
- for (size_t i = 0; i < size; ++i) {
- res[i] = apply_visitor(
- FieldVisitorReplaceScalars(replacement,
num_dimensions_to_keep - 1), x[i]);
- }
- return res;
- }
-
- template <typename T>
- Field operator()(const T&) const {
- return replacement;
- }
-
-private:
- const Field& replacement;
- size_t num_dimensions_to_keep;
-};
-
} // namespace
-template <typename Visitor>
-void get_field_info_impl(const Field& field, FieldInfo* info) {
- Visitor to_scalar_type_visitor;
- apply_visitor(to_scalar_type_visitor, field);
- TypeIndex type_id;
- int precision = 0;
- int scale = 0;
- to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale);
- // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
- *info = {
- type_id,
- to_scalar_type_visitor.contain_nulls(),
- to_scalar_type_visitor.need_convert_field(),
- apply_visitor(FieldVisitorToNumberOfDimensions(), field),
- scale,
- precision,
- };
-}
-
-bool is_complex_field(const Field& field) {
- return field.is_complex_field() ||
- (field.is_variant_field() &&
- field.get<const VariantField&>().get_field().is_complex_field());
-}
-
-void get_field_info(const Field& field, FieldInfo* info) {
- if (is_complex_field(field)) {
- get_field_info_impl<FieldVisitorToScalarType>(field, info);
- } else {
- get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
- }
-}
-
#ifdef NDEBUG
#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */
#else
@@ -472,7 +215,7 @@ Field get_field_from_variant_field(const Field& field) {
void ColumnObject::Subcolumn::insert(Field field) {
FieldInfo info;
- get_field_info(field, &info);
+ schema_util::get_field_info(field, &info);
field = get_field_from_variant_field(field);
insert(std::move(field), std::move(info));
}
@@ -2481,6 +2224,37 @@ bool
ColumnObject::try_insert_many_defaults_from_nested(const Subcolumns::NodePt
return true;
}
+/// Field visitor that preserves the outermost @num_dimensions_to_keep array levels
+/// and substitutes @replacement for everything found below them, whether that is
+/// a scalar or a deeper nested array.
+class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
+public:
+    FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
+            : replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}
+
+    /// Arrays keep their shape while there are dimensions left to preserve;
+    /// otherwise the whole array collapses into the replacement value.
+    Field operator()(const Array& x) const {
+        if (num_dimensions_to_keep == 0) {
+            return replacement;
+        }
+
+        // Each element is rewritten with one fewer dimension left to keep.
+        const FieldVisitorReplaceScalars element_visitor(replacement, num_dimensions_to_keep - 1);
+        Array rewritten(x.size());
+        for (size_t pos = 0; pos < rewritten.size(); ++pos) {
+            rewritten[pos] = apply_visitor(element_visitor, x[pos]);
+        }
+        return rewritten;
+    }
+
+    /// Anything that is not an array is replaced unconditionally.
+    template <typename T>
+    Field operator()(const T&) const {
+        return replacement;
+    }
+
+private:
+    // Held by reference: the visitor must not outlive the Field given to the constructor.
+    const Field& replacement;
+    size_t num_dimensions_to_keep;
+};
+
bool ColumnObject::try_insert_default_from_nested(const Subcolumns::NodePtr&
entry) const {
const auto* leaf = get_leaf_of_the_same_nested(entry);
if (!leaf) {
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index ad8a693e736..8a76151bba6 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -84,7 +84,6 @@ struct FieldInfo {
int precision = 0;
};
-void get_field_info(const Field& field, FieldInfo* info);
/** A column that represents object with dynamic set of subcolumns.
* Subcolumns are identified by paths in document and are stored in
* a trie-like structure. ColumnObject is not suitable for writing into tables
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index 17430380310..12049e20358 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -64,6 +64,7 @@
#include "vec/columns/column_object.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
+#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
@@ -904,5 +905,230 @@ void calculate_variant_stats(const IColumn&
encoded_sparse_column,
}
}
+/// Calculates the number of array dimensions of a field.
+/// Returns 0 for scalar fields; for an array, 1 plus the deepest nesting found
+/// among its elements.
+class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
+public:
+    size_t operator()(const Array& x) const {
+        // Elements may have mismatching depths, e.g. [1, 2, [1, 2, 3]]: keep the maximum.
+        const size_t size = x.size();
+        size_t dimensions = 0;
+        for (size_t i = 0; i < size; ++i) {
+            size_t element_dimensions = apply_visitor(*this, x[i]);
+            dimensions = std::max(dimensions, element_dimensions);
+        }
+        return 1 + dimensions;
+    }
+    /// A typed variant field is transparent: the result is that of the wrapped field.
+    /// This overload has to be const like the others. The Array overload recurses
+    /// through a const `*this`; a non-const overload is not viable there, so the generic
+    /// overload below would be selected instead and silently report 0 dimensions.
+    size_t operator()(const VariantField& x) const { return apply_visitor(*this, x.get_field()); }
+    /// Every remaining field kind is treated as a scalar.
+    template <typename T>
+    size_t operator()(const T&) const {
+        return 0;
+    }
+};
+
+// Visitor that deduces the scalar type of a field that contains no complex
+// (array) value. It is the cheaper counterpart of FieldVisitorToScalarType:
+// arrays are rejected outright instead of being traversed.
+class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
+public:
+    // Arrays are out of scope for this visitor; reaching this overload is an error.
+    size_t operator()(const Array& x) {
+        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
+    }
+    // Unsigned values are classified against the positive limits of the signed types,
+    // widest first; anything above the Int32 limit is reported as Int64.
+    size_t operator()(const UInt64& x) {
+        if (x > std::numeric_limits<Int32>::max()) {
+            type = TypeIndex::Int64;
+        } else if (x > std::numeric_limits<Int16>::max()) {
+            type = TypeIndex::Int32;
+        } else if (x > std::numeric_limits<Int8>::max()) {
+            type = TypeIndex::Int16;
+        } else {
+            type = TypeIndex::Int8;
+        }
+        return 1;
+    }
+    // Signed values get the narrowest signed type whose [min, max] range contains them.
+    size_t operator()(const Int64& x) {
+        if (x > std::numeric_limits<Int32>::max() || x < std::numeric_limits<Int32>::min()) {
+            type = TypeIndex::Int64;
+        } else if (x > std::numeric_limits<Int16>::max() ||
+                   x < std::numeric_limits<Int16>::min()) {
+            type = TypeIndex::Int32;
+        } else if (x > std::numeric_limits<Int8>::max() || x < std::numeric_limits<Int8>::min()) {
+            type = TypeIndex::Int16;
+        } else {
+            type = TypeIndex::Int8;
+        }
+        return 1;
+    }
+    size_t operator()(const JsonbField& x) {
+        type = TypeIndex::JSONB;
+        return 1;
+    }
+    // A null only raises the null flag; the deduced type is left as it was.
+    size_t operator()(const Null&) {
+        have_nulls = true;
+        return 1;
+    }
+    size_t operator()(const VariantMap&) {
+        type = TypeIndex::VARIANT;
+        return 1;
+    }
+    // A typed variant field already carries its type id, scale and precision.
+    size_t operator()(const VariantField& x) {
+        typed_field_info =
+                FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()};
+        return 1;
+    }
+    // Fallback for every other scalar: use the type id of its nearest field type.
+    template <typename T>
+    size_t operator()(const T&) {
+        type = TypeId<NearestFieldType<T>>::value;
+        return 1;
+    }
+    // Reports the deduced type. @precision and @scale are only written when a typed
+    // variant field was visited; otherwise the caller's values are left untouched.
+    void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) const {
+        if (!typed_field_info.has_value()) {
+            *data_type = type;
+            return;
+        }
+        const FieldInfo& typed = *typed_field_info;
+        *data_type = typed.scalar_type_id;
+        *precision = typed.precision;
+        *scale = typed.scale;
+    }
+    bool contain_nulls() const { return have_nulls; }
+
+    // A single scalar never mixes field types, so no conversion is ever requested.
+    bool need_convert_field() const { return false; }
+
+private:
+    // Set only by operator()(const VariantField& x).
+    std::optional<FieldInfo> typed_field_info;
+    TypeIndex type = TypeIndex::Nothing;
+    bool have_nulls = false;
+};
+
+/// Visitor that deduces the scalar type of a field, or the least common type of
+/// the scalars stored in a (possibly nested) array.
+/// More optimized version of FieldToDataType.
+class FieldVisitorToScalarType : public StaticVisitor<size_t> {
+public:
+    using FieldType = Field::Types::Which;
+    /// Arrays contribute the types of all their elements, recursively.
+    size_t operator()(const Array& x) {
+        size_t size = x.size();
+        for (size_t i = 0; i < size; ++i) {
+            apply_visitor(*this, x[i]);
+        }
+        return 0;
+    }
+    // TODO doris not support unsigned integers for now
+    // treat as signed integers
+    size_t operator()(const UInt64& x) {
+        field_types.insert(FieldType::UInt64);
+        if (x <= std::numeric_limits<Int8>::max()) {
+            type_indexes.insert(TypeIndex::Int8);
+        } else if (x <= std::numeric_limits<Int16>::max()) {
+            type_indexes.insert(TypeIndex::Int16);
+        } else if (x <= std::numeric_limits<Int32>::max()) {
+            type_indexes.insert(TypeIndex::Int32);
+        } else {
+            type_indexes.insert(TypeIndex::Int64);
+        }
+        return 0;
+    }
+    /// Signed values record the narrowest signed type whose range contains them.
+    size_t operator()(const Int64& x) {
+        field_types.insert(FieldType::Int64);
+        if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) {
+            type_indexes.insert(TypeIndex::Int8);
+        } else if (x <= std::numeric_limits<Int16>::max() &&
+                   x >= std::numeric_limits<Int16>::min()) {
+            type_indexes.insert(TypeIndex::Int16);
+        } else if (x <= std::numeric_limits<Int32>::max() &&
+                   x >= std::numeric_limits<Int32>::min()) {
+            type_indexes.insert(TypeIndex::Int32);
+        } else {
+            type_indexes.insert(TypeIndex::Int64);
+        }
+        return 0;
+    }
+    size_t operator()(const JsonbField& x) {
+        field_types.insert(FieldType::JSONB);
+        type_indexes.insert(TypeIndex::JSONB);
+        return 0;
+    }
+    size_t operator()(const VariantMap&) {
+        field_types.insert(FieldType::VariantMap);
+        type_indexes.insert(TypeIndex::VARIANT);
+        return 0;
+    }
+    /// Typed array fields are traversed like plain arrays; any other typed field
+    /// supplies its type id, scale and precision directly.
+    size_t operator()(const VariantField& x) {
+        if (x.get_type_id() == TypeIndex::Array) {
+            apply_visitor(*this, x.get_field());
+        } else {
+            typed_field_info =
+                    FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()};
+        }
+        return 0;
+    }
+    size_t operator()(const Null&) {
+        have_nulls = true;
+        return 0;
+    }
+    /// Fallback for every other scalar: record its nearest field type.
+    /// (The unused local Array that used to be default-constructed here on every
+    /// call has been removed; it had no effect on the result.)
+    template <typename T>
+    size_t operator()(const T&) {
+        field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value);
+        type_indexes.insert(TypeId<NearestFieldType<T>>::value);
+        return 0;
+    }
+    /// Reports the deduced type. @precision and @scale are only written on the
+    /// typed-field fast path; otherwise the caller's values are left untouched and
+    /// the type is the least supertype of everything that was visited.
+    void get_scalar_type(TypeIndex* type, int* precision, int* scale) const {
+        if (typed_field_info.has_value()) {
+            // fast path
+            *type = typed_field_info->scalar_type_id;
+            *precision = typed_field_info->precision;
+            *scale = typed_field_info->scale;
+            return;
+        }
+        DataTypePtr data_type;
+        get_least_supertype_jsonb(type_indexes, &data_type);
+        *type = data_type->get_type_id();
+    }
+    bool contain_nulls() const { return have_nulls; }
+    /// Conversion is requested as soon as more than one field type was seen.
+    bool need_convert_field() const { return field_types.size() > 1; }
+
+private:
+    // initialized when operator()(const VariantField& x)
+    std::optional<FieldInfo> typed_field_info;
+    phmap::flat_hash_set<TypeIndex> type_indexes;
+    phmap::flat_hash_set<FieldType> field_types;
+    bool have_nulls = false;
+};
+
+/// Fills @info for @field, using @Visitor to deduce the scalar type.
+template <typename Visitor>
+void get_field_info_impl(const Field& field, FieldInfo* info) {
+    Visitor type_visitor;
+    apply_visitor(type_visitor, field);
+
+    // Precision and scale start at 0 in case the visitor does not write them.
+    TypeIndex scalar_type;
+    int precision = 0;
+    int scale = 0;
+    type_visitor.get_scalar_type(&scalar_type, &precision, &scale);
+
+    const bool has_nulls = type_visitor.contain_nulls();
+    const bool needs_conversion = type_visitor.need_convert_field();
+    // Array items may have mismatching dimensions, e.g. [1, 2, [1, 2, 3]].
+    const size_t num_dimensions = apply_visitor(FieldVisitorToNumberOfDimensions(), field);
+
+    *info = {scalar_type, has_nulls, needs_conversion, num_dimensions, scale, precision};
+}
+
+/// True when @field is complex itself, or is a typed variant field that wraps
+/// a complex field.
+bool is_complex_field(const Field& field) {
+    if (field.is_complex_field()) {
+        return true;
+    }
+    if (!field.is_variant_field()) {
+        return false;
+    }
+    return field.get<const VariantField&>().get_field().is_complex_field();
+}
+
+/// Fills @info with the type information deduced from @field.
+void get_field_info(const Field& field, FieldInfo* info) {
+    // Non-complex fields take the cheaper visitor, which does not handle arrays.
+    if (!is_complex_field(field)) {
+        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
+        return;
+    }
+    get_field_info_impl<FieldVisitorToScalarType>(field, info);
+}
+
#include "common/compile_check_end.h"
} // namespace doris::vectorized::schema_util
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 8281cdec7b6..eba6869964e 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -152,4 +152,6 @@ void calculate_variant_stats(const IColumn&
encoded_sparse_column,
segment_v2::VariantStatisticsPB* stats, size_t
row_pos,
size_t num_rows);
+void get_field_info(const Field& field, FieldInfo* info);
+
} // namespace doris::vectorized::schema_util
diff --git a/be/src/vec/data_types/data_type_jsonb.h
b/be/src/vec/data_types/data_type_jsonb.h
index 25d90d37482..6fd188574dd 100644
--- a/be/src/vec/data_types/data_type_jsonb.h
+++ b/be/src/vec/data_types/data_type_jsonb.h
@@ -87,9 +87,7 @@ public:
// Return JsonbField.
Field get_type_field(const IColumn& column, size_t row) const override {
const auto& column_data = static_cast<const ColumnString&>(column);
- Field field =
- JsonbField(column_data.get_data_at(row).data,
column_data.get_data_at(row).size);
- return VariantField(std::move(field), TypeIndex::JSONB);
+ return JsonbField(column_data.get_data_at(row).data,
column_data.get_data_at(row).size);
}
bool equals(const IDataType& rhs) const override;
diff --git a/be/src/vec/functions/function_variant_type.cpp
b/be/src/vec/functions/function_variant_type.cpp
new file mode 100644
index 00000000000..8e541a6958f
--- /dev/null
+++ b/be/src/vec/functions/function_variant_type.cpp
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include <glog/logging.h>
+
+#include "vec/columns/column_object.h"
+#include "vec/common/schema_util.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris {
+class FunctionContext;
+} // namespace doris
+
+namespace doris::vectorized {
+
+// Scalar function `variant_type`: for every row of a variant column, produces a
+// JSON object string that maps each path stored in that row to the name of the
+// scalar type deduced for its value.
+class FunctionVariantType : public IFunction {
+public:
+    static constexpr auto name = "variant_type";
+    static FunctionPtr create() { return std::make_shared<FunctionVariantType>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    // Collects `path -> type name` for one row of @column.
+    // The std::map keeps the paths ordered, so the serialized output is deterministic.
+    std::map<std::string, std::string> get_type_info(const ColumnObject& column, size_t row) const {
+        std::map<std::string, std::string> result;
+        Field field = column[row];
+        const auto& variant_map = field.get<const VariantMap&>();
+        for (const auto& [key, value] : variant_map) {
+            if (key.empty() && value.get_type() == Field::Types::JSONB &&
+                value.get<const JsonbField&>().get_size() == 0) {
+                // Skip the zero-length JSONB value stored under the empty (root) path:
+                // it carries no type worth reporting.
+                continue;
+            }
+            FieldInfo info;
+            schema_util::get_field_info(value, &info);
+            result[key] = getTypeName(info.scalar_type_id);
+        }
+        return result;
+    }
+
+    // Writes one JSON object per input row into a fresh string column and stores it
+    // at position @result of @block.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        uint32_t result, size_t input_rows_count) const override {
+        const auto& arg_column =
+                assert_cast<const ColumnObject&>(*block.get_by_position(arguments[0]).column);
+        auto result_column = ColumnString::create();
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            // The row is materialized exactly once, inside get_type_info(). The previous
+            // extra `arg_column[i]` copy and the root-type lookup were never used.
+            const auto type_info = get_type_info(arg_column, i);
+
+            // Use ColumnString as buffer for JSON serialization
+            VectorBufferWriter writer(*result_column.get());
+
+            // Write JSON object
+            writeChar('{', writer);
+
+            bool first = true;
+            for (const auto& [key, value] : type_info) {
+                if (!first) {
+                    writeChar(',', writer);
+                }
+                first = false;
+
+                // Write key
+                writeJSONString(key, writer);
+                writeCString(":", writer);
+
+                // Write value
+                writeJSONString(value, writer);
+            }
+
+            writeChar('}', writer);
+            writer.commit();
+        }
+
+        block.replace_by_position(result, std::move(result_column));
+        return Status::OK();
+    }
+};
+
+// Makes `variant_type` available through the given function factory.
+void register_function_variant_type(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionVariantType>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/simple_function_factory.h
b/be/src/vec/functions/simple_function_factory.h
index 46eca0cb419..2e4e54d49c2 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -112,6 +112,7 @@ void
register_function_split_by_regexp(SimpleFunctionFactory& factory);
void register_function_assert_true(SimpleFunctionFactory& factory);
void register_function_compress(SimpleFunctionFactory& factory);
void register_function_bit_test(SimpleFunctionFactory& factory);
+void register_function_variant_type(SimpleFunctionFactory& factory);
class SimpleFunctionFactory {
using Creator = std::function<FunctionBuilderPtr()>;
@@ -303,6 +304,7 @@ public:
register_function_assert_true(instance);
register_function_bit_test(instance);
register_function_compress(instance);
+ register_function_variant_type(instance);
});
return instance;
}
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index 4ef416dc865..3dc281a6fda 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -157,7 +157,7 @@ void parse_json_to_variant(IColumn& column, const char*
src, size_t length,
size_t old_num_rows = column_object.rows();
for (size_t i = 0; i < paths.size(); ++i) {
FieldInfo field_info;
- get_field_info(values[i], &field_info);
+ schema_util::get_field_info(values[i], &field_info);
if (WhichDataType(field_info.scalar_type_id).is_nothing()) {
continue;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index e482b419ac5..46b79d6f514 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -199,6 +199,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonBigInt
import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonDouble;
import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonInt;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonString;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Grouping;
import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingId;
@@ -982,7 +983,8 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(SessionUser.class, "session_user"),
scalar(LastQueryId.class, "last_query_id"),
scalar(Compress.class, "compress"),
- scalar(Uncompress.class, "uncompress"));
+ scalar(Uncompress.class, "uncompress"),
+ scalar(GetVariantType.class, "variant_type"));
public static final BuiltinScalarFunctions INSTANCE = new
BuiltinScalarFunctions();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java
new file mode 100644
index 00000000000..b3e4c4e6f41
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VariantType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'variant_type': takes a single variant argument and returns a string.
+ * The function has exactly one child, so it is a unary expression.
+ */
+public class GetVariantType extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(StringType.INSTANCE).args(new VariantType(0))
+    );
+
+    /**
+     * constructor with 1 argument.
+     *
+     * @param arg0 the variant expression whose inner types are requested
+     */
+    public GetVariantType(Expression arg0) {
+        super("variant_type", arg0);
+    }
+
+    /**
+     * withChildren.
+     *
+     * @param children the replacement children; exactly one is required
+     * @return a new GetVariantType built on the single child
+     */
+    @Override
+    public GetVariantType withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new GetVariantType(children.get(0));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitGetVariantType(this, context);
+    }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 4346fb71591..b4b16a86c29 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -204,6 +204,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonBigInt
import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonDouble;
import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonInt;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonString;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Hex;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.HllCardinality;
@@ -2348,4 +2349,8 @@ public interface ScalarFunctionVisitor<R, C> {
default R visitUncompress(Uncompress uncompress, C context) {
return visitScalarFunction(uncompress, context);
}
+
+    // variant_type has no dedicated handling: defer to the generic scalar-function visit.
+    default R visitGetVariantType(GetVariantType getVariantType, C context) {
+        return visitScalarFunction(getVariantType, context);
+    }
}
diff --git a/regression-test/data/variant_p0/predefine/load.out
b/regression-test/data/variant_p0/predefine/load.out
index e2347eb8508..edc57afcd76 100644
Binary files a/regression-test/data/variant_p0/predefine/load.out and
b/regression-test/data/variant_p0/predefine/load.out differ
diff --git a/regression-test/suites/variant_p0/predefine/load.groovy
b/regression-test/suites/variant_p0/predefine/load.groovy
index cf0ad73491f..c83c93b2116 100644
--- a/regression-test/suites/variant_p0/predefine/load.groovy
+++ b/regression-test/suites/variant_p0/predefine/load.groovy
@@ -318,5 +318,23 @@ suite("regression_test_variant_predefine_schema", "p0"){
sql """insert into test_array_with_nulls values(5, '{"array_decimal" :
[1.1, 2.2, 3.3, 4.4]}')"""
sql """insert into test_array_with_nulls values(6, '{"array_decimal" :
[]}')"""
sql """insert into test_array_with_nulls values(7, '{"array_decimal" :
[null, null]}')"""
- qt_sql_arr_null_2 "select * from test_array_with_nulls order by k"
-}
\ No newline at end of file
+ qt_sql_arr_null_2 "select * from test_array_with_nulls order by k limit 5"
+
+ // test variant_type
+ sql "DROP TABLE IF EXISTS test_variant_type"
+ sql """
+ CREATE TABLE `test_variant_type` (
+ `k` bigint NULL,
+ `var` variant<match_name 'dcm' : decimal, 'db' : double, 'dt' :
datetime, 'a.b.c' : array<int>>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "min_load_replica_num" = "-1",
+ "variant_max_subcolumns_count" = "0"
+ );
+ """
+ sql """insert into test_variant_type values(1, '{"dcm" : 1.1, "db" : 2.2,
"dt" : "2021-01-01 00:00:00", "a.b.c" : [1, 2, 3]}')"""
+ qt_sql "select variant_type(var) from test_variant_type"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]