This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new a13b90a7043 branch-4.0: [opt](function) Enhance the handling of a single struct-type argument in the EXPLODE function #57050 (#57093)
a13b90a7043 is described below
commit a13b90a70439b3b22a979f9ddf29b0fcbe595096
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 17 17:29:30 2025 +0800
branch-4.0: [opt](function) Enhance the handling of a single struct-type argument in the EXPLODE function #57050 (#57093)
Cherry-picked from #57050
Co-authored-by: Jerry Hu <[email protected]>
---
be/src/vec/exprs/table_function/vexplode_v2.cpp | 71 ++++++++++++++--------
be/src/vec/functions/function_fake.cpp | 6 +-
be/test/vec/function/table_function_test.cpp | 46 ++++++--------
.../nereids/rules/analysis/BindExpression.java | 22 ++++---
.../expressions/functions/generator/Explode.java | 7 ++-
.../functions/generator/ExplodeOuter.java | 7 ++-
.../functions/generator/ExplodeVariantArray.java | 7 ++-
.../sql_functions/table_function/explode.out | 16 +++++
.../hive/test_parquet_nested_types.groovy | 8 +--
.../sql_functions/table_function/explode.groovy | 22 +++++++
10 files changed, 142 insertions(+), 70 deletions(-)
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp
b/be/src/vec/exprs/table_function/vexplode_v2.cpp
index 47a4f41147b..dbeef4f438b 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp
@@ -29,6 +29,7 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_nothing.h"
#include "vec/columns/column_variant.h"
+#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type.h"
@@ -134,18 +135,28 @@ void
VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
return;
}
ColumnStruct* struct_column = nullptr;
- if (_is_nullable) {
- auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
- struct_column =
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
- auto* nullmap_column =
-
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
- nullmap_column->insert_many_defaults(length);
+ std::vector<IColumn*> columns;
+
+ const bool multi_sub_columns = _multi_detail.size() > 1 ||
_generate_row_index;
+
+ if (multi_sub_columns) {
+ if (_is_nullable) {
+ auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
+ struct_column =
+
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
+ auto* nullmap_column =
+
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
+ nullmap_column->insert_many_defaults(length);
+
+ } else {
+ struct_column = assert_cast<ColumnStruct*>(column.get());
+ }
+
+ for (size_t i = 0; i != _multi_detail.size(); ++i) {
+ columns.emplace_back(&struct_column->get_column(i +
(_generate_row_index ? 1 : 0)));
+ }
} else {
- struct_column = assert_cast<ColumnStruct*>(column.get());
- }
- if (!struct_column) {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "Only multiple columns can be returned within a
struct.");
+ columns.push_back(column.get());
}
if (_generate_row_index) {
@@ -157,7 +168,7 @@ void
VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
- auto& struct_field = struct_column->get_column(i +
(_generate_row_index ? 1 : 0));
+ auto& struct_field = *columns.at(i);
if ((detail.array_nullmap_data &&
detail.array_nullmap_data[_row_idx])) {
struct_field.insert_many_defaults(length);
} else {
@@ -179,25 +190,33 @@ void
VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step)
{
max_step = std::min(max_step, (int)(_cur_size - _cur_offset));
+ const bool multi_sub_columns = _multi_detail.size() > 1 ||
_generate_row_index;
+
+ ColumnStruct* struct_column = nullptr;
+ std::vector<IColumn*> columns;
+
if (current_empty()) {
column->insert_default();
max_step = 1;
} else {
- ColumnStruct* struct_column = nullptr;
- if (_is_nullable) {
- auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
- struct_column =
-
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
- auto* nullmap_column =
-
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
- nullmap_column->insert_many_defaults(max_step);
+ if (multi_sub_columns) {
+ if (_is_nullable) {
+ auto* nullable_column =
assert_cast<ColumnNullable*>(column.get());
+ struct_column =
+
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
+ auto* nullmap_column =
+
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
+ nullmap_column->insert_many_defaults(max_step);
+
+ } else {
+ struct_column = assert_cast<ColumnStruct*>(column.get());
+ }
+ for (size_t i = 0; i != _multi_detail.size(); ++i) {
+ columns.emplace_back(&struct_column->get_column(i +
(_generate_row_index ? 1 : 0)));
+ }
} else {
- struct_column = assert_cast<ColumnStruct*>(column.get());
- }
- if (!struct_column) {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "Only multiple columns can be returned within a
struct.");
+ columns.emplace_back(column.get());
}
if (_generate_row_index) {
@@ -210,7 +229,7 @@ int VExplodeV2TableFunction::get_value(MutableColumnPtr&
column, int max_step) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size =
_multi_detail[i].array_col->size_at(_row_idx);
- auto& struct_field = struct_column->get_column(i +
(_generate_row_index ? 1 : 0));
+ auto& struct_field = *columns.at(i);
if (detail.array_nullmap_data &&
detail.array_nullmap_data[_row_idx]) {
struct_field.insert_many_defaults(max_step);
} else {
diff --git a/be/src/vec/functions/function_fake.cpp
b/be/src/vec/functions/function_fake.cpp
index db20245683f..350f6b4e95f 100644
--- a/be/src/vec/functions/function_fake.cpp
+++ b/be/src/vec/functions/function_fake.cpp
@@ -90,7 +90,11 @@ struct FunctionExplodeV2 {
}
}
- return
make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes));
+ if (fieldTypes.size() > 1) {
+ return
make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes));
+ } else {
+ return make_nullable(fieldTypes[0]);
+ }
}
static DataTypes get_variadic_argument_types() { return {}; }
static std::string get_error_msg() { return "Fake function do not support
execute"; }
diff --git a/be/test/vec/function/table_function_test.cpp
b/be/test/vec/function/table_function_test.cpp
index fb25913e304..7ef2e9f56af 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -132,11 +132,10 @@ TEST_F(TableFunctionTest, vexplode_outer_v2) {
TestArray vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{AnyType {vec}}, {Null()}, {AnyType
{TestArray {}}}};
- InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT,
PrimitiveType::TYPE_INT};
+ InputTypeSet output_types = {PrimitiveType::TYPE_INT};
- InputDataSet output_set = {{{TestArray {Int32(1)}}}, {{TestArray
{Null()}}},
- {{TestArray {Int32(2)}}}, {{TestArray
{Int32(3)}}},
- {{TestArray {Null()}}}, {{TestArray
{Null()}}}};
+ InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)},
+ {Int32(3)}, {Null()}, {Null()}};
check_vec_table_function(&explode_outer, input_types, input_set,
output_types, output_set);
}
@@ -147,29 +146,26 @@ TEST_F(TableFunctionTest, vexplode_outer_v2) {
TestArray vec = {std::string("abc"), std::string(""),
std::string("def")};
InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType
{vec}}};
- InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT,
PrimitiveType::TYPE_VARCHAR};
+ InputTypeSet output_types = {PrimitiveType::TYPE_VARCHAR};
- InputDataSet output_set = {{{TestArray {Null()}}},
- {{TestArray {Null()}}},
- {{TestArray {std::string("abc")}}},
- {{TestArray {std::string("")}}},
- {{TestArray {std::string("def")}}}};
+ InputDataSet output_set = {
+ {Null()}, {Null()}, {std::string("abc")}, {std::string("")},
{std::string("def")}};
check_vec_table_function(&explode_outer, input_types, input_set,
output_types, output_set);
}
- // explode_outer(Array<Decimal>)
+ // // explode_outer(Array<Decimal>)
{
InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY,
PrimitiveType::TYPE_DECIMALV2};
TestArray vec = {ut_type::DECIMALV2(17014116.67),
ut_type::DECIMALV2(-17014116.67)};
InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType
{vec}}};
- InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT,
PrimitiveType::TYPE_DECIMALV2};
+ InputTypeSet output_types = {PrimitiveType::TYPE_DECIMALV2};
- InputDataSet output_set = {{{TestArray {Null()}}},
- {{TestArray {Null()}}},
- {{TestArray
{ut_type::DECIMALV2(17014116.67)}}},
- {{TestArray
{ut_type::DECIMALV2(-17014116.67)}}}};
+ InputDataSet output_set = {{Null()},
+ {Null()},
+ {ut_type::DECIMALV2(17014116.67)},
+ {ut_type::DECIMALV2(-17014116.67)}};
check_vec_table_function(&explode_outer, input_types, input_set,
output_types, output_set);
}
@@ -230,11 +226,8 @@ TEST_F(TableFunctionTest, vexplode_v2) {
TestArray vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{AnyType {vec}}, {Null()}, {AnyType
{TestArray {}}}};
- InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT,
PrimitiveType::TYPE_INT};
- InputDataSet output_set = {{{TestArray {Int32(1)}}},
- {{TestArray {Null()}}},
- {{TestArray {Int32(2)}}},
- {{TestArray {Int32(3)}}}};
+ InputTypeSet output_types = {PrimitiveType::TYPE_INT};
+ InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)},
{Int32(3)}};
check_vec_table_function(&explode, input_types, input_set,
output_types, output_set);
}
@@ -245,11 +238,9 @@ TEST_F(TableFunctionTest, vexplode_v2) {
TestArray vec = {std::string("abc"), std::string(""),
std::string("def")};
InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType
{vec}}};
- InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT,
PrimitiveType::TYPE_VARCHAR};
+ InputTypeSet output_types = {PrimitiveType::TYPE_VARCHAR};
- InputDataSet output_set = {{{TestArray {std::string("abc")}}},
- {{TestArray {std::string("")}}},
- {{TestArray {std::string("def")}}}};
+ InputDataSet output_set = {{std::string("abc")}, {std::string("")},
{std::string("def")}};
check_vec_table_function(&explode, input_types, input_set,
output_types, output_set);
}
@@ -259,10 +250,9 @@ TEST_F(TableFunctionTest, vexplode_v2) {
TestArray vec = {Null(), std::string("2022-01-02")};
InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType
{vec}}};
- InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT,
PrimitiveType::TYPE_DATE};
+ InputTypeSet output_types = {PrimitiveType::TYPE_DATE};
- InputDataSet output_set = {{{TestArray {Null()}}},
- {{TestArray {std::string("2022-01-02")}}}};
+ InputDataSet output_set = {{Null()}, {std::string("2022-01-02")}};
check_vec_table_function(&explode, input_types, input_set,
output_types, output_set);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index 20d8e981af1..b41968450ec 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -294,14 +294,20 @@ public class BindExpression implements
AnalysisRuleFactory {
// 2. the expandColumnsAlias is empty, we should use origin
boundSlot
if (generate.getExpandColumnAlias() != null && i <
generate.getExpandColumnAlias().size()
&&
!CollectionUtils.isEmpty(generate.getExpandColumnAlias().get(i))) {
- // if the alias is not empty, we should bind it with
struct_element as child expr with alias
- // struct_element(#expand_col#k, #k) as #k
- // struct_element(#expand_col#v, #v) as #v
- List<StructField> fields = ((StructType)
boundSlot.getDataType()).getFields();
- for (int idx = 0; idx < fields.size(); ++idx) {
- expandAlias.add(new Alias(new StructElement(
- boundSlot, new
StringLiteral(fields.get(idx).getName())),
- generate.getExpandColumnAlias().get(i).get(idx),
+ if (boundSlot.getDataType() instanceof StructType
+ && generate.getExpandColumnAlias().get(i).size() > 1) {
+ // if the alias is not empty, we should bind it with
struct_element as child expr with alias
+ // struct_element(#expand_col#k, #k) as #k
+ // struct_element(#expand_col#v, #v) as #v
+ List<StructField> fields = ((StructType)
boundSlot.getDataType()).getFields();
+ for (int idx = 0; idx < fields.size(); ++idx) {
+ expandAlias.add(new Alias(new StructElement(
+ boundSlot, new
StringLiteral(fields.get(idx).getName())),
+
generate.getExpandColumnAlias().get(i).get(idx),
+ slot.getQualifier()));
+ }
+ } else {
+ expandAlias.add(new Alias(boundSlot,
generate.getExpandColumnAlias().get(i).get(0),
slot.getQualifier()));
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
index 472e6d7461b..c8e23b2a0b7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
@@ -87,7 +87,12 @@ public class Explode extends TableGeneratingFunction
implements CustomSignature,
SearchSignature.throwCanNotFoundFunctionException(this.getName(),
getArguments());
}
}
- return FunctionSignature.of(new StructType(structFields.build()),
arguments);
+
+ StructType structType = new StructType(structFields.build());
+ if (arguments.size() == 1) {
+ return
FunctionSignature.of(structType.getFields().get(0).getDataType(), arguments);
+ }
+ return FunctionSignature.of(structType, arguments);
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
index 15b2772e128..4a26ea14fb6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
@@ -87,7 +87,12 @@ public class ExplodeOuter extends TableGeneratingFunction
implements CustomSigna
SearchSignature.throwCanNotFoundFunctionException(this.getName(),
getArguments());
}
}
- return FunctionSignature.of(new StructType(structFields.build()),
arguments);
+
+ StructType structType = new StructType(structFields.build());
+ if (arguments.size() == 1) {
+ return
FunctionSignature.of(structType.getFields().get(0).getDataType(), arguments);
+ }
+ return FunctionSignature.of(structType, arguments);
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
index 5ba897c3c33..8b0d29683af 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
@@ -82,7 +82,12 @@ public class ExplodeVariantArray extends
TableGeneratingFunction implements
SearchSignature.throwCanNotFoundFunctionException(this.getName(),
getArguments());
}
}
- return FunctionSignature.of(new StructType(structFields.build()),
arguments);
+
+ StructType structType = new StructType(structFields.build());
+ if (arguments.size() == 1) {
+ return
FunctionSignature.of(structType.getFields().get(0).getDataType(), arguments);
+ }
+ return FunctionSignature.of(structType, arguments);
}
@Override
diff --git
a/regression-test/data/query_p0/sql_functions/table_function/explode.out
b/regression-test/data/query_p0/sql_functions/table_function/explode.out
index 1a6280bb84f..377a757a429 100644
--- a/regression-test/data/query_p0/sql_functions/table_function/explode.out
+++ b/regression-test/data/query_p0/sql_functions/table_function/explode.out
@@ -747,3 +747,19 @@ j \N 2 cd \N
j \N 4 \N 2
j \N 5 \N 3
+-- !select25 --
+1 {"col1":1, "col2":"a"}
+1 {"col1":2, "col2":"b"}
+
+-- !select26 --
+1 1 a
+1 2 b
+
+-- !select27 --
+1 {"col1":{"col1":1, "col2":"a"}, "col2":{"col1":3, "col2":"c"}}
+1 {"col1":{"col1":2, "col2":"b"}, "col2":{"col1":4, "col2":"d"}}
+
+-- !select28 --
+1 {"col1":1, "col2":"a"} {"col1":3, "col2":"c"}
+1 {"col1":2, "col2":"b"} {"col1":4, "col2":"d"}
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
b/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
index 034559aaa78..d94f909142e 100644
---
a/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
@@ -132,11 +132,11 @@ suite("test_parquet_nested_types",
"p0,external,hive,external_docker,external_do
order_qt_nested_cross_page2_parquet_q5 """
SELECT
id,
- STRUCT_ELEMENT(item, 'x'),
- STRUCT_ELEMENT(item, 'y')
+ item_x as x_value,
+ item_y as y_value
FROM nested_cross_page2_parquet
- LATERAL VIEW EXPLODE(array_struct_col) tmp AS item
- WHERE id = 1 AND STRUCT_ELEMENT(item, 'x') > 100
+ LATERAL VIEW EXPLODE(array_struct_col) tmp AS item_x, item_y
+ WHERE id = 1 AND item_x > 100
"""
order_qt_nested_cross_page2_parquet_q6 """
diff --git
a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
index fb1a9e29e09..51b8dcf4ec6 100644
---
a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
+++
b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
@@ -213,4 +213,26 @@ suite("explode") {
lateral view explode([], [1, 2, null, 4, 5], ["ab", "cd", "ef"],
[null, null, 1, 2, 3, 4, 5]) t2 as c0, c1, c2, c3
order by 1,2,3,4,5;
"""
+
+ qt_select25 """
+ select * from (select 1) t1 lateral view explode(array(struct(1, "a"),
struct(2, "b"))) t2 as c1 order by 1,2;
+ """
+ qt_select26 """
+ select * from (select 1) t1 lateral view explode(array(struct(1, "a"),
struct(2, "b"))) t2 as c1, c2 order by 1,2,3;
+ """
+
+ qt_select27 """
+ select * from (select 1) t1
+ lateral view explode(
+ array(struct(1, "a"), struct(2, "b")),
+ array(struct(3, "c"), struct(4, "d"))
+ ) t2 as c1 order by 1,2;
+ """
+ qt_select28 """
+ select * from (select 1) t1
+ lateral view explode(
+ array(struct(1, "a"), struct(2, "b")),
+ array(struct(3, "c"), struct(4, "d"))
+ ) t2 as c1, c2 order by 1,2;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]