This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new c58034244b0 branch-3.1: [opt](function) Enhance the handling of a
single struct-type argument in the EXPLODE function #57050 (#57827)
c58034244b0 is described below
commit c58034244b02ffb358f31fa6afa1b1a5eda89d2d
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Nov 10 14:18:41 2025 +0800
branch-3.1: [opt](function) Enhance the handling of a single struct-type
argument in the EXPLODE function #57050 (#57827)
Cherry-picked from #57050
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
be/src/vec/exprs/table_function/vexplode_v2.cpp | 73 ++++++++++++++--------
be/src/vec/functions/function_fake.cpp | 6 +-
be/test/vec/function/table_function_test.cpp | 45 ++++++-------
.../nereids/rules/analysis/BindExpression.java | 22 ++++---
.../expressions/functions/generator/Explode.java | 7 ++-
.../functions/generator/ExplodeOuter.java | 7 ++-
.../functions/generator/ExplodeVariantArray.java | 7 ++-
.../sql_functions/table_function/explode.out | 16 +++++
.../hive/test_parquet_nested_types.groovy | 8 +--
.../sql_functions/table_function/explode.groovy | 22 +++++++
10 files changed, 144 insertions(+), 69 deletions(-)
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp b/be/src/vec/exprs/table_function/vexplode_v2.cpp
index 02864c58115..bc1740c4a23 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp
@@ -26,6 +26,7 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_nothing.h"
#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type.h"
@@ -134,24 +135,35 @@ void VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
return;
}
ColumnStruct* struct_column = nullptr;
- if (_is_nullable) {
- auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
- struct_column =
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
- auto* nullmap_column =
-
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
- nullmap_column->insert_many_defaults(length);
+ std::vector<IColumn*> columns;
+
+ const bool multi_sub_columns = _multi_detail.size() > 1;
+
+ if (multi_sub_columns) {
+ if (_is_nullable) {
+ auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
+ struct_column =
+
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
+ auto* nullmap_column =
+
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
+ nullmap_column->insert_many_defaults(length);
+
+ } else {
+ struct_column = assert_cast<ColumnStruct*>(column.get());
+ }
+
+ for (size_t i = 0; i != _multi_detail.size(); ++i) {
+ columns.emplace_back(&struct_column->get_column(i));
+ }
} else {
- struct_column = assert_cast<ColumnStruct*>(column.get());
- }
- if (!struct_column) {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "Only multiple columns can be returned within a
struct.");
+ columns.push_back(column.get());
}
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
- auto& struct_field = struct_column->get_column(i);
+
+ auto& struct_field = *columns.at(i);
if ((detail.array_nullmap_data &&
detail.array_nullmap_data[_row_idx])) {
struct_field.insert_many_defaults(length);
} else {
@@ -173,31 +185,40 @@ void VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step)
{
max_step = std::min(max_step, (int)(_cur_size - _cur_offset));
+ const bool multi_sub_columns = _multi_detail.size() > 1;
+
+ ColumnStruct* struct_column = nullptr;
+ std::vector<IColumn*> columns;
+
if (current_empty()) {
column->insert_default();
max_step = 1;
} else {
- ColumnStruct* struct_column = nullptr;
- if (_is_nullable) {
- auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
- struct_column =
-
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
- auto* nullmap_column =
-
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
- nullmap_column->insert_many_defaults(max_step);
+ if (multi_sub_columns) {
+ if (_is_nullable) {
+ auto* nullable_column =
assert_cast<ColumnNullable*>(column.get());
+ struct_column =
+
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
+ auto* nullmap_column =
+
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
+ nullmap_column->insert_many_defaults(max_step);
+
+ } else {
+ struct_column = assert_cast<ColumnStruct*>(column.get());
+ }
+ for (size_t i = 0; i != _multi_detail.size(); ++i) {
+ columns.emplace_back(&struct_column->get_column(i));
+ }
} else {
- struct_column = assert_cast<ColumnStruct*>(column.get());
- }
- if (!struct_column) {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "Only multiple columns can be returned within a
struct.");
+ columns.emplace_back(column.get());
}
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size =
_multi_detail[i].array_col->size_at(_row_idx);
- auto& struct_field = struct_column->get_column(i);
+
+ auto& struct_field = *columns.at(i);
if (detail.array_nullmap_data &&
detail.array_nullmap_data[_row_idx]) {
struct_field.insert_many_defaults(max_step);
} else {
diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp
index 70828fa4a68..f41075d3360 100644
--- a/be/src/vec/functions/function_fake.cpp
+++ b/be/src/vec/functions/function_fake.cpp
@@ -89,7 +89,11 @@ struct FunctionExplodeV2 {
}
}
- return
make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes));
+ if (fieldTypes.size() > 1) {
+ return
make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes));
+ } else {
+ return make_nullable(fieldTypes[0]);
+ }
}
static DataTypes get_variadic_argument_types() { return {}; }
static std::string get_error_msg() { return "Fake function do not support
execute"; }
diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp
index 8703a8feb02..17fc5ba187a 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -136,11 +136,10 @@ TEST_F(TableFunctionTest, vexplode_outer_v2) {
Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{vec}, {Null()}, {Array()}};
- InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Int32};
+ InputTypeSet output_types = {TypeIndex::Int32};
- InputDataSet output_set = {{{CellSet {Int32(1)}}}, {{CellSet
{Null()}}},
- {{CellSet {Int32(2)}}}, {{CellSet
{Int32(3)}}},
- {{CellSet {Null()}}}, {{CellSet
{Null()}}}};
+ InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)},
+ {Int32(3)}, {Null()}, {Null()}};
check_vec_table_function(&explode_outer, input_types, input_set,
output_types, output_set);
}
@@ -151,29 +150,26 @@ TEST_F(TableFunctionTest, vexplode_outer_v2) {
Array vec = {Field(std::string("abc")), Field(std::string("")),
Field(std::string("def"))};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
- InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String};
+ InputTypeSet output_types = {TypeIndex::String};
- InputDataSet output_set = {{{CellSet {Null()}}},
- {{CellSet {Null()}}},
- {{CellSet {std::string("abc")}}},
- {{CellSet {std::string("")}}},
- {{CellSet {std::string("def")}}}};
+ InputDataSet output_set = {
+ {Null()}, {Null()}, {std::string("abc")}, {std::string("")},
{std::string("def")}};
check_vec_table_function(&explode_outer, input_types, input_set,
output_types, output_set);
}
- // explode_outer(Array<Decimal>)
+ // // explode_outer(Array<Decimal>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128V2};
Array vec = {ut_type::DECIMALFIELD(17014116.67),
ut_type::DECIMALFIELD(-17014116.67)};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
- InputTypeSet output_types = {TypeIndex::Struct,
TypeIndex::Decimal128V2};
+ InputTypeSet output_types = {TypeIndex::Decimal128V2};
- InputDataSet output_set = {{{CellSet {Null()}}},
- {{CellSet {Null()}}},
- {{CellSet
{ut_type::DECIMALV2(17014116.67)}}},
- {{CellSet
{ut_type::DECIMALV2(-17014116.67)}}}};
+ InputDataSet output_set = {{Null()},
+ {Null()},
+ {ut_type::DECIMALV2(17014116.67)},
+ {ut_type::DECIMALV2(-17014116.67)}};
check_vec_table_function(&explode_outer, input_types, input_set,
output_types, output_set);
}
@@ -234,11 +230,8 @@ TEST_F(TableFunctionTest, vexplode_v2) {
Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{vec}, {Null()}, {Array()}};
- InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Int32};
- InputDataSet output_set = {{{CellSet {Int32(1)}}},
- {{CellSet {Null()}}},
- {{CellSet {Int32(2)}}},
- {{CellSet {Int32(3)}}}};
+ InputTypeSet output_types = {TypeIndex::Int32};
+ InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)},
{Int32(3)}};
check_vec_table_function(&explode, input_types, input_set,
output_types, output_set);
}
@@ -249,11 +242,9 @@ TEST_F(TableFunctionTest, vexplode_v2) {
Array vec = {Field(std::string("abc")), Field(std::string("")),
Field(std::string("def"))};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
- InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String};
+ InputTypeSet output_types = {TypeIndex::String};
- InputDataSet output_set = {{{CellSet {std::string("abc")}}},
- {{CellSet {std::string("")}}},
- {{CellSet {std::string("def")}}}};
+ InputDataSet output_set = {{std::string("abc")}, {std::string("")},
{std::string("def")}};
check_vec_table_function(&explode, input_types, input_set,
output_types, output_set);
}
@@ -263,9 +254,9 @@ TEST_F(TableFunctionTest, vexplode_v2) {
Array vec = {Null(), str_to_date_time("2022-01-02", false)};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
- InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Date};
+ InputTypeSet output_types = {TypeIndex::Date};
- InputDataSet output_set = {{{CellSet {Null()}}}, {{CellSet
{std::string("2022-01-02")}}}};
+ InputDataSet output_set = {{Null()}, {std::string("2022-01-02")}};
check_vec_table_function(&explode, input_types, input_set,
output_types, output_set);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index c7780c3bd7b..8ce5cc27096 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -267,14 +267,20 @@ public class BindExpression implements AnalysisRuleFactory {
// 2. the expandColumnsAlias is empty, we should use origin
boundSlot
if (generate.getExpandColumnAlias() != null && i <
generate.getExpandColumnAlias().size()
&&
!CollectionUtils.isEmpty(generate.getExpandColumnAlias().get(i))) {
- // if the alias is not empty, we should bind it with
struct_element as child expr with alias
- // struct_element(#expand_col#k, #k) as #k
- // struct_element(#expand_col#v, #v) as #v
- List<StructField> fields = ((StructType)
boundSlot.getDataType()).getFields();
- for (int idx = 0; idx < fields.size(); ++idx) {
- expandAlias.add(new Alias(new StructElement(
- boundSlot, new
StringLiteral(fields.get(idx).getName())),
- generate.getExpandColumnAlias().get(i).get(idx),
+ if (boundSlot.getDataType() instanceof StructType
+ && generate.getExpandColumnAlias().get(i).size() > 1) {
+ // if the alias is not empty, we should bind it with
struct_element as child expr with alias
+ // struct_element(#expand_col#k, #k) as #k
+ // struct_element(#expand_col#v, #v) as #v
+ List<StructField> fields = ((StructType)
boundSlot.getDataType()).getFields();
+ for (int idx = 0; idx < fields.size(); ++idx) {
+ expandAlias.add(new Alias(new StructElement(
+ boundSlot, new
StringLiteral(fields.get(idx).getName())),
+
generate.getExpandColumnAlias().get(i).get(idx),
+ slot.getQualifier()));
+ }
+ } else {
+ expandAlias.add(new Alias(boundSlot,
generate.getExpandColumnAlias().get(i).get(0),
slot.getQualifier()));
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
index 22b80c7f6d4..fae5e4c8641 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java
@@ -82,7 +82,12 @@ public class Explode extends TableGeneratingFunction implements CustomSignature,
SearchSignature.throwCanNotFoundFunctionException(this.getName(),
getArguments());
}
}
- return FunctionSignature.of(new StructType(structFields.build()),
arguments);
+
+ StructType structType = new StructType(structFields.build());
+ if (arguments.size() == 1) {
+ return
FunctionSignature.of(structType.getFields().get(0).getDataType(), arguments);
+ }
+ return FunctionSignature.of(structType, arguments);
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
index cbcc2eaa1c1..a57da72c21f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java
@@ -82,7 +82,12 @@ public class ExplodeOuter extends TableGeneratingFunction implements CustomSigna
SearchSignature.throwCanNotFoundFunctionException(this.getName(),
getArguments());
}
}
- return FunctionSignature.of(new StructType(structFields.build()),
arguments);
+
+ StructType structType = new StructType(structFields.build());
+ if (arguments.size() == 1) {
+ return
FunctionSignature.of(structType.getFields().get(0).getDataType(), arguments);
+ }
+ return FunctionSignature.of(structType, arguments);
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
index 3dd0dcabe5a..de37705045f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java
@@ -81,7 +81,12 @@ public class ExplodeVariantArray extends TableGeneratingFunction implements
SearchSignature.throwCanNotFoundFunctionException(this.getName(),
getArguments());
}
}
- return FunctionSignature.of(new StructType(structFields.build()),
arguments);
+
+ StructType structType = new StructType(structFields.build());
+ if (arguments.size() == 1) {
+ return
FunctionSignature.of(structType.getFields().get(0).getDataType(), arguments);
+ }
+ return FunctionSignature.of(structType, arguments);
}
@Override
diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode.out b/regression-test/data/query_p0/sql_functions/table_function/explode.out
index 1a6280bb84f..377a757a429 100644
--- a/regression-test/data/query_p0/sql_functions/table_function/explode.out
+++ b/regression-test/data/query_p0/sql_functions/table_function/explode.out
@@ -747,3 +747,19 @@ j \N 2 cd \N
j \N 4 \N 2
j \N 5 \N 3
+-- !select25 --
+1 {"col1":1, "col2":"a"}
+1 {"col1":2, "col2":"b"}
+
+-- !select26 --
+1 1 a
+1 2 b
+
+-- !select27 --
+1 {"col1":{"col1":1, "col2":"a"}, "col2":{"col1":3, "col2":"c"}}
+1 {"col1":{"col1":2, "col2":"b"}, "col2":{"col1":4, "col2":"d"}}
+
+-- !select28 --
+1 {"col1":1, "col2":"a"} {"col1":3, "col2":"c"}
+1 {"col1":2, "col2":"b"} {"col1":4, "col2":"d"}
+
diff --git a/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy b/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
index 034559aaa78..d94f909142e 100644
--- a/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_parquet_nested_types.groovy
@@ -132,11 +132,11 @@ suite("test_parquet_nested_types", "p0,external,hive,external_docker,external_do
order_qt_nested_cross_page2_parquet_q5 """
SELECT
id,
- STRUCT_ELEMENT(item, 'x'),
- STRUCT_ELEMENT(item, 'y')
+ item_x as x_value,
+ item_y as y_value
FROM nested_cross_page2_parquet
- LATERAL VIEW EXPLODE(array_struct_col) tmp AS item
- WHERE id = 1 AND STRUCT_ELEMENT(item, 'x') > 100
+ LATERAL VIEW EXPLODE(array_struct_col) tmp AS item_x, item_y
+ WHERE id = 1 AND item_x > 100
"""
order_qt_nested_cross_page2_parquet_q6 """
diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
index 88f7a85023f..9211f9e9e56 100644
--- a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
+++ b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy
@@ -214,4 +214,26 @@ suite("explode") {
lateral view explode([], [1, 2, null, 4, 5], ["ab", "cd", "ef"],
[null, null, 1, 2, 3, 4, 5]) t2 as c0, c1, c2, c3
order by 1,2,3,4,5;
"""
+
+ qt_select25 """
+ select * from (select 1) t1 lateral view explode(array(struct(1, "a"),
struct(2, "b"))) t2 as c1 order by 1, struct_element(c1, 1);
+ """
+ qt_select26 """
+ select * from (select 1) t1 lateral view explode(array(struct(1, "a"),
struct(2, "b"))) t2 as c1, c2 order by 1, 2, 3;
+ """
+
+ qt_select27 """
+ select * from (select 1) t1
+ lateral view explode(
+ array(struct(1, "a"), struct(2, "b")),
+ array(struct(3, "c"), struct(4, "d"))
+ ) t2 as c1 order by 1, struct_element(struct_element(c1, 1), 1),
struct_element(struct_element(c1, 2), 1);
+ """
+ qt_select28 """
+ select * from (select 1) t1
+ lateral view explode(
+ array(struct(1, "a"), struct(2, "b")),
+ array(struct(3, "c"), struct(4, "d"))
+ ) t2 as c1, c2 order by 1, struct_element(c1, 1),
struct_element(c2, 1);
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]