This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 46f04313212952ae2e8f432cb622457918bae6cd Author: Daniel Becker <[email protected]> AuthorDate: Mon Jan 29 16:11:31 2024 +0100 IMPALA-12763: Union with string struct crashes in ASAN In ASAN builds, if we UNION ALL an array containing a struct of a string with itself, Impala crashes. This is how to reproduce it: In Hive: create table su (arr ARRAY<STRUCT<s: STRING>>) stored as parquet; insert into su values (array(named_struct("s", "A"))); In Impala: select 1, arr from su union all select 2, arr from su; The ASAN error message indicates a heap-use-after-free. Normally, UNIONs of structs are not supported yet (see IMPALA-10752), but if the struct is inside an array it is allowed now. This was probably not intentional and it leads to the above error, so this change disables structs in unions completely, including embedded structs. Testing: - adjusted existing tests - added a query that tests that types with embedded structs are not allowed in a UNION statement, in mixed-collections-and-structs.test Change-Id: Id728f1254b74636be594a33313a478b0b77c7ae4 Reviewed-on: http://gerrit.cloudera.org:8080/20970 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 8 +++++--- .../main/java/org/apache/impala/catalog/Type.java | 22 ++++++++++++++++++++++ .../apache/impala/analysis/AnalyzeStmtsTest.java | 10 ++++++++-- .../QueryTest/mixed-collections-and-structs.test | 6 ++++++ .../QueryTest/nested-array-in-select-list.test | 2 +- .../QueryTest/nested-map-in-select-list.test | 2 +- .../queries/QueryTest/struct-in-select-list.test | 2 +- 7 files changed, 44 insertions(+), 8 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index f6ce7b2bd..600cc9a8f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -48,6 +48,7 @@ import org.apache.impala.authorization.PrivilegeRequest; import org.apache.impala.authorization.PrivilegeRequestBuilder; import org.apache.impala.authorization.TableMask; import org.apache.impala.authorization.User; +import org.apache.impala.catalog.ArrayType; import org.apache.impala.catalog.Column; import org.apache.impala.catalog.DatabaseNotFoundException; import org.apache.impala.catalog.FeCatalog; @@ -62,6 +63,7 @@ import org.apache.impala.catalog.FeTable; import org.apache.impala.catalog.FeView; import org.apache.impala.catalog.IcebergTimeTravelTable; import org.apache.impala.catalog.KuduTable; +import org.apache.impala.catalog.MapType; import org.apache.impala.catalog.MaterializedViewHdfsTable; import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.StructField; @@ -3400,10 +3402,10 @@ public class Analyzer { // Initialize with type of i-th expr in first list. Type compatibleType = firstList.get(i).getType(); if (firstList.get(i) instanceof SlotRef && - compatibleType.isStructType()) { + compatibleType.containsStruct()) { throw new AnalysisException(String.format( - "Set operations don't support STRUCT type. %s in %s", compatibleType.toSql(), - firstList.get(i).toSql())); + "Set operations don't support STRUCT types or types containing " + + "STRUCT types. %s in %s.", compatibleType.toSql(), firstList.get(i).toSql())); } widestExprs.add(firstList.get(i)); diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java b/fe/src/main/java/org/apache/impala/catalog/Type.java index f0246d2f9..98c051da8 100644 --- a/fe/src/main/java/org/apache/impala/catalog/Type.java +++ b/fe/src/main/java/org/apache/impala/catalog/Type.java @@ -243,6 +243,28 @@ public abstract class Type { return this instanceof CollectionStructType; } + /** + * Returns true if this type + * - is a struct type or + * - contains a struct type (recursively); for example + * ARRAY<STRUCT<i: INT>>. 
+ */ + public boolean containsStruct() { + if (isStructType()) return true; + + if (isArrayType()) { + ArrayType arrayType = (ArrayType) this; + return arrayType.getItemType().containsStruct(); + } else if (isMapType()) { + MapType mapType = (MapType) this; + return mapType.getKeyType().containsStruct() || + mapType.getValueType().containsStruct(); + } + + return false; + } + + /** * Returns true if this type * - is a collection type or diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index fd0dbc043..6da0a5b45 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -1016,7 +1016,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest { // Empty star expansion, but non empty result exprs. AnalyzesOk("select 1, * from only_complex_types"); - // Struct in select list works only if codegen is OFF. + // Struct in select list. AnalysisContext ctx = createAnalysisCtx(); AnalyzesOk("select alltypes from functional_orc_def.complextypes_structs", ctx); AnalyzesOk("select int_array_col from functional.allcomplextypes"); @@ -1030,7 +1030,13 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "collection 'int_array_col' of type 'ARRAY<INT>'"); AnalysisError("select tiny_struct from functional_orc_def.complextypes_structs " + "union all select tiny_struct from functional_orc_def.complextypes_structs", ctx, - "Set operations don't support STRUCT type. STRUCT<b:BOOLEAN> in tiny_struct"); + "Set operations don't support STRUCT types or types containing STRUCT types." + + " STRUCT<b:BOOLEAN> in tiny_struct"); + AnalysisError("select all_mix from functional_parquet.collection_struct_mix " + + "union all select all_mix from functional_parquet.collection_struct_mix", ctx, + "Set operations don't support STRUCT types or types containing STRUCT types. 
" + "MAP<INT,STRUCT<big:STRUCT<arr:ARRAY<STRUCT<inner_arr:ARRAY<ARRAY<INT>>," + "m:TIMESTAMP>>,n:INT>,small:STRUCT<str:STRING,i:INT>>> in all_mix."); AnalyzesOk("select 1 from " + "(select int_array_col from functional.allcomplextypes) v"); AnalyzesOk("select int_array_col from " + diff --git a/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test b/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test index 56175127a..732aac87c 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test @@ -549,3 +549,9 @@ from collection_struct_mix, collection_struct_mix.arr_contains_nested_struct arr ---- TYPES INT,STRING,SMALLINT ==== +---- QUERY +# Unions of types containing structs are not allowed. +select all_mix from collection_struct_mix union all select all_mix from collection_struct_mix +---- CATCH +AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. MAP<INT,STRUCT<big:STRUCT<arr:ARRAY<STRUCT<inner_arr:ARRAY<ARRAY<INT>>,m:TIMESTAMP>>,n:INT>,small:STRUCT<str:STRING,i:INT>>> in all_mix. +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test index 4d4abc5ab..13dae41ea 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test @@ -133,7 +133,7 @@ tinyint,string,string select 1, struct_contains_arr, struct_contains_nested_arr, all_mix from collection_struct_mix union all select 2, struct_contains_arr, struct_contains_nested_arr, all_mix from collection_struct_mix ---- CATCH -AnalysisException: Set operations don't support STRUCT type. 
STRUCT<arr:ARRAY<INT>> in struct_contains_arr +AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. STRUCT<arr:ARRAY<INT>> in struct_contains_arr. ==== ---- QUERY select 1 from (select int_array from complextypestbl) s diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test index aedebd289..e21747c08 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test @@ -193,7 +193,7 @@ tinyint,string,string select 1, struct_contains_map, all_mix from collection_struct_mix union all select 2, struct_contains_map, all_mix from collection_struct_mix ---- CATCH -AnalysisException: Set operations don't support STRUCT type. STRUCT<m:MAP<INT,STRING>> in struct_contains_map +AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. STRUCT<m:MAP<INT,STRING>> in struct_contains_map. ==== ---- QUERY select 1 from (select int_map from complextypestbl) s diff --git a/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test index 082892649..546d1c527 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test @@ -603,7 +603,7 @@ select id, tiny_struct from complextypes_structs union all select id, tiny_struct from complextypes_structs; ---- CATCH -AnalysisException: Set operations don't support STRUCT type. STRUCT<b:BOOLEAN> in tiny_struct +AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. STRUCT<b:BOOLEAN> in tiny_struct. ==== ---- QUERY # Ordering by struct column is not supported.
