This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4c28ab02c658b559c47cf9993448218e228d61c9 Author: Daniel Becker <[email protected]> AuthorDate: Tue Dec 5 15:00:15 2023 +0100 IMPALA-12001: Informative error message for complex types with DISTINCT Before this change, queries with SELECT DISTINCT on a complex type failed. With structs, we got a FE exception: use functional_parquet; select distinct(struct_val) from alltypes_structs; ERROR: IllegalStateException: null With collections, the BE hits a DCHECK and crashes: use functional_parquet; select distinct(arr1) from complextypes_arrays; Socket error 104: [Errno 104] Connection reset by peer Aggregate functions with complex DISTINCT parameters also failed without a clear error message. For example: select count(distinct struct_val) from alltypes_structs; select count(distinct arr1) from complextypes_arrays; To support DISTINCT for complex types we would need to implement equality and hash for them. We are not planning to do it in the near future, so this change introduces informative error messages in these cases. Testing: - added test queries for SELECT DISTINCT and SELECT COUNT(DISTINCT ...) with arrays, maps and structs, expecting the correct error messages. 
Change-Id: Ibe2642d1683a10fd05a95e2ad8470d16f0d5242c Reviewed-on: http://gerrit.cloudera.org:8080/20752 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../org/apache/impala/analysis/MultiAggregateInfo.java | 16 ++++++++++++++++ .../java/org/apache/impala/analysis/SelectStmt.java | 6 ++++++ .../queries/QueryTest/nested-array-in-select-list.test | 12 +++++++++++- .../queries/QueryTest/nested-map-in-select-list.test | 18 ++++++++++++++---- .../QueryTest/nested-struct-in-select-list.test | 13 +++++++++++-- 5 files changed, 58 insertions(+), 7 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java b/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java index 48cb37f96..24c12fda9 100644 --- a/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java +++ b/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java @@ -249,6 +249,10 @@ public class MultiAggregateInfo { groupingBuiltinExprs.add(aggExpr); } else if (aggExpr.isDistinct()) { List<Expr> children = AggregateFunction.getCanonicalDistinctAggChildren(aggExpr); + + // Complex types are not supported as DISTINCT parameters of aggregate functions. + checkComplexDistinctParams(aggExpr, children); + int groupIdx = distinctExprs.indexOf(children); List<FunctionCallExpr> groupAggFns; if (groupIdx == -1) { @@ -342,6 +346,18 @@ public class MultiAggregateInfo { } } + private static void checkComplexDistinctParams(FunctionCallExpr aggExpr, + List<Expr> params) throws AnalysisException { + for (Expr child : params) { + if (child.getType().isComplexType()) { + throw new AnalysisException("Complex types are not supported " + + "as DISTINCT parameters of aggregate functions. 
Distinct parameter: '" + + child.toSql() + "', type: '" + child.getType().toSql() + + "' in aggregate function '" + aggExpr.toSql() + "'."); + } + } + } + /** * Implementation of analyze() for aggregation with grouping sets. * Does not handle distinct aggregate functions yet. diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index 25a79c189..48c123759 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -562,6 +562,12 @@ public class SelectStmt extends QueryStmt { } for (Expr expr: resultExprs_) { + if (selectList_.isDistinct() && expr.getType().isComplexType()) { + throw new AnalysisException("Complex types are not supported " + + "in SELECT DISTINCT clauses. Expr: '" + expr.toSql() + "', type: '" + + expr.getType().toSql() + "'."); + } + if (expr.getType().isArrayType()) { ArrayType arrayType = (ArrayType) expr.getType(); if (!arrayType.getItemType().isSupported()) { diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test index 5c84778bd..4d4abc5ab 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test @@ -425,4 +425,14 @@ on a.id = b.id where a.id < 3; 0,'NULL','NULL' ---- TYPES INT,STRING,STRING -===== +==== +---- QUERY +select distinct arr1 from complextypes_arrays +---- CATCH +AnalysisException: Complex types are not supported in SELECT DISTINCT clauses. +==== +---- QUERY +select count(distinct arr1) from complextypes_arrays +---- CATCH +AnalysisException: Complex types are not supported as DISTINCT parameters of aggregate functions. 
+==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test index 337dfbd55..aedebd289 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test @@ -392,7 +392,7 @@ select id, map_1d, map_2d, map_3d, arr_int_3d, map_map_array from collection_tbl 3,'{645:"fourth even-toed ungulate",5:"fifth"}','{1:{10:"ten",20:"twentieth even-toed ungulate"},2:{30:"thirty even-toed ungulates",40:"forty"}}','{1:{10:{100:"hundred",200:"two hundred even-toed ungulates"},20:{300:"three hundred even-toed ungulates",400:"four hundred"}},2:{30:{500:"five hundred even-toed ungulates",600:"six hundred"},40:{700:"seven hundred even-toed ungulates",800:"eight hundred"}}}','[[[1,null,2,null],[null,15]],[[null,4]]]','{1:{10:[100,200],20:[300,400]},2:{30:[500, [...] ---- TYPES INT,STRING,STRING,STRING,STRING,STRING -===== +==== ---- QUERY select id, map_1d, map_2d, mma.value mma_value, ma.value ma_value from collection_tbl c, c.map_map_array mma, mma.value ma; @@ -411,7 +411,7 @@ from collection_tbl c, c.map_map_array mma, mma.value ma; 3,'{645:"fourth even-toed ungulate",5:"fifth"}','{1:{10:"ten",20:"twentieth even-toed ungulate"},2:{30:"thirty even-toed ungulates",40:"forty"}}','{30:[500,600],40:[700,800]}','[700,800]' ---- TYPES INT,STRING,STRING,STRING,STRING -===== +==== ---- QUERY -- Test that map keys are printed correctly. 
set CONVERT_LEGACY_HIVE_PARQUET_UTC_TIMESTAMPS=1; @@ -435,7 +435,7 @@ from collection_tbl; '{true:"true even-toed ungulate",false:"false"}','{-1:"a nice even-toed ungulate",0:"best even-toed ungulate",1:"c"}','{-1:"a nice even-toed ungulate",0:"best even-toed ungulate",1:"c"}','{-1:"a nice even-toed ungulate",0:"best even-toed ungulate",1:"c"}','{-1.5:"a nice even-toed ungulate",0.25:"best even-toed ungulate",1.75:"c"}','{-1.5:"a nice even-toed ungulate",0.25:"best even-toed ungulate",1.75:"c"}','{-1.8:"a nice even-toed ungulate",0.2:"best even-toed ungulate",1.2:"c"}','{"one" [...] ---- TYPES STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING -===== +==== ---- QUERY -- Test that map keys are printed correctly with STRINGIFY_MAP_KEYS=true. set CONVERT_LEGACY_HIVE_PARQUET_UTC_TIMESTAMPS=1; @@ -460,4 +460,14 @@ from collection_tbl; '{"true":"true even-toed ungulate","false":"false"}','{"-1":"a nice even-toed ungulate","0":"best even-toed ungulate","1":"c"}','{"-1":"a nice even-toed ungulate","0":"best even-toed ungulate","1":"c"}','{"-1":"a nice even-toed ungulate","0":"best even-toed ungulate","1":"c"}','{"-1.5":"a nice even-toed ungulate","0.25":"best even-toed ungulate","1.75":"c"}','{"-1.5":"a nice even-toed ungulate","0.25":"best even-toed ungulate","1.75":"c"}','{"-1.8":"a nice even-toed ungulate","0.2":"best [...] ---- TYPES STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING -===== +==== +---- QUERY +select distinct int_map from complextypestbl +---- CATCH +AnalysisException: Complex types are not supported in SELECT DISTINCT clauses. +==== +---- QUERY +select count(distinct int_map) from complextypestbl; +---- CATCH +AnalysisException: Complex types are not supported as DISTINCT parameters of aggregate functions. 
+==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test index 846c78d2f..e00500632 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test @@ -452,8 +452,7 @@ from complextypes_nested_structs; INT,STRING ==== ---- QUERY -# Subquery that returns a complex type is not supported. -# IMPALA-9500 +# Complex types in IN predicates are not supported. select outer_struct from complextypes_nested_structs where outer_struct in @@ -462,3 +461,13 @@ where outer_struct in AnalysisException: A subquery can't return complex types. (SELECT outer_struct FROM functional_parquet.complextypes_nested_structs) ==== +---- QUERY +select distinct outer_struct from complextypes_nested_structs +---- CATCH +AnalysisException: Complex types are not supported in SELECT DISTINCT clauses. +==== +---- QUERY +select count(distinct outer_struct) from complextypes_nested_structs; +---- CATCH +AnalysisException: Complex types are not supported as DISTINCT parameters of aggregate functions. +====
