IMPALA-5339: Fix analysis with sort.columns and expr rewrites IMPALA-4166 introduced a bug by duplicating code that adds sort expressions. Upon re-analysis, this code would hit an IndexOutOfBoundsException.
Change-Id: Ibebba29509ae7eaa691fe305500cda6bd41a179a Reviewed-on: http://gerrit.cloudera.org:8080/6921 Reviewed-by: Lars Volker <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3610533f Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3610533f Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3610533f Branch: refs/heads/master Commit: 3610533f4bbe8b5db74e64ad0457e9e2aff589ce Parents: d9d4a6c Author: Lars Volker <[email protected]> Authored: Thu May 18 19:57:05 2017 +0200 Committer: Impala Public Jenkins <[email protected]> Committed: Sat May 20 00:56:45 2017 +0000 ---------------------------------------------------------------------- .../analysis/AlterTableSetTblProperties.java | 3 +- .../org/apache/impala/analysis/InsertStmt.java | 4 +- .../org/apache/impala/analysis/TableDef.java | 2 +- .../queries/PlannerTest/insert-sort-by.test | 79 ++++++++++++++++++++ 4 files changed, 82 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3610533f/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java index 44024f4..b03d56d 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java +++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java @@ -42,7 +42,6 @@ import org.apache.impala.util.MetaStoreUtil; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Strings; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; /** @@ -203,7 +202,7 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt { Map<String, String> tblProperties) throws AnalysisException { if (!tblProperties.containsKey( AlterTableSortByStmt.TBL_PROP_SORT_COLUMNS)) { - return ImmutableList.of(); + return Lists.newArrayList(); } // ALTER TABLE SET is not supported on HBase tables at all, see http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3610533f/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java index ae7149f..412586c 100644 --- a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java @@ -240,6 +240,7 @@ public class InsertStmt extends StatementBase { hasClusteredHint_ = false; hasNoClusteredHint_ = false; sortExprs_.clear(); + sortColumns_.clear(); resultExprs_.clear(); mentionedColumns_.clear(); primaryKeyExprs_.clear(); @@ -518,9 +519,6 @@ public class InsertStmt extends StatementBase { } } - // Assign sortExprs_ based on sortColumns_. - for (Integer colIdx: sortColumns_) sortExprs_.add(resultExprs_.get(colIdx)); - if (isHBaseTable && overwrite_) { throw new AnalysisException("HBase doesn't have a way to perform INSERT OVERWRITE"); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3610533f/fe/src/main/java/org/apache/impala/analysis/TableDef.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDef.java b/fe/src/main/java/org/apache/impala/analysis/TableDef.java index f524595..bd07af8 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TableDef.java +++ b/fe/src/main/java/org/apache/impala/analysis/TableDef.java @@ -332,7 +332,7 @@ class TableDef { } } Preconditions.checkState(numColumns == colIdxs.size()); - return ImmutableList.copyOf(colIdxs); + return Lists.newArrayList(colIdxs); } private void analyzeOptions(Analyzer analyzer) throws AnalysisException { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3610533f/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test index 788703b..2a9d272 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test @@ -324,6 +324,85 @@ WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.year,a.month)] partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> b.id ==== +# IMPALA-5339: Sort columns with a union to trigger expr rewrite +insert into table test_sort_by.t_nopart +select 0, cast(id as int), false from (select 1 as id union select cast(2 as int) as id) sub +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +02:SORT +| order by: CAST(id AS INT) ASC NULLS LAST +| +01:AGGREGATE [FINALIZE] +| group by: id +| +00:UNION + constant-operands=2 +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +02:SORT +| order by: CAST(id AS INT) ASC NULLS LAST +| +01:AGGREGATE [FINALIZE] +| group by: id +| +00:UNION + constant-operands=2 +==== +# IMPALA-5339: Sort columns with a subquery to trigger expr rewrite +insert into table test_sort_by.t_nopart +select 0, id, false from test_sort_by.t_nopart where id = (select min(id) from test_sort_by.t) +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +04:SORT +| order by: id ASC NULLS LAST +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: id = min(id) +| runtime filters: RF000 <- min(id) +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | +| 01:SCAN HDFS [test_sort_by.t] +| partitions=0/0 files=0 size=0B +| +00:SCAN HDFS [test_sort_by.t_nopart] + partitions=1/0 files=0 size=0B + runtime filters: RF000 -> id +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +07:SORT +| order by: id ASC NULLS LAST +| +03:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: id = min(id) +| runtime filters: RF000 <- min(id) +| +|--06:EXCHANGE [BROADCAST] +| | +| 05:AGGREGATE [FINALIZE] +| | output: min:merge(id) +| | +| 04:EXCHANGE [UNPARTITIONED] +| | +| 02:AGGREGATE +| | output: min(id) +| | +| 01:SCAN HDFS [test_sort_by.t] +| partitions=0/0 files=0 size=0B +| +00:SCAN HDFS [test_sort_by.t_nopart] + partitions=1/0 files=0 size=0B + runtime filters: RF000 -> id +==== # Test that using both the sortby hint and sort columns work. # TODO: Remove this in IMPALA-5157. insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id) */
