IMPALA-6307: CTAS statement fails with duplicate column exception. A CTAS statement with a 'partition by' clause causes the statement to fail with a duplicate column name exception. This is happening because on expression rewrite, the partition defs state is not reset.
IMPALA-5796 added TableDef::reset(). This patch expands the method by adding calls to reset ColumnDefs and PartitionColumnDefs. Testing: * Regression test added to AnalyzeDDLTest. * Exhaustive Jenkins build and test. Change-Id: Iee053abecd4384e15eec8db10cb06f5ace159da2 Reviewed-on: http://gerrit.cloudera.org:8080/8930 Reviewed-by: Dimitris Tsirogiannis <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/91d109d6 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/91d109d6 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/91d109d6 Branch: refs/heads/master Commit: 91d109d6e982b83500c7869f866ec719f73eb7b6 Parents: 6bff0bd Author: Zoram Thanga <[email protected]> Authored: Tue Jan 2 11:02:17 2018 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Jan 13 03:44:43 2018 +0000 ---------------------------------------------------------------------- .../org/apache/impala/analysis/AnalysisContext.java | 3 ++- .../apache/impala/analysis/CreateTableAsSelectStmt.java | 9 ++++++--- .../org/apache/impala/analysis/TableDataLayout.java | 5 +++++ .../main/java/org/apache/impala/analysis/TableDef.java | 3 ++- .../java/org/apache/impala/analysis/AnalyzeDDLTest.java | 12 ++++++++++++ 5 files changed, 27 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/91d109d6/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java index 53aa000..71a185e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java +++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java @@ -404,7 +404,8 @@ public class AnalysisContext { if (reAnalyze) { // The rewrites should have no user-visible effect. Remember the original result // types and column labels to restore them after the rewritten stmt has been - // reset() and re-analyzed. + // reset() and re-analyzed. For a CTAS statement, the types represent column types + // of the table that will be created, including the partition columns, if any. List<Type> origResultTypes = Lists.newArrayList(); for (Expr e: analysisResult_.stmt_.getResultExprs()) { origResultTypes.add(e.getType()); http://git-wip-us.apache.org/repos/asf/impala/blob/91d109d6/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java index 842083e..ba9a02a 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java @@ -158,7 +158,6 @@ public class CreateTableAsSelectStmt extends StatementBase { // Add the columns from the select statement to the create statement. int colCnt = tmpQueryStmt.getColLabels().size(); - createStmt_.getColumnDefs().clear(); for (int i = 0; i < colCnt; ++i) { ColumnDef colDef = new ColumnDef(tmpQueryStmt.getColLabels().get(i), null, Collections.<ColumnDef.Option, Object>emptyMap()); @@ -220,8 +219,12 @@ public class CreateTableAsSelectStmt extends StatementBase { super.castResultExprs(types); // Set types of column definitions. List<ColumnDef> colDefs = createStmt_.getColumnDefs(); - Preconditions.checkState(colDefs.size() == types.size()); - for (int i = 0; i < types.size(); ++i) colDefs.get(i).setType(types.get(i)); + List<ColumnDef> partitionColDefs = createStmt_.getPartitionColumnDefs(); + Preconditions.checkState(colDefs.size() + partitionColDefs.size() == types.size()); + for (int i = 0; i < colDefs.size(); ++i) colDefs.get(i).setType(types.get(i)); + for (int i = 0; i < partitionColDefs.size(); ++i) { + partitionColDefs.get(i).setType(types.get(i + colDefs.size())); + } } @Override http://git-wip-us.apache.org/repos/asf/impala/blob/91d109d6/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java b/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java index aef5732..fea809d 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java +++ b/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java @@ -52,4 +52,9 @@ class TableDataLayout { List<ColumnDef> getPartitionColumnDefs() { return partitionColDefs_; } List<KuduPartitionParam> getKuduPartitionParams() { return kuduPartitionParams_; } + + public void reset() { + partitionColDefs_.clear(); + kuduPartitionParams_.clear(); + } } http://git-wip-us.apache.org/repos/asf/impala/blob/91d109d6/fe/src/main/java/org/apache/impala/analysis/TableDef.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDef.java b/fe/src/main/java/org/apache/impala/analysis/TableDef.java index f4901f9..178a976 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TableDef.java +++ b/fe/src/main/java/org/apache/impala/analysis/TableDef.java @@ -35,7 +35,6 @@ import org.apache.impala.thrift.TCatalogObjectType; import org.apache.impala.thrift.THdfsFileFormat; import org.apache.impala.util.MetaStoreUtil; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -158,6 +157,8 @@ class TableDef { public void reset() { primaryKeyColDefs_.clear(); + dataLayout_.reset(); + columnDefs_.clear(); isAnalyzed_ = false; } http://git-wip-us.apache.org/repos/asf/impala/blob/91d109d6/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index 09f5a73..53ee930 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -1507,6 +1507,12 @@ public class AnalyzeDDLTest extends FrontendTestBase { "as select count(*) as CNT from functional.alltypes"); AnalyzesOk("create table functional.tbl as select a.* from functional.alltypes a " + "join functional.alltypes b on (a.int_col=b.int_col) limit 1000"); + // CTAS with a select query that requires expression rewrite (IMPALA-6307) + AnalyzesOk("create table functional.ctas_tbl partitioned by (year) as " + + "with tmp as (select a.timestamp_col, a.year from functional.alltypes a " + + "left join functional.alltypes b " + + "on b.timestamp_col between a.timestamp_col and a.timestamp_col) " + + "select a.timestamp_col, a.year from tmp a"); // Caching operations AnalyzesOk("create table functional.newtbl cached in 'testPool'" + @@ -1606,6 +1612,12 @@ public class AnalyzeDDLTest extends FrontendTestBase { // IMPALA-5796: CTAS into a Kudu table with expr rewriting. AnalyzesOk("create table t primary key(id) stored as kudu as select id, bool_col " + "from functional.alltypestiny where id between 0 and 10"); + // CTAS with a select query that requires expression rewrite (IMPALA-6307) + AnalyzesOk("create table t primary key(year) stored as kudu as " + + "with tmp as (select a.timestamp_col, a.year from functional.alltypes a " + + "left join functional.alltypes b " + + "on b.timestamp_col between a.timestamp_col and a.timestamp_col) " + + "select a.timestamp_col, a.year from tmp a"); // CTAS in an external Kudu table AnalysisError("create external table t stored as kudu " + "tblproperties('kudu.table_name'='t') as select id, int_col from " +
