Repository: incubator-impala Updated Branches: refs/heads/master 0b9d2cecd -> a2caacf87
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java index 98b55f9..26def5b 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java @@ -296,22 +296,62 @@ public class ToSqlTest extends FrontendTestBase { @Test public void TestCreateTable() throws AnalysisException { - testToSql("create table p (a int) partitioned by (day string) " + + testToSql("create table p (a int) partitioned by (day string) sort by (a) " + "comment 'This is a test'", "default", "CREATE TABLE default.p ( a INT ) PARTITIONED BY ( day STRING ) " + - "COMMENT 'This is a test' STORED AS TEXTFILE", true); + "SORT BY ( a ) COMMENT 'This is a test' STORED AS TEXTFILE" , true); + // Table with SORT BY clause. + testToSql("create table p (a int, b int) partitioned by (day string) sort by (a ,b) ", + "default", + "CREATE TABLE default.p ( a INT, b INT ) PARTITIONED BY ( day STRING ) " + + "SORT BY ( a, b ) STORED AS TEXTFILE" , true); } @Test public void TestCreateTableAsSelect() throws AnalysisException { // Partitioned table. testToSql("create table p partitioned by (int_col) as " + - "select double_col, int_col from functional.alltypes", - "default", + "select double_col, int_col from functional.alltypes", "default", "CREATE TABLE default.p PARTITIONED BY ( int_col ) STORED AS " + "TEXTFILE AS SELECT double_col, int_col FROM functional.alltypes", true); + // Table with a comment. + testToSql("create table p partitioned by (int_col) comment 'This is a test' as " + + "select double_col, int_col from functional.alltypes", "default", + "CREATE TABLE default.p PARTITIONED BY ( int_col ) COMMENT 'This is a test' " + + "STORED AS TEXTFILE AS SELECT double_col, int_col FROM functional.alltypes", + true); + // Table with SORT BY clause. + testToSql("create table p partitioned by (int_col) sort by (string_col) as " + + "select double_col, string_col, int_col from functional.alltypes", "default", + "CREATE TABLE default.p PARTITIONED BY ( int_col ) SORT BY ( string_col ) " + + "STORED AS TEXTFILE AS SELECT double_col, string_col, int_col FROM " + + "functional.alltypes", true); + } + + @Test + public void TestCreateTableLike() throws AnalysisException { + testToSql("create table p like functional.alltypes", "default", + "CREATE TABLE p LIKE functional.alltypes"); + // Table with sort columns. + testToSql("create table p sort by (id) like functional.alltypes", "default", + "CREATE TABLE p SORT BY (id) LIKE functional.alltypes"); + } + + @Test + public void TestCreateTableLikeFile() throws AnalysisException { + testToSql("create table if not exists p like parquet " + + "'/test-warehouse/schemas/alltypestiny.parquet'", "default", + "CREATE TABLE IF NOT EXISTS default.p LIKE PARQUET " + + "'hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet' " + + "STORED AS TEXTFILE", true); + // Table with sort columns. + testToSql("create table if not exists p like parquet " + + "'/test-warehouse/schemas/alltypestiny.parquet' sort by (int_col, id)", "default", + "CREATE TABLE IF NOT EXISTS default.p LIKE PARQUET " + + "'hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet' " + + "SORT BY ( int_col, id ) STORED AS TEXTFILE", true); } @Test http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java index 6202991..033b0e2 100644 --- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java +++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java @@ -38,6 +38,7 @@ import org.apache.impala.analysis.SqlScanner; import org.apache.impala.authorization.AuthorizationConfig; import org.apache.impala.catalog.AggregateFunction; import org.apache.impala.catalog.Catalog; +import org.apache.impala.catalog.CatalogException; import org.apache.impala.catalog.Column; import org.apache.impala.catalog.Db; import org.apache.impala.catalog.Function; @@ -48,6 +49,7 @@ import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.Table; import org.apache.impala.catalog.Type; import org.apache.impala.catalog.View; +import org.apache.impala.service.CatalogOpExecutor; import org.apache.impala.service.Frontend; import org.apache.impala.testutil.ImpaladTestCatalog; import org.apache.impala.testutil.TestUtils; @@ -164,23 +166,33 @@ public class FrontendTestBase { /** * Add a new dummy table to the catalog based on the given CREATE TABLE sql. - * The dummy table only has the column definitions and no other metadata. + * The dummy table only has the column definitions and the metastore table set, but no + * other metadata. * Returns the new dummy table. * The test tables are registered in testTables_ and removed in the @After method. */ protected Table addTestTable(String createTableSql) { CreateTableStmt createTableStmt = (CreateTableStmt) AnalyzesOk(createTableSql); - // Currently does not support partitioned tables. - Preconditions.checkState(createTableStmt.getPartitionColumnDefs().isEmpty()); Db db = catalog_.getDb(createTableStmt.getDb()); Preconditions.checkNotNull(db, "Test tables must be created in an existing db."); - HdfsTable dummyTable = new HdfsTable(null, db, + org.apache.hadoop.hive.metastore.api.Table msTbl = + CatalogOpExecutor.createMetaStoreTable(createTableStmt.toThrift()); + HdfsTable dummyTable = new HdfsTable(msTbl, db, createTableStmt.getTbl(), createTableStmt.getOwner()); - List<ColumnDef> columnDefs = createTableStmt.getColumnDefs(); + List<ColumnDef> columnDefs = Lists.newArrayList( + createTableStmt.getPartitionColumnDefs()); + dummyTable.setNumClusteringCols(columnDefs.size()); + columnDefs.addAll(createTableStmt.getColumnDefs()); for (int i = 0; i < columnDefs.size(); ++i) { ColumnDef colDef = columnDefs.get(i); dummyTable.addColumn(new Column(colDef.getColName(), colDef.getType(), i)); } + try { + dummyTable.addDefaultPartition(msTbl.getSd()); + } catch (CatalogException e) { + e.printStackTrace(); + fail("Failed to add test table:\n" + createTableSql); + } db.addTable(dummyTable); testTables_.add(dummyTable); return dummyTable; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/fe/src/test/java/org/apache/impala/planner/PlannerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index a67d353..b056951 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -94,6 +94,19 @@ public class PlannerTest extends PlannerTestBase { } @Test + public void testInsertSortBy() { + // Add a test table with a SORT BY clause to test that the corresponding sort nodes + // are added by the insert statements in insert-sort-by.test. + addTestDb("test_sort_by", "Test DB for SORT BY clause."); + addTestTable("create table test_sort_by.t (id int, int_col int, " + + "bool_col boolean) partitioned by (year int, month int) " + + "sort by (int_col, bool_col) location '/'"); + addTestTable("create table test_sort_by.t_nopart (id int, int_col int, " + + "bool_col boolean) sort by (int_col, bool_col) location '/'"); + runPlannerTestFile("insert-sort-by", "test_sort_by"); + } + + @Test public void testHdfs() { runPlannerTestFile("hdfs"); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test index ede917d..0129ad1 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test @@ -158,3 +158,110 @@ WRITE TO HDFS [default.t, OVERWRITE=false] 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B ==== +# CTAS with sort columns +create table t sort by (int_col, bool_col) as +select * from functional.alltypes +---- PLAN +WRITE TO HDFS [default.t, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [default.t, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# CTAS with partitions and sort columns +create table t partitioned by (year, month) sort by (int_col, bool_col) as +select * from functional.alltypes +---- PLAN +WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +02:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# CTAS with more complex select query +create table t partitioned by (c_nationkey) sort by (c_custkey) as +select c_custkey, max(o_totalprice) as maxprice, c_nationkey + from tpch.orders join tpch.customer on c_custkey = o_custkey + where c_nationkey < 10 + group by c_custkey, c_nationkey +---- PLAN +WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)] +| partitions=25 +| +04:SORT +| order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST +| +03:AGGREGATE [FINALIZE] +| output: max(o_totalprice) +| group by: c_custkey, c_nationkey +| +02:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| runtime filters: RF000 <- c_custkey +| +|--01:SCAN HDFS [tpch.customer] +| partitions=1/1 files=1 size=23.08MB +| predicates: c_nationkey < 10 +| +00:SCAN HDFS [tpch.orders] + partitions=1/1 files=1 size=162.56MB + runtime filters: RF000 -> o_custkey +---- DISTRIBUTEDPLAN +WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)] +| partitions=25 +| +08:SORT +| order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST +| +07:EXCHANGE [HASH(c_nationkey)] +| +06:AGGREGATE [FINALIZE] +| output: max:merge(o_totalprice) +| group by: c_custkey, c_nationkey +| +05:EXCHANGE [HASH(c_custkey,c_nationkey)] +| +03:AGGREGATE [STREAMING] +| output: max(o_totalprice) +| group by: c_custkey, c_nationkey +| +02:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: o_custkey = c_custkey +| runtime filters: RF000 <- c_custkey +| +|--04:EXCHANGE [BROADCAST] +| | +| 01:SCAN HDFS [tpch.customer] +| partitions=1/1 files=1 size=23.08MB +| predicates: c_nationkey < 10 +| +00:SCAN HDFS [tpch.orders] + partitions=1/1 files=1 size=162.56MB + runtime filters: RF000 -> o_custkey +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test new file mode 100644 index 0000000..788703b --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test @@ -0,0 +1,377 @@ +# IMPALA-4166: insert into tables with sort.columns property adds sort node. Clustering +# columns are added to the sort columns. +insert into table test_sort_by.t partition(year, month) /*+ shuffle */ +select id, int_col, bool_col, year, month from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +02:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [HASH(year,month)] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: insert with noshuffle hint into tables with sort.columns property adds +# sort node. +insert into table test_sort_by.t partition(year, month) /*+ noshuffle */ +select id, int_col, bool_col, year, month from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: insert into tables with sort.columns property adds sort node. Clustering +# columns are added to the sort columns. noclustered hint is ignored when sort.columns +# are specified. +insert into table test_sort_by.t partition(year, month) /*+ noclustered */ +select id, int_col, bool_col, year, month from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +02:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [HASH(year,month)] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: insert into tables with sort.columns property adds sort node. +insert into table test_sort_by.t_nopart /*+ shuffle */ +select id, int_col, bool_col from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +02:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [UNPARTITIONED] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: insert with noshuffle hint into tables with sort.columns property adds +# sort node. +insert into table test_sort_by.t_nopart /*+ noshuffle */ +select id, int_col, bool_col from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: sort columns are correct when using an identity column permutation. +insert into table test_sort_by.t_nopart (id, int_col, bool_col) /*+ shuffle */ +select id, int_col, bool_col from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +02:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [UNPARTITIONED] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: sort columns are correct when using a non-trivial column permutation. +insert into table test_sort_by.t_nopart (bool_col, id, int_col) /*+ shuffle */ +select bool_col, id, int_col from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +02:SORT +| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [UNPARTITIONED] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: sort columns are correct when using a partial column permutation. +insert into table test_sort_by.t_nopart (bool_col, id) /*+ shuffle */ +select bool_col, id from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:SORT +| order by: bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +02:SORT +| order by: bool_col ASC NULLS LAST +| +01:EXCHANGE [UNPARTITIONED] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: no sort node is added when using a partial column permutation and none of +# the sort columns appear in the permutation. +insert into table test_sort_by.t_nopart (id) /*+ shuffle */ +select id from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false] +| partitions=1 +| +01:EXCHANGE [UNPARTITIONED] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# IMPALA-4166: sort columns with a join +insert into table test_sort_by.t partition(year, month) /*+ noclustered */ +select a.id, b.int_col, a.bool_col, b.year, a.month +from functional.alltypes a join functional.alltypes b on a.id = b.id order by b.string_col +limit 10 +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +04:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +03:TOP-N [LIMIT=10] +| order by: string_col ASC +| +02:HASH JOIN [INNER JOIN] +| hash predicates: b.id = a.id +| runtime filters: RF000 <- a.id +| +|--00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| +01:SCAN HDFS [functional.alltypes b] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> b.id +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +07:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| +06:MERGING-EXCHANGE [UNPARTITIONED] +| order by: string_col ASC +| limit: 10 +| +03:TOP-N [LIMIT=10] +| order by: string_col ASC +| +02:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: b.id = a.id +| runtime filters: RF000 <- a.id +| +|--05:EXCHANGE [HASH(a.id)] +| | +| 00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| +04:EXCHANGE [HASH(b.id)] +| +01:SCAN HDFS [functional.alltypes b] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> b.id +==== +# IMPALA-4166: sort columns with a join and agg +insert into table test_sort_by.t partition(year, month) /*+ noclustered */ +select a.id, max(b.int_col), min(a.bool_col), b.year, a.month +from functional.alltypes a join functional.alltypes b on a.id = b.id +group by a.id, b.year, a.month +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.year,a.month)] +| partitions=24 +| +04:SORT +| order by: b.year ASC NULLS LAST, a.month ASC NULLS LAST, max(b.int_col) ASC NULLS LAST, min(a.bool_col) ASC NULLS LAST +| +03:AGGREGATE [FINALIZE] +| output: max(b.int_col), min(a.bool_col) +| group by: a.id, b.year, a.month +| +02:HASH JOIN [INNER JOIN] +| hash predicates: b.id = a.id +| runtime filters: RF000 <- a.id +| +|--00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| +01:SCAN HDFS [functional.alltypes b] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> b.id +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.year,a.month)] +| partitions=24 +| +09:SORT +| order by: b.year ASC NULLS LAST, a.month ASC NULLS LAST, max(b.int_col) ASC NULLS LAST, min(a.bool_col) ASC NULLS LAST +| +08:EXCHANGE [HASH(b.year,a.month)] +| +07:AGGREGATE [FINALIZE] +| output: max:merge(b.int_col), min:merge(a.bool_col) +| group by: a.id, b.year, a.month +| +06:EXCHANGE [HASH(a.id,b.year,a.month)] +| +03:AGGREGATE [STREAMING] +| output: max(b.int_col), min(a.bool_col) +| group by: a.id, b.year, a.month +| +02:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: b.id = a.id +| runtime filters: RF000 <- a.id +| +|--05:EXCHANGE [HASH(a.id)] +| | +| 00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| +04:EXCHANGE [HASH(b.id)] +| +01:SCAN HDFS [functional.alltypes b] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> b.id +==== +# Test that using both the sortby hint and sort columns work. +# TODO: Remove this in IMPALA-5157. +insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id) */ +select id, int_col, bool_col, year, month from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +02:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST +| +01:EXCHANGE [HASH(year,month)] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# Test that using both the sortby hint and sort columns work when the column lists +# overlap. +# TODO: Remove this in IMPALA-5157. +insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id, bool_col) */ +select id, int_col, bool_col, year, month from functional.alltypes +---- PLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +01:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST, bool_col ASC NULLS LAST +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] +| partitions=24 +| +02:SORT +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST, bool_col ASC NULLS LAST +| +01:EXCHANGE [HASH(year,month)] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-planner/queries/PlannerTest/insert.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test index 5f54f86..e3edae0 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test @@ -714,7 +714,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month) | partitions=24 | 01:SORT -| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB @@ -723,7 +723,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month) | partitions=24 | 02:SORT -| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST | 01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)] | @@ -739,7 +739,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month) | partitions=24 | 01:SORT -| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB @@ -748,7 +748,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month) | partitions=24 | 01:SORT -| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/alter-table.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/alter-table.test b/testdata/workloads/functional-query/queries/QueryTest/alter-table.test index 69eede9..2b087ff 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/alter-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/alter-table.test @@ -1055,3 +1055,216 @@ select x, a from i1670A_alter_1 order by x, a; ---- TYPES INT, INT ==== +---- QUERY +create table insert_data (i int, d double, f float, b boolean); +insert into insert_data values (1, 2, 3, false), (4, 5, 6, true); +==== +---- QUERY +create table insert_sorted (i int, d double, f float, b boolean); +==== +---- QUERY +# Test setting the sort.columns property +alter table insert_sorted sort by(i, d); +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','i,d ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted select i, d, f, b from insert_data; +---- RUNTIME_PROFILE +row_regex: .*order by: i ASC NULLS LAST, d ASC NULLS LAST +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted; +---- RESULTS +2 +==== +---- QUERY +# Test altering the sort.columns property +alter table insert_sorted sort by(b, d, f); +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','b,d,f ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted select i, d, f, b from insert_data; +---- RUNTIME_PROFILE +row_regex: .*order by: b ASC NULLS LAST, d ASC NULLS LAST, f ASC NULLS LAST +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted; +---- RESULTS +4 +==== +---- QUERY +# Test renaming a column in the sort by list. +alter table insert_sorted change d e double; +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','b,e,f ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted select i, d, f, b from insert_data; +---- RUNTIME_PROFILE +row_regex: .*order by: b ASC NULLS LAST, d ASC NULLS LAST, f ASC NULLS LAST +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted; +---- RESULTS +6 +==== +---- QUERY +# Test replacing the column list, including a column in the sort by list. +alter table insert_sorted replace columns (i bigint, e decimal(12,2), f boolean); +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','e,f ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted select i, cast(d as decimal(12,2)), b from insert_data; +---- RUNTIME_PROFILE +row_regex: .*order by: CAST\(d AS DECIMAL\(12,2\)\) ASC NULLS LAST, b ASC NULLS LAST +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted; +---- RESULTS +8 +==== +---- QUERY +# Test dropping a column in the sort by list +alter table insert_sorted drop column f; +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','e ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test that a dropped column cannot be added as a sort column +alter table insert_sorted sort by(f); +---- CATCH +AnalysisException: Could not find SORT BY column 'f' in table. +==== +---- QUERY +# Test that erroneous query didn't change sort columns +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','e ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted select i, cast(d as decimal(12,2)) from insert_data; +---- RUNTIME_PROFILE +row_regex: .*order by: CAST\(d AS DECIMAL\(12,2\)\) ASC NULLS LAST +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted; +---- RESULTS +10 +==== +---- QUERY +# Test removing the sort.columns property +alter table insert_sorted sort by(); +describe formatted insert_sorted; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ',' ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted select i, cast(d as decimal(12,2)) from insert_data; +---- RUNTIME_PROFILE +aggregation(SUM, InitialRunsCreated): 0 +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted; +---- RESULTS +12 +==== +---- QUERY +create table insert_sorted_partitioned (i int, d double, f float, b boolean) partitioned by (p int) sort by (i); +==== +---- QUERY +# Test removing all sort columns. +alter table insert_sorted_partitioned sort by(); +describe formatted insert_sorted_partitioned; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ',' ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted_partitioned partition (p=1) select i, d, f, b from insert_data; +---- RUNTIME_PROFILE +aggregation(SUM, InitialRunsCreated): 0 +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted_partitioned; +---- RESULTS +2 +==== +---- QUERY +# Re-add a sort column. +alter table insert_sorted_partitioned sort by(i); +describe formatted insert_sorted_partitioned; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','i ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted_partitioned partition (p=1) select i, d, f, b from insert_data; +---- RUNTIME_PROFILE +row_regex: .*order by: i ASC NULLS LAST +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted_partitioned; +---- RESULTS +4 +==== +---- QUERY +# Test dropping the sort column +alter table insert_sorted_partitioned drop column i; +describe formatted insert_sorted_partitioned; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ',' ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test inserting after alter table +insert into table insert_sorted_partitioned partition (p=1) select d, f, b from insert_data; +---- RUNTIME_PROFILE +aggregation(SUM, InitialRunsCreated): 0 +==== +---- QUERY +# Test selection after alter table +select count(*) from insert_sorted_partitioned; +---- RESULTS +6 +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test index 9090d61..f157681 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test @@ -189,3 +189,13 @@ create table if not exists cast_null_as_int as (select cast(null as int) c); ---- RESULTS 'Inserted 1 row(s)' ==== +---- QUERY +# Test adding sort.columns when creating a table from a select query. +create table sortbytest sort by (int_col, bool_col) as +select * from functional.alltypessmall; +describe formatted sortbytest; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','int_col,bool_col ' +---- TYPES +STRING,STRING,STRING +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test index 313275c..43a7929 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test @@ -168,3 +168,14 @@ STRING, STRING, STRING drop table if exists $DATABASE.temp_modern_table ---- RESULTS ==== +---- QUERY +# Test adding sort.columns when creating a table like a parquet file. +create table $DATABASE.sorted_zipcodes_file like parquet +'$FILESYSTEM_PREFIX/test-warehouse/schemas/zipcode_incomes.parquet' +sort by (id, zip) stored as textfile; +describe formatted $DATABASE.sorted_zipcodes_file; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','id,zip ' +---- TYPES +STRING,STRING,STRING +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test index 946ac3a..0a4df27 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test @@ -213,3 +213,30 @@ drop table like_no_avro_schema drop table no_avro_schema ---- RESULTS ==== +---- QUERY +# Test setting sort.columns when using create table like. +create table sortbytest sort by (int_col, bool_col) like functional.alltypes; +describe formatted sortbytest; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','int_col,bool_col ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test that sort.columns will be inherited from the source table. +create table sortbytest_clone like sortbytest; +describe formatted sortbytest_clone; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','int_col,bool_col ' +---- TYPES +STRING,STRING,STRING +==== +---- QUERY +# Test that sort.columns can be overridden in the query. +create table sortbytest_override sort by (id, string_col) like sortbytest; +describe formatted sortbytest_override; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','id,string_col ' +---- TYPES +STRING,STRING,STRING +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table.test b/testdata/workloads/functional-query/queries/QueryTest/create-table.test index e66fb6f..2661394 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create-table.test @@ -274,3 +274,12 @@ from $DATABASE.struct_keywords t, t.a, t.m ---- TYPES INT, STRING, DECIMAL, BOOLEAN, STRING, STRING, INT, TIMESTAMP, INT, INT ==== +---- QUERY +# Make sure that specifying sort columns sets the 'sort.columns' property correctly. +create table sortbytest (i int, d double, b boolean) sort by (d, i); +describe formatted sortbytest; +---- RESULTS: VERIFY_IS_SUBSET +'','sort.columns ','d,i ' +---- TYPES +STRING,STRING,STRING +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test index 946d229..81b619b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test @@ -348,3 +348,27 @@ CREATE VIEW show_create_table_test_db.view_with_subquery AS SELECT * FROM functional.alltypestiny WHERE id IN (SELECT id FROM functional.alltypesagg) ==== +---- CREATE_TABLE +# Simple table with sort columns. +CREATE TABLE test1 (id INT) +SORT BY (id) +STORED AS TEXTFILE +---- RESULTS +CREATE TABLE show_create_table_test_db.test1 (id INT) +SORT BY (id) +STORED AS TEXTFILE +LOCATION '$$location_uri$$' +==== +---- CREATE_TABLE +# Simple partitioned table with sort columns. +CREATE TABLE test1 (id INT) +PARTITIONED BY (x INT, y INT) +SORT BY (id) +STORED AS TEXTFILE +---- RESULTS +CREATE TABLE show_create_table_test_db.test1 (id INT) +PARTITIONED BY (x INT, y INT) +SORT BY (id) +STORED AS TEXTFILE +LOCATION '$$location_uri$$' +====
