Repository: incubator-impala
Updated Branches:
  refs/heads/master 0b9d2cecd -> a2caacf87


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
index 98b55f9..26def5b 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
@@ -296,22 +296,62 @@ public class ToSqlTest extends FrontendTestBase {
 
   @Test
   public void TestCreateTable() throws AnalysisException {
-    testToSql("create table p (a int) partitioned by (day string) " +
+    testToSql("create table p (a int) partitioned by (day string) sort by (a) 
" +
         "comment 'This is a test'",
         "default",
         "CREATE TABLE default.p ( a INT ) PARTITIONED BY ( day STRING ) " +
-        "COMMENT 'This is a test' STORED AS TEXTFILE", true);
+        "SORT BY ( a ) COMMENT 'This is a test' STORED AS TEXTFILE" , true);
+    // Table with SORT BY clause.
+    testToSql("create table p (a int, b int) partitioned by (day string) sort 
by (a ,b) ",
+        "default",
+        "CREATE TABLE default.p ( a INT, b INT ) PARTITIONED BY ( day STRING ) 
" +
+        "SORT BY ( a, b ) STORED AS TEXTFILE" , true);
   }
 
   @Test
   public void TestCreateTableAsSelect() throws AnalysisException {
     // Partitioned table.
     testToSql("create table p partitioned by (int_col) as " +
-        "select double_col, int_col from functional.alltypes",
-        "default",
+        "select double_col, int_col from functional.alltypes", "default",
         "CREATE TABLE default.p PARTITIONED BY ( int_col ) STORED AS " +
         "TEXTFILE AS SELECT double_col, int_col FROM functional.alltypes",
         true);
+    // Table with a comment.
+    testToSql("create table p partitioned by (int_col) comment 'This is a 
test' as " +
+        "select double_col, int_col from functional.alltypes", "default",
+        "CREATE TABLE default.p PARTITIONED BY ( int_col ) COMMENT 'This is a 
test' " +
+        "STORED AS TEXTFILE AS SELECT double_col, int_col FROM 
functional.alltypes",
+        true);
+    // Table with SORT BY clause.
+    testToSql("create table p partitioned by (int_col) sort by (string_col) as 
" +
+        "select double_col, string_col, int_col from functional.alltypes", 
"default",
+        "CREATE TABLE default.p PARTITIONED BY ( int_col ) SORT BY ( 
string_col ) " +
+        "STORED AS TEXTFILE AS SELECT double_col, string_col, int_col FROM " +
+        "functional.alltypes", true);
+  }
+
+  @Test
+  public void TestCreateTableLike() throws AnalysisException {
+    testToSql("create table p like functional.alltypes", "default",
+        "CREATE TABLE p LIKE functional.alltypes");
+    // Table with sort columns.
+    testToSql("create table p sort by (id) like functional.alltypes", 
"default",
+        "CREATE TABLE p SORT BY (id) LIKE functional.alltypes");
+  }
+
+  @Test
+  public void TestCreateTableLikeFile() throws AnalysisException {
+    testToSql("create table if not exists p like parquet " +
+        "'/test-warehouse/schemas/alltypestiny.parquet'", "default",
+        "CREATE TABLE IF NOT EXISTS default.p LIKE PARQUET " +
+        "'hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet' 
" +
+        "STORED AS TEXTFILE", true);
+    // Table with sort columns.
+    testToSql("create table if not exists p like parquet " +
+        "'/test-warehouse/schemas/alltypestiny.parquet' sort by (int_col, 
id)", "default",
+        "CREATE TABLE IF NOT EXISTS default.p LIKE PARQUET " +
+        "'hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet' 
" +
+        "SORT BY ( int_col, id ) STORED AS TEXTFILE", true);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java 
b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
index 6202991..033b0e2 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
@@ -38,6 +38,7 @@ import org.apache.impala.analysis.SqlScanner;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.catalog.AggregateFunction;
 import org.apache.impala.catalog.Catalog;
+import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.Function;
@@ -48,6 +49,7 @@ import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.View;
+import org.apache.impala.service.CatalogOpExecutor;
 import org.apache.impala.service.Frontend;
 import org.apache.impala.testutil.ImpaladTestCatalog;
 import org.apache.impala.testutil.TestUtils;
@@ -164,23 +166,33 @@ public class FrontendTestBase {
 
   /**
    * Add a new dummy table to the catalog based on the given CREATE TABLE sql.
-   * The dummy table only has the column definitions and no other metadata.
+   * The dummy table only has the column definitions and the metastore table 
set, but no
+   * other metadata.
    * Returns the new dummy table.
    * The test tables are registered in testTables_ and removed in the @After 
method.
    */
   protected Table addTestTable(String createTableSql) {
     CreateTableStmt createTableStmt = (CreateTableStmt) 
AnalyzesOk(createTableSql);
-    // Currently does not support partitioned tables.
-    
Preconditions.checkState(createTableStmt.getPartitionColumnDefs().isEmpty());
     Db db = catalog_.getDb(createTableStmt.getDb());
     Preconditions.checkNotNull(db, "Test tables must be created in an existing 
db.");
-    HdfsTable dummyTable = new HdfsTable(null, db,
+    org.apache.hadoop.hive.metastore.api.Table msTbl =
+        CatalogOpExecutor.createMetaStoreTable(createTableStmt.toThrift());
+    HdfsTable dummyTable = new HdfsTable(msTbl, db,
         createTableStmt.getTbl(), createTableStmt.getOwner());
-    List<ColumnDef> columnDefs = createTableStmt.getColumnDefs();
+    List<ColumnDef> columnDefs = Lists.newArrayList(
+        createTableStmt.getPartitionColumnDefs());
+    dummyTable.setNumClusteringCols(columnDefs.size());
+    columnDefs.addAll(createTableStmt.getColumnDefs());
     for (int i = 0; i < columnDefs.size(); ++i) {
       ColumnDef colDef = columnDefs.get(i);
       dummyTable.addColumn(new Column(colDef.getColName(), colDef.getType(), 
i));
     }
+    try {
+    dummyTable.addDefaultPartition(msTbl.getSd());
+    } catch (CatalogException e) {
+      e.printStackTrace();
+      fail("Failed to add test table:\n" + createTableSql);
+    }
     db.addTable(dummyTable);
     testTables_.add(dummyTable);
     return dummyTable;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index a67d353..b056951 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -94,6 +94,19 @@ public class PlannerTest extends PlannerTestBase {
   }
 
   @Test
+  public void testInsertSortBy() {
+    // Add a test table with a SORT BY clause to test that the corresponding 
sort nodes
+    // are added by the insert statements in insert-sort-by.test.
+    addTestDb("test_sort_by", "Test DB for SORT BY clause.");
+    addTestTable("create table test_sort_by.t (id int, int_col int, " +
+        "bool_col boolean) partitioned by (year int, month int) " +
+        "sort by (int_col, bool_col) location '/'");
+    addTestTable("create table test_sort_by.t_nopart (id int, int_col int, " +
+        "bool_col boolean) sort by (int_col, bool_col) location '/'");
+    runPlannerTestFile("insert-sort-by", "test_sort_by");
+  }
+
+  @Test
   public void testHdfs() {
     runPlannerTestFile("hdfs");
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test 
b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
index ede917d..0129ad1 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
@@ -158,3 +158,110 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ====
+# CTAS with sort columns
+create table t sort by (int_col, bool_col) as
+select * from functional.alltypes
+---- PLAN
+WRITE TO HDFS [default.t, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [default.t, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# CTAS with partitions and sort columns
+create table t partitioned by (year, month) sort by (int_col, bool_col) as
+select * from functional.alltypes
+---- PLAN
+WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+02:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# CTAS with more complex select query
+create table t partitioned by (c_nationkey) sort by (c_custkey) as
+select c_custkey, max(o_totalprice) as maxprice, c_nationkey
+  from tpch.orders join tpch.customer on c_custkey = o_custkey
+ where c_nationkey < 10
+ group by c_custkey, c_nationkey
+---- PLAN
+WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)]
+|  partitions=25
+|
+04:SORT
+|  order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST
+|
+03:AGGREGATE [FINALIZE]
+|  output: max(o_totalprice)
+|  group by: c_custkey, c_nationkey
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: o_custkey = c_custkey
+|  runtime filters: RF000 <- c_custkey
+|
+|--01:SCAN HDFS [tpch.customer]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c_nationkey < 10
+|
+00:SCAN HDFS [tpch.orders]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o_custkey
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)]
+|  partitions=25
+|
+08:SORT
+|  order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST
+|
+07:EXCHANGE [HASH(c_nationkey)]
+|
+06:AGGREGATE [FINALIZE]
+|  output: max:merge(o_totalprice)
+|  group by: c_custkey, c_nationkey
+|
+05:EXCHANGE [HASH(c_custkey,c_nationkey)]
+|
+03:AGGREGATE [STREAMING]
+|  output: max(o_totalprice)
+|  group by: c_custkey, c_nationkey
+|
+02:HASH JOIN [INNER JOIN, BROADCAST]
+|  hash predicates: o_custkey = c_custkey
+|  runtime filters: RF000 <- c_custkey
+|
+|--04:EXCHANGE [BROADCAST]
+|  |
+|  01:SCAN HDFS [tpch.customer]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c_nationkey < 10
+|
+00:SCAN HDFS [tpch.orders]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o_custkey
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test 
b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
new file mode 100644
index 0000000..788703b
--- /dev/null
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
@@ -0,0 +1,377 @@
+# IMPALA-4166: insert into tables with sort.columns property adds sort node. 
Clustering
+# columns are added to the sort columns.
+insert into table test_sort_by.t partition(year, month) /*+ shuffle */
+select id, int_col, bool_col, year, month from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+02:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [HASH(year,month)]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: insert with noshuffle hint into tables with sort.columns 
property adds
+# sort node.
+insert into table test_sort_by.t partition(year, month) /*+ noshuffle */
+select id, int_col, bool_col, year, month from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: insert into tables with sort.columns property adds sort node. 
Clustering
+# columns are added to the sort columns. noclustered hint is ignored when 
sort.columns
+# are specified.
+insert into table test_sort_by.t partition(year, month) /*+ noclustered */
+select id, int_col, bool_col, year, month from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+02:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [HASH(year,month)]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: insert into tables with sort.columns property adds sort node.
+insert into table test_sort_by.t_nopart /*+ shuffle */
+select id, int_col, bool_col from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+02:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: insert with noshuffle hint into tables with sort.columns 
property adds
+# sort node.
+insert into table test_sort_by.t_nopart /*+ noshuffle */
+select id, int_col, bool_col from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: sort columns are correct when using an identity column 
permutation.
+insert into table test_sort_by.t_nopart (id, int_col, bool_col) /*+ shuffle */
+select id, int_col, bool_col from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+02:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: sort columns are correct when using a non-trivial column 
permutation.
+insert into table test_sort_by.t_nopart (bool_col, id, int_col) /*+ shuffle */
+select bool_col, id, int_col from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+02:SORT
+|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: sort columns are correct when using a partial column 
permutation.
+insert into table test_sort_by.t_nopart (bool_col, id) /*+ shuffle */
+select bool_col, id from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:SORT
+|  order by: bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+02:SORT
+|  order by: bool_col ASC NULLS LAST
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: no sort node is added when using a partial column permutation 
and none of
+# the sort columns appear in the permutation.
+insert into table test_sort_by.t_nopart (id) /*+ shuffle */
+select id from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
+|  partitions=1
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-4166: sort columns with a join
+insert into table test_sort_by.t partition(year, month) /*+ noclustered */
+select a.id, b.int_col, a.bool_col, b.year, a.month
+from functional.alltypes a join functional.alltypes b on a.id = b.id order by 
b.string_col
+limit 10
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+04:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+03:TOP-N [LIMIT=10]
+|  order by: string_col ASC
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: b.id = a.id
+|  runtime filters: RF000 <- a.id
+|
+|--00:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+01:SCAN HDFS [functional.alltypes b]
+   partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> b.id
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+07:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
+|
+06:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: string_col ASC
+|  limit: 10
+|
+03:TOP-N [LIMIT=10]
+|  order by: string_col ASC
+|
+02:HASH JOIN [INNER JOIN, PARTITIONED]
+|  hash predicates: b.id = a.id
+|  runtime filters: RF000 <- a.id
+|
+|--05:EXCHANGE [HASH(a.id)]
+|  |
+|  00:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+04:EXCHANGE [HASH(b.id)]
+|
+01:SCAN HDFS [functional.alltypes b]
+   partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> b.id
+====
+# IMPALA-4166: sort columns with a join and agg
+insert into table test_sort_by.t partition(year, month) /*+ noclustered */
+select a.id, max(b.int_col), min(a.bool_col), b.year, a.month
+from functional.alltypes a join functional.alltypes b on a.id = b.id
+group by a.id, b.year, a.month
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, 
PARTITION-KEYS=(b.year,a.month)]
+|  partitions=24
+|
+04:SORT
+|  order by: b.year ASC NULLS LAST, a.month ASC NULLS LAST, max(b.int_col) ASC 
NULLS LAST, min(a.bool_col) ASC NULLS LAST
+|
+03:AGGREGATE [FINALIZE]
+|  output: max(b.int_col), min(a.bool_col)
+|  group by: a.id, b.year, a.month
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: b.id = a.id
+|  runtime filters: RF000 <- a.id
+|
+|--00:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+01:SCAN HDFS [functional.alltypes b]
+   partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> b.id
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, 
PARTITION-KEYS=(b.year,a.month)]
+|  partitions=24
+|
+09:SORT
+|  order by: b.year ASC NULLS LAST, a.month ASC NULLS LAST, max(b.int_col) ASC 
NULLS LAST, min(a.bool_col) ASC NULLS LAST
+|
+08:EXCHANGE [HASH(b.year,a.month)]
+|
+07:AGGREGATE [FINALIZE]
+|  output: max:merge(b.int_col), min:merge(a.bool_col)
+|  group by: a.id, b.year, a.month
+|
+06:EXCHANGE [HASH(a.id,b.year,a.month)]
+|
+03:AGGREGATE [STREAMING]
+|  output: max(b.int_col), min(a.bool_col)
+|  group by: a.id, b.year, a.month
+|
+02:HASH JOIN [INNER JOIN, PARTITIONED]
+|  hash predicates: b.id = a.id
+|  runtime filters: RF000 <- a.id
+|
+|--05:EXCHANGE [HASH(a.id)]
+|  |
+|  00:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|
+04:EXCHANGE [HASH(b.id)]
+|
+01:SCAN HDFS [functional.alltypes b]
+   partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> b.id
+====
+# Test that using both the sortby hint and sort columns work.
+# TODO: Remove this in IMPALA-5157.
+insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id) 
*/
+select id, int_col, bool_col, year, month from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+02:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST
+|
+01:EXCHANGE [HASH(year,month)]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# Test that using both the sortby hint and sort columns work when the column 
lists
+# overlap.
+# TODO: Remove this in IMPALA-5157.
+insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id, 
bool_col) */
+select id, int_col, bool_col, year, month from functional.alltypes
+---- PLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+01:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+---- DISTRIBUTEDPLAN
+WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
+|  partitions=24
+|
+02:SORT
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST, bool_col ASC NULLS LAST
+|
+01:EXCHANGE [HASH(year,month)]
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test 
b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
index 5f54f86..e3edae0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
@@ -714,7 +714,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, 
PARTITION-KEYS=(year,month)
 |  partitions=24
 |
 01:SORT
-|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
@@ -723,7 +723,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, 
PARTITION-KEYS=(year,month)
 |  partitions=24
 |
 02:SORT
-|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
 |
 01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
 |
@@ -739,7 +739,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, 
PARTITION-KEYS=(year,month)
 |  partitions=24
 |
 01:SORT
-|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
@@ -748,7 +748,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, 
PARTITION-KEYS=(year,month)
 |  partitions=24
 |
 01:SORT
-|  order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS 
LAST, bool_col ASC NULLS LAST
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/alter-table.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/alter-table.test 
b/testdata/workloads/functional-query/queries/QueryTest/alter-table.test
index 69eede9..2b087ff 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/alter-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/alter-table.test
@@ -1055,3 +1055,216 @@ select x, a from i1670A_alter_1 order by x, a;
 ---- TYPES
 INT, INT
 ====
+---- QUERY
+create table insert_data (i int, d double, f float, b boolean);
+insert into insert_data values (1, 2, 3, false), (4, 5, 6, true);
+====
+---- QUERY
+create table insert_sorted (i int, d double, f float, b boolean);
+====
+---- QUERY
+# Test setting the sort.columns property
+alter table insert_sorted sort by(i, d);
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','i,d                 '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted select i, d, f, b from insert_data;
+---- RUNTIME_PROFILE
+row_regex: .*order by: i ASC NULLS LAST, d ASC NULLS LAST
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted;
+---- RESULTS
+2
+====
+---- QUERY
+# Test altering the sort.columns property
+alter table insert_sorted sort by(b, d, f);
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','b,d,f               '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted select i, d, f, b from insert_data;
+---- RUNTIME_PROFILE
+row_regex: .*order by: b ASC NULLS LAST, d ASC NULLS LAST, f ASC NULLS LAST
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted;
+---- RESULTS
+4
+====
+---- QUERY
+# Test renaming a column in the sort by list.
+alter table insert_sorted change d e double;
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','b,e,f               '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted select i, d, f, b from insert_data;
+---- RUNTIME_PROFILE
+row_regex: .*order by: b ASC NULLS LAST, d ASC NULLS LAST, f ASC NULLS LAST
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted;
+---- RESULTS
+6
+====
+---- QUERY
+# Test replacing the column list, including a column in the sort by list.
+alter table insert_sorted replace columns (i bigint, e decimal(12,2), f 
boolean);
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','e,f                 '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted select i, cast(d as decimal(12,2)), b from 
insert_data;
+---- RUNTIME_PROFILE
+row_regex: .*order by: CAST\(d AS DECIMAL\(12,2\)\) ASC NULLS LAST, b ASC 
NULLS LAST
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted;
+---- RESULTS
+8
+====
+---- QUERY
+# Test dropping a column in the sort by list
+alter table insert_sorted drop column f;
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','e                   '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test that a dropped column cannot be added as a sort column
+alter table insert_sorted sort by(f);
+---- CATCH
+AnalysisException: Could not find SORT BY column 'f' in table.
+====
+---- QUERY
+# Test that erroneous query didn't change sort columns
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','e                   '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted select i, cast(d as decimal(12,2)) from 
insert_data;
+---- RUNTIME_PROFILE
+row_regex: .*order by: CAST\(d AS DECIMAL\(12,2\)\) ASC NULLS LAST
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted;
+---- RESULTS
+10
+====
+---- QUERY
+# Test removing the sort.columns property
+alter table insert_sorted sort by();
+describe formatted insert_sorted;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','                    '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted select i, cast(d as decimal(12,2)) from 
insert_data;
+---- RUNTIME_PROFILE
+aggregation(SUM, InitialRunsCreated): 0
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted;
+---- RESULTS
+12
+====
+---- QUERY
+create table insert_sorted_partitioned (i int, d double, f float, b boolean) 
partitioned by (p int) sort by (i);
+====
+---- QUERY
+# Test removing all sort columns.
+alter table insert_sorted_partitioned sort by();
+describe formatted insert_sorted_partitioned;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','                    '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted_partitioned partition (p=1) select i, d, f, b 
from insert_data;
+---- RUNTIME_PROFILE
+aggregation(SUM, InitialRunsCreated): 0
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted_partitioned;
+---- RESULTS
+2
+====
+---- QUERY
+# Re-add a sort column.
+alter table insert_sorted_partitioned sort by(i);
+describe formatted insert_sorted_partitioned;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','i                   '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted_partitioned partition (p=1) select i, d, f, b 
from insert_data;
+---- RUNTIME_PROFILE
+row_regex: .*order by: i ASC NULLS LAST
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted_partitioned;
+---- RESULTS
+4
+====
+---- QUERY
+# Test dropping the sort column
+alter table insert_sorted_partitioned drop column i;
+describe formatted insert_sorted_partitioned;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','                    '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test inserting after alter table
+insert into table insert_sorted_partitioned partition (p=1) select d, f, b 
from insert_data;
+---- RUNTIME_PROFILE
+aggregation(SUM, InitialRunsCreated): 0
+====
+---- QUERY
+# Test selection after alter table
+select count(*) from insert_sorted_partitioned;
+---- RESULTS
+6
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test
 
b/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test
index 9090d61..f157681 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test
@@ -189,3 +189,13 @@ create table if not exists cast_null_as_int as (select 
cast(null as int) c);
 ---- RESULTS
 'Inserted 1 row(s)'
 ====
+---- QUERY
+# Test adding sort.columns when creating a table from a select query.
+create table sortbytest sort by (int_col, bool_col) as
+select * from functional.alltypessmall;
+describe formatted sortbytest;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','int_col,bool_col    '
+---- TYPES
+STRING,STRING,STRING
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
 
b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
index 313275c..43a7929 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
@@ -168,3 +168,14 @@ STRING, STRING, STRING
 drop table if exists $DATABASE.temp_modern_table
 ---- RESULTS
 ====
+---- QUERY
+# Test adding sort.columns when creating a table like a parquet file.
+create table $DATABASE.sorted_zipcodes_file like parquet
+'$FILESYSTEM_PREFIX/test-warehouse/schemas/zipcode_incomes.parquet'
+sort by (id, zip) stored as textfile;
+describe formatted $DATABASE.sorted_zipcodes_file;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','id,zip              '
+---- TYPES
+STRING,STRING,STRING
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
 
b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
index 946ac3a..0a4df27 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
@@ -213,3 +213,30 @@ drop table like_no_avro_schema
 drop table no_avro_schema
 ---- RESULTS
 ====
+---- QUERY
+# Test setting sort.columns when using create table like.
+create table sortbytest sort by (int_col, bool_col) like functional.alltypes;
+describe formatted sortbytest;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','int_col,bool_col    '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test that sort.columns will be inherited from the source table.
+create table sortbytest_clone like sortbytest;
+describe formatted sortbytest_clone;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','int_col,bool_col    '
+---- TYPES
+STRING,STRING,STRING
+====
+---- QUERY
+# Test that sort.columns can be overridden in the query.
+create table sortbytest_override sort by (id, string_col) like sortbytest;
+describe formatted sortbytest_override;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','id,string_col       '
+---- TYPES
+STRING,STRING,STRING
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/create-table.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/create-table.test 
b/testdata/workloads/functional-query/queries/QueryTest/create-table.test
index e66fb6f..2661394 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table.test
@@ -274,3 +274,12 @@ from $DATABASE.struct_keywords t, t.a, t.m
 ---- TYPES
 INT, STRING, DECIMAL, BOOLEAN, STRING, STRING, INT, TIMESTAMP, INT, INT
 ====
+---- QUERY
+# Make sure that specifying sort columns sets the 'sort.columns' property 
correctly.
+create table sortbytest (i int, d double, b boolean) sort by (d, i);
+describe formatted sortbytest;
+---- RESULTS: VERIFY_IS_SUBSET
+'','sort.columns        ','d,i                 '
+---- TYPES
+STRING,STRING,STRING
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ada9dac/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test 
b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
index 946d229..81b619b 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
@@ -348,3 +348,27 @@ CREATE VIEW show_create_table_test_db.view_with_subquery
 AS SELECT * FROM functional.alltypestiny
 WHERE id IN (SELECT id FROM functional.alltypesagg)
 ====
+---- CREATE_TABLE
+# Simple table with sort columns.
+CREATE TABLE test1 (id INT)
+SORT BY (id)
+STORED AS TEXTFILE
+---- RESULTS
+CREATE TABLE show_create_table_test_db.test1 (id INT)
+SORT BY (id)
+STORED AS TEXTFILE
+LOCATION '$$location_uri$$'
+====
+---- CREATE_TABLE
+# Simple partitioned table with sort columns.
+CREATE TABLE test1 (id INT)
+PARTITIONED BY (x INT, y INT)
+SORT BY (id)
+STORED AS TEXTFILE
+---- RESULTS
+CREATE TABLE show_create_table_test_db.test1 (id INT)
+PARTITIONED BY (x INT, y INT)
+SORT BY (id)
+STORED AS TEXTFILE
+LOCATION '$$location_uri$$'
+====


Reply via email to