IMPALA-5144: Remove sortby() hint

The sortby() hint is superseded by the SORT BY SQL clause, which was
introduced in IMPALA-4166. This change removes the hint.
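
For illustration, the migration implied by this change looks roughly as
follows; the table names ('t', 'src', 'src_schema') are placeholders, not
taken from the patch:

  -- Before: sort order requested per statement via the (now removed) hint
  insert into t partition (year, month) /*+ shuffle,sortby(int_col) */
  select * from src;

  -- After: sort order declared once on the table (IMPALA-4166); no hint needed
  create table t sort by (int_col) like src_schema stored as parquet;
  insert into t partition (year, month) /*+ shuffle */
  select * from src;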

Change-Id: I83e1cd6fa7039035973676322deefbce00d3f594
Reviewed-on: http://gerrit.cloudera.org:8080/6885
Reviewed-by: Lars Volker <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0c8b2d3d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0c8b2d3d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0c8b2d3d

Branch: refs/heads/master
Commit: 0c8b2d3dbeff6440cdb0223680f77f8daa10a4a7
Parents: ee9fbec
Author: Lars Volker <[email protected]>
Authored: Sat May 13 20:27:50 2017 +0200
Committer: Impala Public Jenkins <[email protected]>
Committed: Mon May 22 00:40:04 2017 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-table-sink.h                   |  4 +-
 common/thrift/DataSinks.thrift                  |  4 +-
 .../org/apache/impala/analysis/InsertStmt.java  | 49 +------------
 .../apache/impala/planner/HdfsTableSink.java    |  4 +-
 .../java/org/apache/impala/planner/Planner.java | 17 ++---
 .../org/apache/impala/planner/TableSink.java    |  6 +-
 .../apache/impala/analysis/AnalyzeDDLTest.java  |  4 +-
 .../impala/analysis/AnalyzeStmtsTest.java       | 33 ---------
 .../org/apache/impala/analysis/ParserTest.java  | 37 +++++-----
 .../org/apache/impala/analysis/ToSqlTest.java   | 12 +++-
 .../queries/PlannerTest/insert-sort-by.test     | 51 --------------
 .../queries/PlannerTest/insert.test             | 73 --------------------
 .../queries/QueryTest/insert.test               | 26 -------
 tests/query_test/test_insert_parquet.py         | 21 +++---
 14 files changed, 62 insertions(+), 279 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/be/src/exec/hdfs-table-sink.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-table-sink.h b/be/src/exec/hdfs-table-sink.h
index f6db868..b928a20 100644
--- a/be/src/exec/hdfs-table-sink.h
+++ b/be/src/exec/hdfs-table-sink.h
@@ -276,8 +276,8 @@ class HdfsTableSink : public DataSink {
   bool input_is_clustered_;
 
   // Stores the indices into the list of non-clustering columns of the target table that
-  // are mentioned in the 'sortby()' hint. This is used in the backend to populate the
-  // RowGroup::sorting_columns list in parquet files.
+  // are stored in the 'sort.columns' table property. This is used in the backend to
+  // populate the RowGroup::sorting_columns list in parquet files.
   const std::vector<int32_t>& sort_columns_;
 
   /// Stores the current partition during clustered inserts across subsequent row batches.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/common/thrift/DataSinks.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/DataSinks.thrift b/common/thrift/DataSinks.thrift
index cce7971..1b26c92 100644
--- a/common/thrift/DataSinks.thrift
+++ b/common/thrift/DataSinks.thrift
@@ -72,8 +72,8 @@ struct THdfsTableSink {
   4: required bool input_is_clustered
 
   // Stores the indices into the list of non-clustering columns of the target table that
-  // are mentioned in the 'sortby()' hint. This is used in the backend to populate the
-  // RowGroup::sorting_columns list in parquet files.
+  // are stored in the 'sort.columns' table property. This is used in the backend to
+  // populate the RowGroup::sorting_columns list in parquet files.
   5: optional list<i32> sort_columns
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
index 412586c..c25f484 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
@@ -136,7 +136,7 @@ public class InsertStmt extends StatementBase {
   private boolean hasNoClusteredHint_ = false;
 
   // For every column of the target table that is referenced in the optional
-  // 'sort.columns' table property or in the optional 'sortby()' hint, this list will
+  // 'sort.columns' table property, this list will
   // contain the corresponding result expr from 'resultExprs_'. Before insertion, all rows
   // will be sorted by these exprs. If the list is empty, no additional sorting by
   // non-partitioning columns will be performed. The column list must not contain
@@ -144,11 +144,8 @@ public class InsertStmt extends StatementBase {
   private List<Expr> sortExprs_ = Lists.newArrayList();
 
   // Stores the indices into the list of non-clustering columns of the target table that
-  // are mentioned in the 'sort.columns' table property or the 'sortby()' hint. This is
+  // are mentioned in the 'sort.columns' table property. This is
   // sent to the backend to populate the RowGroup::sorting_columns list in parquet files.
-  // Sort columns supersede the sortby() hint, which we will remove in a subsequent change
-  // (IMPALA-5144). Until then, it is possible to specify sort columns using both ways at
-  // the same time and the column lists will be concatenated.
   private List<Integer> sortColumns_ = Lists.newArrayList();
 
   // Output expressions that produce the final results to write to the target table. May
@@ -820,9 +817,6 @@ public class InsertStmt extends StatementBase {
       } else if (hint.is("NOCLUSTERED")) {
         hasNoClusteredHint_ = true;
         analyzer.setHasPlanHints();
-      } else if (hint.is("SORTBY")) {
-        analyzeSortByHint(hint);
-        analyzer.setHasPlanHints();
       } else {
         analyzer.addWarning("INSERT hint not recognized: " + hint);
       }
@@ -836,45 +830,6 @@ public class InsertStmt extends StatementBase {
     }
   }
 
-  private void analyzeSortByHint(PlanHint hint) throws AnalysisException {
-    // HBase and Kudu tables don't support insert hints at all (must be enforced by the caller).
-    Preconditions.checkState(!(table_ instanceof HBaseTable || table_ instanceof KuduTable));
-
-    List<String> columnNames = hint.getArgs();
-    Preconditions.checkState(!columnNames.isEmpty());
-    for (String columnName: columnNames) {
-      // Make sure it's not an Hdfs partition column.
-      for (Column tableColumn: table_.getClusteringColumns()) {
-        if (tableColumn.getName().equals(columnName)) {
-          throw new AnalysisException(String.format("SORTBY hint column list must " +
-              "not contain Hdfs partition column: '%s'", columnName));
-        }
-      }
-
-      // Find the matching column in the target table's column list (by name) and store
-      // the corresponding result expr in sortExprs_.
-      boolean foundColumn = false;
-      List<Column> columns = table_.getNonClusteringColumns();
-      if (!columns.isEmpty()) {
-        // We need to make a copy to make the sortColumns_ list mutable.
-        // TODO: Remove this when removing the sortby() hint (IMPALA-5157).
-        sortColumns_ = Lists.newArrayList(sortColumns_);
-      }
-      for (int i = 0; i < columns.size(); ++i) {
-        if (columns.get(i).getName().equals(columnName)) {
-          sortExprs_.add(resultExprs_.get(i));
-          sortColumns_.add(i);
-          foundColumn = true;
-          break;
-        }
-      }
-      if (!foundColumn) {
-        throw new AnalysisException(String.format("Could not find SORTBY hint column " +
-            "'%s' in table.", columnName));
-      }
-    }
-  }
-
   @Override
   public ArrayList<Expr> getResultExprs() { return resultExprs_; }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/main/java/org/apache/impala/planner/HdfsTableSink.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsTableSink.java b/fe/src/main/java/org/apache/impala/planner/HdfsTableSink.java
index 996388f..5ff8a1b 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsTableSink.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsTableSink.java
@@ -53,8 +53,8 @@ public class HdfsTableSink extends TableSink {
   protected final boolean inputIsClustered_;
 
   // Stores the indices into the list of non-clustering columns of the target table that
-  // are mentioned in the 'sortby()' hint. This is sent to the backend to populate the
-  // RowGroup::sorting_columns list in parquet files.
+  // are stored in the 'sort.columns' table property. This is sent to the backend to
+  // populate the RowGroup::sorting_columns list in parquet files.
   private List<Integer> sortColumns_ = Lists.newArrayList();
 
   public HdfsTableSink(Table targetTable, List<Expr> partitionKeyExprs,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/main/java/org/apache/impala/planner/Planner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index d6b0814..b6fa54a 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -484,14 +484,15 @@ public class Planner {
   }
 
   /**
-   * Insert a sort node on top of the plan, depending on the clustered/noclustered/sortby
-   * plan hint. If clustering is enabled in insertStmt or additional columns are specified
-   * in the 'sort.columns' table property, then the ordering columns will start with the
-   * clustering columns (key columns for Kudu tables), so that partitions can be written
-   * sequentially in the table sink. Any additional non-clustering columns specified by
-   * the 'sort.columns' property will be added to the ordering columns and after any
-   * clustering columns. If no clustering is requested and the table does not contain
-   * columns in the 'sort.columns' property, then no sort node will be added to the plan.
+   * Insert a sort node on top of the plan, depending on the clustered/noclustered
+   * plan hint and on the 'sort.columns' table property. If clustering is enabled in
+   * insertStmt or additional columns are specified in the 'sort.columns' table property,
+   * then the ordering columns will start with the clustering columns (key columns for
+   * Kudu tables), so that partitions can be written sequentially in the table sink. Any
+   * additional non-clustering columns specified by the 'sort.columns' property will be
+   * added to the ordering columns and after any clustering columns. If no clustering is
+   * requested and the table does not contain columns in the 'sort.columns' property, then
+   * no sort node will be added to the plan.
    */
   public void createPreInsertSort(InsertStmt insertStmt, PlanFragment inputFragment,
        Analyzer analyzer) throws ImpalaException {
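
To make the comment above concrete, a rough sketch (the table 'sorted_tbl' is
illustrative only; the resulting ordering mirrors the planner tests in this patch):

  -- SORT BY in the table definition is what populates 'sort.columns'.
  create table sorted_tbl (id int, int_col int, bool_col boolean)
    partitioned by (year int, month int) sort by (int_col, bool_col) stored as parquet;
  -- The pre-insert SORT node then orders by year, month, int_col, bool_col, so
  -- partitions are written sequentially and Parquet sorting metadata is populated.
  insert into sorted_tbl partition (year, month)
  select id, int_col, bool_col, year, month from functional.alltypes;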

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/main/java/org/apache/impala/planner/TableSink.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/TableSink.java b/fe/src/main/java/org/apache/impala/planner/TableSink.java
index 1c27360..4c4b35e 100644
--- a/fe/src/main/java/org/apache/impala/planner/TableSink.java
+++ b/fe/src/main/java/org/apache/impala/planner/TableSink.java
@@ -88,7 +88,7 @@ public abstract class TableSink extends DataSink {
    * All parameters must be non-null, the lists in particular need to be empty if they
    * don't make sense for a certain table type.
    * For HDFS tables 'sortColumns' specifies the indices into the list of non-clustering
-   * columns of the target table that are mentioned in the 'sortby()' hint.
+   * columns of the target table that are stored in the 'sort.columns' table property.
    */
   public static TableSink create(Table table, Op sinkAction,
       List<Expr> partitionKeyExprs,  List<Integer> referencedColumns,
@@ -112,14 +112,14 @@ public abstract class TableSink extends DataSink {
       Preconditions.checkState(overwrite == false);
       // Referenced columns don't make sense for an HBase table.
       Preconditions.checkState(referencedColumns.isEmpty());
-      // sortby() hint is not supported for HBase tables.
+      // Sort columns are not supported for HBase tables.
       Preconditions.checkState(sortColumns.isEmpty());
       // Create the HBaseTableSink and return it.
       return new HBaseTableSink(table);
     } else if (table instanceof KuduTable) {
       // Kudu doesn't have a way to perform INSERT OVERWRITE.
       Preconditions.checkState(overwrite == false);
-      // sortby() hint is not supported for Kudu tables.
+      // Sort columns are not supported for Kudu tables.
       Preconditions.checkState(sortColumns.isEmpty());
       return new KuduTableSink(table, sinkAction, referencedColumns);
     } else {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 0858774..0fe60a3 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -1952,14 +1952,14 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "tblproperties ('sort.columns'='i')", "Table definition must not contain the " +
         "sort.columns table property. Use SORT BY (...) instead.");
 
-    // Column in sortby hint must exist.
+    // Column in sort by list must exist.
     AnalysisError("create table functional.new_table (i int) sort by (j)", "Could not " +
         "find SORT BY column 'j' in table.");
 
     // Partitioned HDFS table
     AnalyzesOk("create table functional.new_table (i int) PARTITIONED BY (d decimal)" +
         "SORT BY (i)");
-    // Column in sortby hint must not be a Hdfs partition column.
+    // Column in sort by list must not be a Hdfs partition column.
     AnalysisError("create table functional.new_table (i int) PARTITIONED BY (d decimal)" +
         "SORT BY (d)", "SORT BY column list must not contain partition column: 'd'");
   }
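
For reference, the two error cases exercised above, written out as plain DDL
(statements and expected messages are taken from the test strings):

  -- Rejected: SORT BY column 'j' does not exist in the table.
  create table functional.new_table (i int) sort by (j);
  -- Rejected: SORT BY column list must not contain partition column 'd'.
  create table functional.new_table (i int) partitioned by (d decimal) sort by (d);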

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index e4a688c..35acda9 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -1779,39 +1779,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
           "functional.alltypes", prefix, suffix),
           "Insert statement has 'noclustered' hint, but table has 'sort.columns' " +
           "property. The 'noclustered' hint will be ignored.");
-
-
-      // Below are tests for hints that are not supported by the legacy syntax.
-      if (prefix == "[") continue;
-
-      // Tests for sortby hint
-      AnalyzesOk(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %ssortby(int_col)%s select * from functional.alltypes",
-          prefix, suffix));
-      AnalyzesOk(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %sshuffle,clustered,sortby(int_col)%s select * from " +
-          "functional.alltypes", prefix, suffix));
-      AnalyzesOk(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %ssortby(int_col, bool_col)%s select * from " +
-          "functional.alltypes", prefix, suffix));
-      AnalyzesOk(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %sshuffle,clustered,sortby(int_col,bool_col)%s " +
-          "select * from functional.alltypes", prefix, suffix));
-      AnalyzesOk(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %sshuffle,sortby(int_col,bool_col),clustered%s " +
-          "select * from functional.alltypes", prefix, suffix));
-      AnalyzesOk(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %ssortby(int_col,bool_col),shuffle,clustered%s " +
-          "select * from functional.alltypes", prefix, suffix));
-      // Column in sortby hint must exist.
-      AnalysisError(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %ssortby(foo)%s select * from functional.alltypes",
-          prefix, suffix), "Could not find SORTBY hint column 'foo' in table.");
-      // Column in sortby hint must not be a Hdfs partition column.
-      AnalysisError(String.format("insert into functional.alltypessmall " +
-          "partition (year, month) %ssortby(year)%s select * from " +
-          "functional.alltypes", prefix, suffix),
-          "SORTBY hint column list must not contain Hdfs partition column: 'year'");
     }
 
     // Multiple non-conflicting hints and case insensitivity of hints.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
index d35b9b3..9fb1e2a 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
@@ -209,13 +209,14 @@ public class ParserTest extends FrontendTestBase {
     ParsesOk("/* select 1; */ select 1");
     ParsesOk("/** select 1; */ select 1");
     ParsesOk("/* select */ select 1 /* 1 */");
-    ParsesOk("select 1 /* sortby(() */");
-    // Empty columns list in sortby hint
-    ParserError("select 1 /*+ sortby() */");
+    // Test hint with arguments
+    ParsesOk("select 1 /* hint_with_args(() */");
+    // Empty argument list in hint_with_args hint is not allowed
+    ParserError("select 1 /*+ hint_with_args() */");
     // Mismatching parentheses
-    ParserError("select 1 /*+ sortby(() */");
-    ParserError("select 1 /*+ sortby(a) \n");
-    ParserError("select 1 --+ sortby(a) */\n from t");
+    ParserError("select 1 /*+ hint_with_args(() */");
+    ParserError("select 1 /*+ hint_with_args(a) \n");
+    ParserError("select 1 --+ hint_with_args(a) */\n from t");
   }
 
   /**
@@ -237,9 +238,10 @@ public class ParserTest extends FrontendTestBase {
     ParserError("-- baz /*\nselect 1*/");
     ParsesOk("select -- blah\n 1");
     ParsesOk("select -- select 1\n 1");
-    ParsesOk("select 1 -- sortby(()");
+    // Test hint with arguments
+    ParsesOk("select 1 -- hint_with_args(()");
     // Mismatching parentheses
-    ParserError("select 1 -- +sortby(()\n");
+    ParserError("select 1 -- +hint_with_args(()\n");
   }
 
   /**
@@ -460,19 +462,20 @@ public class ParserTest extends FrontendTestBase {
 
       // Tests for hints with arguments.
       TestInsertHints(String.format(
-          "insert into t %ssortby(a)%s select * from t", prefix, suffix),
-          "sortby(a)");
+          "insert into t %shint_with_args(a)%s select * from t", prefix, suffix),
+          "hint_with_args(a)");
       TestInsertHints(String.format(
-          "insert into t %sclustered,shuffle,sortby(a)%s select * from t", prefix,
-          suffix), "clustered", "shuffle", "sortby(a)");
+          "insert into t %sclustered,shuffle,hint_with_args(a)%s select * from t", prefix,
+          suffix), "clustered", "shuffle", "hint_with_args(a)");
       TestInsertHints(String.format(
-          "insert into t %ssortby(a,b)%s select * from t", prefix, suffix),
-          "sortby(a,b)");
+          "insert into t %shint_with_args(a,b)%s select * from t", prefix, suffix),
+          "hint_with_args(a,b)");
       TestInsertHints(String.format(
-          "insert into t %ssortby(a  , b)%s select * from t", prefix, suffix),
-          "sortby(a,b)");
+          "insert into t %shint_with_args(a  , b)%s select * from t", prefix, suffix),
+          "hint_with_args(a,b)");
       ParserError(String.format(
-          "insert into t %ssortby(  a  ,  , ,,, b  )%s select * from t", prefix, suffix));
+          "insert into t %shint_with_args(  a  ,  , ,,, b  )%s select * from t",
+          prefix, suffix));
     }
     // No "+" at the beginning so the comment is not recognized as a hint.
     TestJoinHints("select * from functional.alltypes a join /* comment */" +

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
index 26def5b..cd14d25 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
@@ -505,11 +505,19 @@ public class ToSqlTest extends FrontendTestBase {
       // Insert hint.
       testToSql(String.format(
           "insert into functional.alltypes(int_col, bool_col) " +
-          "partition(year, month) %snoshuffle,sortby(int_col)%s " +
+          "partition(year, month) %snoshuffle%s " +
           "select int_col, bool_col, year, month from functional.alltypes",
           prefix, suffix),
           "INSERT INTO TABLE functional.alltypes(int_col, bool_col) " +
-              "PARTITION (year, month) \n-- +noshuffle,sortby(int_col)\n " +
+              "PARTITION (year, month) \n-- +noshuffle\n " +
+          "SELECT int_col, bool_col, year, month FROM functional.alltypes");
+      testToSql(String.format(
+          "insert into functional.alltypes(int_col, bool_col) " +
+          "partition(year, month) %sshuffle,clustered%s " +
+          "select int_col, bool_col, year, month from functional.alltypes",
+          prefix, suffix),
+          "INSERT INTO TABLE functional.alltypes(int_col, bool_col) " +
+              "PARTITION (year, month) \n-- +shuffle,clustered\n " +
           "SELECT int_col, bool_col, year, month FROM functional.alltypes");
 
       // Table hint

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
index 2a9d272..f2ecb2c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert-sort-by.test
@@ -403,54 +403,3 @@ WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
    partitions=1/0 files=0 size=0B
    runtime filters: RF000 -> id
 ====
-# Test that using both the sortby hint and sort columns work.
-# TODO: Remove this in IMPALA-5157.
-insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id) */
-select id, int_col, bool_col, year, month from functional.alltypes
----- PLAN
-WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-01:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
----- DISTRIBUTEDPLAN
-WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-02:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST
-|
-01:EXCHANGE [HASH(year,month)]
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
-====
-# Test that using both the sortby hint and sort columns work when the column lists
-# overlap.
-# TODO: Remove this in IMPALA-5157.
-insert into table test_sort_by.t partition(year, month) /*+ shuffle,sortby(id, bool_col) */
-select id, int_col, bool_col, year, month from functional.alltypes
----- PLAN
-WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-01:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
----- DISTRIBUTEDPLAN
-WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-02:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST, id ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-01:EXCHANGE [HASH(year,month)]
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
index e3edae0..bdbc92f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
@@ -705,76 +705,3 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
 00:SCAN HDFS [functional.alltypesnopart]
    partitions=1/1 files=0 size=0B
 ====
-# IMPALA-4163: sortby hint in insert statement adds sort node.
-insert into table functional.alltypes partition(year, month)
-       /*+ sortby(int_col, bool_col),shuffle */
-       select * from functional.alltypes
----- PLAN
-WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-01:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
----- DISTRIBUTEDPLAN
-WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-02:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
-====
-# IMPALA-4163: sortby hint in insert statement with noshuffle adds sort node.
-insert into table functional.alltypes partition(year, month)
-       /*+ sortby(int_col, bool_col),noshuffle */
-       select * from functional.alltypes
----- PLAN
-WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-01:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
----- DISTRIBUTEDPLAN
-WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-01:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
-====
-# IMPALA-4163: sortby hint in insert statement adds ordering columns to clustering sort.
-insert into table functional.alltypes partition(year, month)
-       /*+ sortby(int_col, bool_col),clustered */
-       select * from functional.alltypes
----- PLAN
-WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-01:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
----- DISTRIBUTEDPLAN
-WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
-|  partitions=24
-|
-02:SORT
-|  order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
-|
-01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
-|
-00:SCAN HDFS [functional.alltypes]
-   partitions=24/24 files=24 size=478.45KB
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/testdata/workloads/functional-query/queries/QueryTest/insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert.test b/testdata/workloads/functional-query/queries/QueryTest/insert.test
index 9dfaf34..5601b35 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/insert.test
@@ -945,29 +945,3 @@ RESET alltypesnopart_insert
 ---- RESULTS
 : 100
 ====
----- QUERY
-# IMPALA-4163: insert into table sortby() plan hint
-insert into table alltypesinsert
-partition (year, month) /*+ clustered,shuffle,sortby(int_col, bool_col) */
-select * from alltypestiny;
----- SETUP
-DROP PARTITIONS alltypesinsert
-RESET alltypesinsert
----- RESULTS
-year=2009/month=1/: 2
-year=2009/month=2/: 2
-year=2009/month=3/: 2
-year=2009/month=4/: 2
-====
----- QUERY
-# IMPALA-4163: insert into table sortby() plan hint
-insert into table alltypesnopart_insert
-/*+ clustered,shuffle,sortby(int_col, bool_col) */
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
-double_col, date_string_col, string_col, timestamp_col from alltypestiny;
----- SETUP
-DROP PARTITIONS alltypesinsert
-RESET alltypesinsert
----- RESULTS
-: 8
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0c8b2d3d/tests/query_test/test_insert_parquet.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py
index 4477786..9d9eb7b 100644
--- a/tests/query_test/test_insert_parquet.py
+++ b/tests/query_test/test_insert_parquet.py
@@ -232,8 +232,8 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
       rmtree(tmp_dir)
 
   def test_sorting_columns(self, vector, unique_database, tmpdir):
-    """Tests that RowGroup::sorting_columns gets populated when specifying a sortby()
-    insert hint."""
+    """Tests that RowGroup::sorting_columns gets populated when the table has SORT BY
+    columns."""
     source_table = "functional_parquet.alltypessmall"
     target_table = "test_write_sorting_columns"
     qualified_target_table = "{0}.{1}".format(unique_database, target_table)
@@ -241,14 +241,13 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
         target_table))
 
     # Create table
-    # TODO: Simplify once IMPALA-4167 (insert hints in CTAS) has been fixed.
-    query = "create table {0} like {1} stored as parquet".format(qualified_target_table,
-        source_table)
+    query = "create table {0} sort by (int_col, id) like {1} stored as parquet".format(
+        qualified_target_table, source_table)
     self.execute_query(query)
 
     # Insert data
-    query = ("insert into {0} partition(year, month) /* +sortby(int_col, id) */ "
-        "select * from {1}").format(qualified_target_table, source_table)
+    query = ("insert into {0} partition(year, month) select * from {1}").format(
+        qualified_target_table, source_table)
     self.execute_query(query)
 
     # Download hdfs files and extract rowgroup metadata
@@ -540,7 +539,7 @@ class TestHdfsParquetTableStatsWriter(ImpalaTestSuite):
 
   def test_write_statistics_multiple_row_groups(self, vector, unique_database):
     """Test that writing multiple row groups works as expected. This is done by inserting
-    into a table using the sortby() hint and then making sure that the min and max values
+    into a table using the SORT BY clause and then making sure that the min and max values
     of row groups don't overlap."""
     source_table = "tpch_parquet.orders"
     target_table = "test_hdfs_parquet_table_writer"
@@ -551,10 +550,10 @@ class TestHdfsParquetTableStatsWriter(ImpalaTestSuite):
     # Insert a large amount of data on a single backend with a limited parquet file size.
     # This will result in several files being written, exercising code that tracks
     # statistics for row groups.
-    query = "create table {0} like {1} stored as parquet".format(qualified_target_table,
-                                                                 source_table)
+    query = "create table {0} sort by (o_orderkey) like {1} stored as parquet".format(
+        qualified_target_table, source_table)
     self.execute_query(query, vector.get_value('exec_option'))
-    query = ("insert into {0} /* +sortby(o_orderkey) */ select * from {1}").format(
+    query = ("insert into {0} select * from {1}").format(
         qualified_target_table, source_table)
     vector.get_value('exec_option')['num_nodes'] = 1
     vector.get_value('exec_option')['parquet_file_size'] = 8 * 1024 * 1024
