hive git commit: HIVE-10741 : count distinct rewrite is not firing

hashutosh Wed, 20 May 2015 15:05:48 -0700

Repository: hive
Updated Branches:
  refs/heads/master 7a35c75bc -> cf72246f9



HIVE-10741 : count distinct rewrite is not firing

Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cf72246f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cf72246f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cf72246f

Branch: refs/heads/master
Commit: cf72246f924859a7d4e89fc7462daf301944fb04
Parents: 7a35c75
Author: Ashutosh Chauhan <[email protected]>
Authored: Mon May 18 11:11:51 2015 -0700
Committer: Ashutosh Chauhan <[email protected]>
Committed: Wed May 20 15:00:09 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  8 ++--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 28 ++++++-------
 .../clientpositive/spark/auto_join32.q.out      | 44 ++++++++++++--------
 .../results/clientpositive/spark/count.q.out    | 10 ++---
 .../results/clientpositive/spark/groupby2.q.out |  8 ++--
 .../results/clientpositive/spark/groupby3.q.out |  8 ++--
 .../clientpositive/spark/groupby3_map.q.out     |  8 ++--
 .../spark/groupby3_map_multi_distinct.q.out     |  8 ++--
 .../spark/groupby3_map_skew.q.out               |  8 ++--
 .../clientpositive/spark/groupby3_noskew.q.out  |  6 +--
 .../spark/groupby3_noskew_multi_distinct.q.out  |  6 +--
 .../clientpositive/spark/groupby_map_ppr.q.out  |  8 ++--
 .../spark/groupby_map_ppr_multi_distinct.q.out  |  8 ++--
 .../clientpositive/spark/groupby_ppr.q.out      |  8 ++--
 .../clientpositive/spark/limit_pushdown.q.out   | 12 +++---
 .../spark/vector_count_distinct.q.out           |  6 +--
 16 files changed, 96 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c412561..4760a22 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -379,7 +379,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
     }
     // Now check QB in more detail. canHandleQbForCbo returns null if query can
     // be handled.
-    String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage);
+    String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb);
     if (msg == null) {
       return true;
     }
@@ -408,11 +408,11 @@ public class CalcitePlanner extends SemanticAnalyzer {
    *         2. Nested Subquery will return false for qbToChk.getIsQuery()
    */
   static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
-      boolean topLevelQB, boolean verbose) {
+      boolean topLevelQB, boolean verbose, QB qb) {
     boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST);
     boolean isStrictTest = isInTest
         && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict");
-    boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest;
+    boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb);
 
     if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy()
         && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy()
@@ -2711,7 +2711,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 0. Check if we can handle the SubQuery;
       // canHandleQbForCbo returns null if the query can be handled.
-      String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled());
+      String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb);
       if (reason != null) {
         String msg = "CBO can not handle Sub Query";
         if (LOG.isDebugEnabled()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 30c87ad..086d9a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -233,7 +233,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
   private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   protected HashMap<String, Operator<? extends OperatorDesc>> topOps;
-  private HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
+  private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
   protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
   private List<LoadTableDesc> loadTableWork;
   private List<LoadFileDesc> loadFileWork;
@@ -294,7 +294,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
   /** Not thread-safe. */
   final ASTSearcher astSearcher = new ASTSearcher();
-  
+
   protected AnalyzeRewriteContext analyzeRewrite;
   private CreateTableDesc tableDesc;
 
@@ -1421,7 +1421,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   /**
    * This is phase1 of supporting specifying schema in insert statement
    * insert into foo(z,y) select a,b from bar;
-   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, 
RowResolver, QB, ASTNode) 
+   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, 
RowResolver, QB, ASTNode)
    * @throws SemanticException
    */
   private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
@@ -3880,14 +3880,14 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
    * create table source (a int, b int);
    * create table target (x int, y int, z int);
    * insert into target(z,x) select * from source
-   * 
+   *
    * Once the * is resolved to 'a,b', this list needs to rewritten to 
'b,null,a' so that it looks
    * as if the original query was written as
    * insert into target select b, null, a from source
-   * 
+   *
    * if target schema is not specified, this is no-op
-   * 
-   * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) 
+   *
+   * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
    * @throws SemanticException
    */
   private void handleInsertStatementSpec(List<ExprNodeDesc> col_list, String 
dest,
@@ -3919,7 +3919,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     Table target = qb.getMetaData().getDestTableForAlias(dest);
     Partition partition = target == null ? 
qb.getMetaData().getDestPartitionForAlias(dest) : null;
     if(target == null && partition == null) {
-      throw new SemanticException(generateErrorMessage(selExprList, 
+      throw new SemanticException(generateErrorMessage(selExprList,
         "No table/partition found in QB metadata for dest='" + dest + "'"));
     }
     ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
@@ -8581,7 +8581,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     }
     RowResolver outputRR = inputRR.duplicate();
     Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
-        new SelectDesc(colList, columnNames, true), 
+        new SelectDesc(colList, columnNames, true),
         outputRR.getRowSchema(), input), outputRR);
     output.setColumnExprMap(columnExprMap);
     return output;
@@ -8742,7 +8742,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
   }
 
   // see if there are any distinct expressions
-  private boolean distinctExprsExists(QB qb) {
+  protected static boolean distinctExprsExists(QB qb) {
     QBParseInfo qbp = qb.getParseInfo();
 
     TreeSet<String> ks = new TreeSet<String>();
@@ -8997,9 +8997,9 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     if (leftmap.size() != rightmap.size()) {
       throw new SemanticException("Schema of both sides of union should match.");
     }
-    
+
     RowResolver unionoutRR = new RowResolver();
-    
+
     Iterator<Map.Entry<String, ColumnInfo>> lIter = 
leftmap.entrySet().iterator();
     Iterator<Map.Entry<String, ColumnInfo>> rIter = 
rightmap.entrySet().iterator();
     while (lIter.hasNext()) {
@@ -9008,7 +9008,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       ColumnInfo lInfo = lEntry.getValue();
       ColumnInfo rInfo = rEntry.getValue();
 
-      String field = lEntry.getKey(); // use left alias (~mysql, postgresql) 
+      String field = lEntry.getKey(); // use left alias (~mysql, postgresql)
       // try widening conversion, otherwise fail union
       TypeInfo commonTypeInfo = 
FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
           rInfo.getType());
@@ -9158,7 +9158,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 
     Iterator<ColumnInfo> oIter = origInputFieldMap.values().iterator();
     Iterator<ColumnInfo> uIter = fieldMap.values().iterator();
-    
+
     List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
     boolean needsCast = false;
     while (oIter.hasNext()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/auto_join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_join32.q.out 
b/ql/src/test/results/clientpositive/spark/auto_join32.q.out
index c537b95..361a968 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join32.q.out
@@ -428,30 +428,38 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: s
+                  alias: v
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   Filter Operator
-                    predicate: (name is not null and (p = 'bar')) (type: 
boolean)
+                    predicate: ((p = 'bar') and name is not null) (type: 
boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                    Sorted Merge Bucket Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 name (type: string)
-                        1 name (type: string)
-                      outputColumnNames: _col0, _col9
+                    Select Operator
+                      expressions: name (type: string), registration (type: 
string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                      Group By Operator
-                        aggregations: count(DISTINCT _col9)
-                        keys: _col0 (type: string), _col9 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col1, _col3
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: 
string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string)
+                        Select Operator
+                          expressions: _col3 (type: string), _col1 (type: 
string)
+                          outputColumnNames: _col0, _col1
                           Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE
+                          Group By Operator
+                            aggregations: count(DISTINCT _col1)
+                            keys: _col0 (type: string), _col1 (type: string)
+                            mode: hash
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: string), _col1 
(type: string)
+                              sort order: ++
+                              Map-reduce partition columns: _col0 (type: 
string)
+                              Statistics: Num rows: 0 Data size: 0 Basic 
stats: NONE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/count.q.out 
b/ql/src/test/results/clientpositive/spark/count.q.out
index 6923a5f..cb9eda5 100644
--- a/ql/src/test/results/clientpositive/spark/count.q.out
+++ b/ql/src/test/results/clientpositive/spark/count.q.out
@@ -123,11 +123,11 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
                   Select Operator
                     expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
-                    outputColumnNames: a, b, c, d
+                    outputColumnNames: _col1, _col2, _col3, _col4
                     Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(1), count(), count(a), count(b), 
count(c), count(d), count(DISTINCT a), count(DISTINCT b), count(DISTINCT c), 
count(DISTINCT d), count(DISTINCT a, b), count(DISTINCT b, c), count(DISTINCT 
c, d), count(DISTINCT a, d), count(DISTINCT a, c), count(DISTINCT b, d), 
count(DISTINCT a, b, c), count(DISTINCT b, c, d), count(DISTINCT a, c, d), 
count(DISTINCT a, b, d), count(DISTINCT a, b, c, d)
-                      keys: a (type: int), b (type: int), c (type: int), d 
(type: int)
+                      aggregations: count(1), count(), count(_col1), 
count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT 
_col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, 
_col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), 
count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT 
_col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, 
_col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, 
_col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+                      keys: _col1 (type: int), _col2 (type: int), _col3 (type: 
int), _col4 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
                       Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
@@ -252,10 +252,10 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
                   Select Operator
                     expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
-                    outputColumnNames: a, b, c, d
+                    outputColumnNames: _col1, _col2, _col3, _col4
                     Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: a (type: int), b (type: int), c (type: 
int), d (type: int)
+                      key expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)
                       sort order: ++++
                       Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby2.q.out 
b/ql/src/test/results/clientpositive/spark/groupby2.q.out
index f6be571..f9e3459 100644
--- a/ql/src/test/results/clientpositive/spark/groupby2.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby2.q.out
@@ -32,13 +32,13 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string), _col1 (type: 
string)
                       sort order: ++
-                      Map-reduce partition columns: substr(key, 1, 1) (type: 
string)
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out 
b/ql/src/test/results/clientpositive/spark/groupby3.q.out
index af63c0e..e48018c 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out
@@ -51,13 +51,13 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string)
                       sort order: +
-                      Map-reduce partition columns: substr(value, 5) (type: 
string)
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out 
b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
index 8379fc9..f806303 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
@@ -50,12 +50,12 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(substr(value, 5)), avg(substr(value, 
5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 
5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), 
variance(substr(value, 5)), var_samp(substr(value, 5))
-                      keys: substr(value, 5) (type: string)
+                      aggregations: sum(_col0), avg(_col0), avg(DISTINCT 
_col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), 
variance(_col0), var_samp(_col0)
+                      keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out 
b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
index 5e9d229..3b31dfe 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
@@ -54,12 +54,12 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(substr(value, 5)), avg(substr(value, 
5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 
5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), 
variance(substr(value, 5)), var_samp(substr(value, 5)), sum(DISTINCT 
substr(value, 5)), count(DISTINCT substr(value, 5))
-                      keys: substr(value, 5) (type: string)
+                      aggregations: sum(_col0), avg(_col0), avg(DISTINCT 
_col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), 
variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0)
+                      keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out 
b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
index d7f90f1..bbad6e7 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
@@ -51,12 +51,12 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(substr(value, 5)), avg(substr(value, 
5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 
5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), 
variance(substr(value, 5)), var_samp(substr(value, 5))
-                      keys: substr(value, 5) (type: string)
+                      aggregations: sum(_col0), avg(_col0), avg(DISTINCT 
_col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), 
variance(_col0), var_samp(_col0)
+                      keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out 
b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
index 75cb50b..6868eff 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
@@ -50,11 +50,11 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string)
                       sort order: +
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out 
b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
index 51831db..399bfd8 100644
--- 
a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
+++ 
b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
@@ -54,11 +54,11 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string)
                       sort order: +
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
index 517e492..8a26e81 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
@@ -114,12 +114,12 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT substr(value, 5)), 
sum(substr(value, 5))
-                      keys: substr(key, 1, 1) (type: string), substr(value, 5) 
(type: string)
+                      aggregations: count(DISTINCT _col1), sum(_col1)
+                      keys: _col0 (type: string), _col1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
index d247c25..6005381 100644
--- 
a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
+++ 
b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
@@ -131,12 +131,12 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT substr(value, 5)), 
sum(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT value)
-                      keys: substr(key, 1, 1) (type: string), substr(value, 5) 
(type: string), value (type: string)
+                      aggregations: count(DISTINCT _col1), sum(_col1), 
sum(DISTINCT _col1), count(DISTINCT _col2)
+                      keys: _col0 (type: string), _col1 (type: string), _col2 
(type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
                       Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
index 8bc6105..e00d234 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
@@ -114,13 +114,13 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string), _col1 (type: 
string)
                       sort order: ++
-                      Map-reduce partition columns: substr(key, 1, 1) (type: 
string)
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                       tag: -1
                       auto parallelism: false

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out 
b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
index 40af253..1efa9e7 100644
--- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
@@ -473,11 +473,11 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cdouble (type: 
double)
-                    outputColumnNames: ctinyint, cdouble
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT cdouble)
-                      keys: ctinyint (type: tinyint), cdouble (type: double)
+                      aggregations: count(DISTINCT _col1)
+                      keys: _col0 (type: tinyint), _col1 (type: double)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 12288 Data size: 377237 Basic 
stats: COMPLETE Column stats: NONE
@@ -660,11 +660,11 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cstring1 (type: 
string), cstring2 (type: string)
-                    outputColumnNames: ctinyint, cstring1, cstring2
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT cstring1), count(DISTINCT 
cstring2)
-                      keys: ctinyint (type: tinyint), cstring1 (type: string), 
cstring2 (type: string)
+                      aggregations: count(DISTINCT _col1), count(DISTINCT 
_col2)
+                      keys: _col0 (type: tinyint), _col1 (type: string), _col2 
(type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 12288 Data size: 377237 Basic 
stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out 
b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
index 7fa7bdb..fecfe0a 100644
--- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
@@ -1257,11 +1257,11 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 3504000 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ws_order_number (type: int)
-                    outputColumnNames: ws_order_number
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 3504000 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT ws_order_number)
-                      keys: ws_order_number (type: int)
+                      aggregations: count(DISTINCT _col0)
+                      keys: _col0 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 2000 Data size: 3504000 Basic 
stats: COMPLETE Column stats: NONE

hive git commit: HIVE-10741 : count distinct rewrite is not firing

Reply via email to