Author: hashutosh
Date: Sat Nov 15 21:45:51 2014
New Revision: 1639929
URL: http://svn.apache.org/r1639929
Log:
HIVE-8512 : queries with star and gby produce incorrect results (Sergey
Shelukhin via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientnegative/gby_star.q
hive/trunk/ql/src/test/queries/clientnegative/gby_star2.q
hive/trunk/ql/src/test/queries/clientpositive/gby_star.q
hive/trunk/ql/src/test/results/clientnegative/gby_star.q.out
hive/trunk/ql/src/test/results/clientnegative/gby_star2.q.out
hive/trunk/ql/src/test/results/clientpositive/gby_star.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
hive/trunk/ql/src/test/queries/clientpositive/ctas_colname.q
hive/trunk/ql/src/test/queries/clientpositive/parquet_create.q
hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_ppd.q.out
hive/trunk/ql/src/test/results/clientpositive/parquet_create.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Sat Nov 15 21:45:51 2014
@@ -2762,11 +2762,14 @@ public class SemanticAnalyzer extends Ba
// TODO: make aliases unique, otherwise needless rewriting takes place
private Integer genColListRegex(String colRegex, String tabAlias, ASTNode
sel,
ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols,
RowResolver input,
- Integer pos, RowResolver output, List<String> aliases, boolean
ensureUniqueCols)
- throws SemanticException {
+ RowResolver colSrcRR, Integer pos, RowResolver output, List<String>
aliases,
+ boolean ensureUniqueCols) throws SemanticException {
+ if (colSrcRR == null) {
+ colSrcRR = input;
+ }
// The table alias should exist
- if (tabAlias != null && !input.hasTableAlias(tabAlias)) {
+ if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
}
@@ -2795,7 +2798,7 @@ public class SemanticAnalyzer extends Ba
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
for (String alias : aliases) {
- HashMap<String, ColumnInfo> fMap = input.getFieldMap(alias);
+ HashMap<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
if (fMap == null) {
continue;
}
@@ -2806,8 +2809,11 @@ public class SemanticAnalyzer extends Ba
if (excludeCols != null && excludeCols.contains(colInfo)) {
continue; // This was added during plan generation.
}
+ // First, look up the column from the source against which * is to be
resolved.
+ // We'd later translate this into the column from the proper input, if
it's valid.
+ // TODO: it may be possible to remove excludeCols using the same
technique.
String name = colInfo.getInternalName();
- String[] tmp = input.reverseLookup(name);
+ String[] tmp = colSrcRR.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
@@ -2823,6 +2829,27 @@ public class SemanticAnalyzer extends Ba
continue;
}
+ // If input (GBY) is different than the source of columns, find the
same column in input.
+ // TODO: This is fraught with peril.
+ if (input != colSrcRR) {
+ colInfo = input.get(tabAlias, tmp[1]);
+ if (colInfo == null) {
+ LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1]
+ + ", derived from [" + colSrcRR + "], in [" + input + "]");
+ throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY,
tmp[1]);
+ }
+ String oldCol = null;
+ if (LOG.isDebugEnabled()) {
+ oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." +
tmp[1]));
+ }
+ name = colInfo.getInternalName();
+ tmp = input.reverseLookup(name);
+ if (LOG.isDebugEnabled()) {
+ String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] +
"." + tmp[1]));
+ LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
+ }
+ }
+
ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
if (oColInfo == null) {
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
@@ -3421,11 +3448,10 @@ public class SemanticAnalyzer extends Ba
}
- private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input)
- throws SemanticException {
+ private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input,
+ Operator<?> inputForSelectStar) throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
-
- Operator<?> op = genSelectPlan(selExprList, qb, input, false);
+ Operator<?> op = genSelectPlan(selExprList, qb, input, inputForSelectStar,
false);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan for clause: " + dest);
@@ -3435,8 +3461,8 @@ public class SemanticAnalyzer extends Ba
}
@SuppressWarnings("nls")
- private Operator<?> genSelectPlan(ASTNode selExprList, QB qb,
- Operator<?> input, boolean outerLV) throws SemanticException {
+ private Operator<?> genSelectPlan(ASTNode selExprList, QB qb, Operator<?>
input,
+ Operator<?> inputForSelectStar, boolean outerLV) throws
SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
@@ -3447,6 +3473,10 @@ public class SemanticAnalyzer extends Ba
ASTNode trfm = null;
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
+ RowResolver starRR = null;
+ if (inputForSelectStar != null && inputForSelectStar != input) {
+ starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
+ }
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
@@ -3492,7 +3522,7 @@ public class SemanticAnalyzer extends Ba
}
if (isUDTF && (selectStar = udtfExprType ==
HiveParser.TOK_FUNCTIONSTAR)) {
genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
- col_list, null, inputRR, pos, out_rwsch, qb.getAliases(), false);
+ col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(),
false);
}
}
@@ -3545,7 +3575,7 @@ public class SemanticAnalyzer extends Ba
}
if (LOG.isDebugEnabled()) {
- LOG.debug("genSelectPlan: input = " + inputRR.toString());
+ LOG.debug("genSelectPlan: input = " + inputRR + " starRr = " + starRR);
}
// For UDTF's, skip the function name to get the expressions
@@ -3614,7 +3644,7 @@ public class SemanticAnalyzer extends Ba
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
- expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases(),
false);
+ expr, col_list, null, inputRR, starRR, pos, out_rwsch,
qb.getAliases(), false);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
@@ -3623,7 +3653,7 @@ public class SemanticAnalyzer extends Ba
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
- null, expr, col_list, null, inputRR, pos, out_rwsch,
qb.getAliases(), false);
+ null, expr, col_list, null, inputRR, starRR, pos, out_rwsch,
qb.getAliases(), false);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
@@ -3635,7 +3665,7 @@ public class SemanticAnalyzer extends Ba
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
- expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases(),
false);
+ expr, col_list, null, inputRR, starRR, pos, out_rwsch,
qb.getAliases(), false);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
@@ -5163,7 +5193,8 @@ public class SemanticAnalyzer extends Ba
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
- curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb,
aliasToOpInfo);
+ // TODO: should we pass curr instead of null?
+ curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb,
aliasToOpInfo, null);
}
return curr;
@@ -8780,7 +8811,7 @@ public class SemanticAnalyzer extends Ba
for (String dest : ks) {
curr = input;
curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr);
- curr = genSelectPlan(dest, qb, curr);
+ curr = genSelectPlan(dest, qb, curr, null); // TODO: we may need to
pass "input" here instead of null
Integer limit = qbp.getDestLimit(dest);
if (limit != null) {
curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true);
@@ -8837,6 +8868,8 @@ public class SemanticAnalyzer extends Ba
ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb,
curr, aliasToOpInfo, false);
}
+ // Preserve operator before the GBY - we'll use it to resolve '*'
+ Operator<?> gbySource = curr;
if (qbp.getAggregationExprsForClause(dest).size() != 0
|| getGroupByForClause(qbp, dest).size() > 0) {
@@ -8861,8 +8894,12 @@ public class SemanticAnalyzer extends Ba
curr = genGroupByPlan1MR(dest, qb, curr);
}
}
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("RR before GB " +
opParseCtx.get(gbySource).getRowResolver()
+ + " after GB " + opParseCtx.get(curr).getRowResolver());
+ }
- curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo);
+ curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo,
gbySource);
}
} else {
curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb,
input, aliasToOpInfo);
@@ -8889,7 +8926,7 @@ public class SemanticAnalyzer extends Ba
}
private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb,
- Map<String, Operator> aliasToOpInfo)
+ Map<String, Operator> aliasToOpInfo, Operator gbySource)
throws SemanticException {
QBParseInfo qbp = qb.getParseInfo();
@@ -8907,7 +8944,7 @@ public class SemanticAnalyzer extends Ba
curr = genWindowingPlan(qb.getWindowingSpec(dest), curr);
}
- curr = genSelectPlan(dest, qb, curr);
+ curr = genSelectPlan(dest, qb, curr, gbySource);
Integer limit = qbp.getDestLimit(dest);
// Expressions are not supported currently without a alias.
@@ -9864,7 +9901,7 @@ public class SemanticAnalyzer extends Ba
// Get the UDTF Path
QB blankQb = new QB(null, null, false);
Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
- .getChild(0), blankQb, lvForward,
+ .getChild(0), blankQb, lvForward, null,
lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
// add udtf aliases to QB
for (String udtfAlias : blankQb.getAliases()) {
@@ -14203,8 +14240,8 @@ public class SemanticAnalyzer extends Ba
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*",
expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode)
expr.getChild(0))
- .toLowerCase(), expr, col_list, excludedColumns, inputRR,
pos, out_rwsch,
- tabAliasesForAllProjs, true);
+ .toLowerCase(), expr, col_list, excludedColumns, inputRR,
null, pos,
+ out_rwsch, tabAliasesForAllProjs, true);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL &&
!hasAsClause
&& !inputRR.getIsExprResolver()
@@ -14213,7 +14250,8 @@ public class SemanticAnalyzer extends Ba
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos =
genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr,
- col_list, excludedColumns, inputRR, pos, out_rwsch,
tabAliasesForAllProjs, true);
+ col_list, excludedColumns, inputRR, null, pos, out_rwsch,
tabAliasesForAllProjs,
+ true);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&&
inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
@@ -14224,7 +14262,8 @@ public class SemanticAnalyzer extends Ba
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr,
- col_list, excludedColumns, inputRR, pos, out_rwsch,
tabAliasesForAllProjs, true);
+ col_list, excludedColumns, inputRR, null, pos, out_rwsch,
tabAliasesForAllProjs,
+ true);
} else if (expr.toStringTree().contains("TOK_FUNCTIONDI") && !(srcRel
instanceof HiveAggregateRel)) {
// Likely a malformed query eg, select hash(distinct c1) from t1;
throw new OptiqSemanticException("Distinct without an
aggreggation.");
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
Sat Nov 15 21:45:51 2014
@@ -74,6 +74,8 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import com.google.common.collect.Lists;
+
/**
* The Factory for creating typecheck processors. The typecheck processors are
@@ -184,9 +186,8 @@ public class TypeCheckProcFactory {
opRules, tcCtx);
GraphWalker ogw = new DefaultGraphWalker(disp);
- // Create a list of topop nodes
- ArrayList<Node> topNodes = new ArrayList<Node>();
- topNodes.add(expr);
+ // Create a list of top nodes
+ ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
HashMap<Node, Object> nodeOutputs = new LinkedHashMap<Node, Object>();
ogw.startWalking(topNodes, nodeOutputs);
Added: hive/trunk/ql/src/test/queries/clientnegative/gby_star.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/gby_star.q?rev=1639929&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/gby_star.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/gby_star.q Sat Nov 15
21:45:51 2014
@@ -0,0 +1 @@
+select *, count(value) from src group by key;
Added: hive/trunk/ql/src/test/queries/clientnegative/gby_star2.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/gby_star2.q?rev=1639929&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/gby_star2.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/gby_star2.q Sat Nov 15
21:45:51 2014
@@ -0,0 +1 @@
+select *, sum(key) from src;
Modified: hive/trunk/ql/src/test/queries/clientpositive/ctas_colname.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ctas_colname.q?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ctas_colname.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ctas_colname.q Sat Nov 15
21:45:51 2014
@@ -26,20 +26,20 @@ select * from x5;
-- sub queries
explain
-create table x6 as select * from (select *, max(key) from src1) a;
-create table x6 as select * from (select *, max(key) from src1) a;
+create table x6 as select * from (select *, key + 1 from src1) a;
+create table x6 as select * from (select *, key + 1 from src1) a;
describe formatted x6;
select * from x6;
explain
-create table x7 as select * from (select * from src group by key) a;
-create table x7 as select * from (select * from src group by key) a;
+create table x7 as select * from (select *, count(value) from src group by
key, value) a;
+create table x7 as select * from (select *, count(value) from src group by
key, value) a;
describe formatted x7;
select * from x7;
explain
-create table x8 as select * from (select * from src group by key having key <
9) a;
-create table x8 as select * from (select * from src group by key having key <
9) a;
+create table x8 as select * from (select *, count(value) from src group by
key, value having key < 9) a;
+create table x8 as select * from (select *, count(value) from src group by
key, value having key < 9) a;
describe formatted x8;
select * from x8;
Added: hive/trunk/ql/src/test/queries/clientpositive/gby_star.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/gby_star.q?rev=1639929&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/gby_star.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/gby_star.q Sat Nov 15
21:45:51 2014
@@ -0,0 +1,17 @@
+explain
+select *, sum(key) from src group by key, value limit 10;
+select *, sum(key) from src group by key, value limit 10;
+
+explain
+select *, sum(key) from src where key < 100 group by key, value limit 10;
+select *, sum(key) from src where key < 100 group by key, value limit 10;
+
+explain
+select *, sum(key) from (select key from src where key < 100) a group by key
limit 10;
+select *, sum(key) from (select key from src where key < 100) a group by key
limit 10;
+
+explain
+select a.*, sum(src.key) from (select key from src where key < 100) a
+inner join src on a.key = src.key group by a.key limit 10;
+select a.*, sum(src.key) from (select key from src where key < 100) a
+inner join src on a.key = src.key group by a.key limit 10;
Modified: hive/trunk/ql/src/test/queries/clientpositive/parquet_create.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_create.q?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_create.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_create.q Sat Nov 15
21:45:51 2014
@@ -28,7 +28,6 @@ SELECT * FROM parquet_create_staging;
INSERT OVERWRITE TABLE parquet_create SELECT * FROM parquet_create_staging;
-SELECT * FROM parquet_create group by id;
SELECT id, count(0) FROM parquet_create group by id;
SELECT str from parquet_create;
SELECT mp from parquet_create;
Added: hive/trunk/ql/src/test/results/clientnegative/gby_star.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/gby_star.q.out?rev=1639929&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/gby_star.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/gby_star.q.out Sat Nov 15
21:45:51 2014
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10025]: Expression not in GROUP BY key value
Added: hive/trunk/ql/src/test/results/clientnegative/gby_star2.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/gby_star2.q.out?rev=1639929&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/gby_star2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/gby_star2.q.out Sat Nov 15
21:45:51 2014
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10025]: Expression not in GROUP BY key key
Modified: hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out Sat Nov 15
21:45:51 2014
@@ -461,17 +461,22 @@ POSTHOOK: Input: default@x5
119 val_119 119
PREHOOK: query: -- sub queries
explain
-create table x6 as select * from (select *, max(key) from src1) a
+create table x6 as select * from (select *, key + 1 from src1) a
PREHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: query: -- sub queries
explain
-create table x6 as select * from (select *, max(key) from src1) a
+create table x6 as select * from (select *, key + 1 from src1) a
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-0
- Stage-2 depends on stages: Stage-3
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-8 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-8
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
@@ -481,36 +486,26 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), value (type: string), (key + 1)
(type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Group By Operator
- aggregations: max(key)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: string)
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column
stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE
Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.x6
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.x6
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
Stage: Stage-0
Move Operator
@@ -518,10 +513,10 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-3
+ Stage: Stage-8
Create Table Operator:
Create Table
- columns: _col0 string, _c1 string
+ columns: key string, value string, _c1 double
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -530,12 +525,42 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
-PREHOOK: query: create table x6 as select * from (select *, max(key) from
src1) a
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.x6
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.x6
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table x6 as select * from (select *, key + 1 from src1)
a
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src1
PREHOOK: Output: database:default
PREHOOK: Output: default@x6
-POSTHOOK: query: create table x6 as select * from (select *, max(key) from
src1) a
+POSTHOOK: query: create table x6 as select * from (select *, key + 1 from
src1) a
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src1
POSTHOOK: Output: database:default
@@ -548,8 +573,9 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@x6
# col_name data_type comment
-_col0 string
-_c1 string
+key string
+value string
+_c1 double
# Detailed Table Information
Database: default
@@ -561,9 +587,9 @@ Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
- numRows 1
- rawDataSize 5
- totalSize 6
+ numRows 25
+ rawDataSize 309
+ totalSize 334
#### A masked pattern was here ####
# Storage Information
@@ -584,12 +610,36 @@ POSTHOOK: query: select * from x6
POSTHOOK: type: QUERY
POSTHOOK: Input: default@x6
#### A masked pattern was here ####
-98 98
+ NULL
+ NULL
+ NULL
+ NULL
+ val_165 NULL
+ val_193 NULL
+ val_265 NULL
+ val_27 NULL
+ val_409 NULL
+ val_484 NULL
+128 129.0
+146 val_146 147.0
+150 val_150 151.0
+213 val_213 214.0
+224 225.0
+238 val_238 239.0
+255 val_255 256.0
+273 val_273 274.0
+278 val_278 279.0
+311 val_311 312.0
+369 370.0
+401 val_401 402.0
+406 val_406 407.0
+66 val_66 67.0
+98 val_98 99.0
PREHOOK: query: explain
-create table x7 as select * from (select * from src group by key) a
+create table x7 as select * from (select *, count(value) from src group by
key, value) a
PREHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: query: explain
-create table x7 as select * from (select * from src group by key) a
+create table x7 as select * from (select *, count(value) from src group by
key, value) a
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -605,28 +655,31 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
Group By Operator
- keys: key (type: string)
+ aggregations: count(value)
+ keys: key (type: string), value (type: string)
mode: hash
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE
Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ expressions: _col0 (type: string), _col1 (type: string), _col2
(type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
@@ -646,7 +699,7 @@ STAGE PLANS:
Stage: Stage-3
Create Table Operator:
Create Table
- columns: _col0 string
+ columns: _col0 string, _col1 string, _c1 bigint
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -655,12 +708,12 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
-PREHOOK: query: create table x7 as select * from (select * from src group by
key) a
+PREHOOK: query: create table x7 as select * from (select *, count(value) from
src group by key, value) a
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
PREHOOK: Output: database:default
PREHOOK: Output: default@x7
-POSTHOOK: query: create table x7 as select * from (select * from src group by
key) a
+POSTHOOK: query: create table x7 as select * from (select *, count(value) from
src group by key, value) a
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
@@ -674,6 +727,8 @@ POSTHOOK: Input: default@x7
# col_name data_type comment
_col0 string
+_col1 string
+_c1 bigint
# Detailed Table Information
Database: default
@@ -686,8 +741,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
numRows 309
- rawDataSize 864
- totalSize 1173
+ rawDataSize 3891
+ totalSize 4200
#### A masked pattern was here ####
# Storage Information
@@ -708,320 +763,320 @@ POSTHOOK: query: select * from x7
POSTHOOK: type: QUERY
POSTHOOK: Input: default@x7
#### A masked pattern was here ####
-0
-10
-100
-103
-104
-105
-11
-111
-113
-114
-116
-118
-119
-12
-120
-125
-126
-128
-129
-131
-133
-134
-136
-137
-138
-143
-145
-146
-149
-15
-150
-152
-153
-155
-156
-157
-158
-160
-162
-163
-164
-165
-166
-167
-168
-169
-17
-170
-172
-174
-175
-176
-177
-178
-179
-18
-180
-181
-183
-186
-187
-189
-19
-190
-191
-192
-193
-194
-195
-196
-197
-199
-2
-20
-200
-201
-202
-203
-205
-207
-208
-209
-213
-214
-216
-217
-218
-219
-221
-222
-223
-224
-226
-228
-229
-230
-233
-235
-237
-238
-239
-24
-241
-242
-244
-247
-248
-249
-252
-255
-256
-257
-258
-26
-260
-262
-263
-265
-266
-27
-272
-273
-274
-275
-277
-278
-28
-280
-281
-282
-283
-284
-285
-286
-287
-288
-289
-291
-292
-296
-298
-30
-302
-305
-306
-307
-308
-309
-310
-311
-315
-316
-317
-318
-321
-322
-323
-325
-327
-33
-331
-332
-333
-335
-336
-338
-339
-34
-341
-342
-344
-345
-348
-35
-351
-353
-356
-360
-362
-364
-365
-366
-367
-368
-369
-37
-373
-374
-375
-377
-378
-379
-382
-384
-386
-389
-392
-393
-394
-395
-396
-397
-399
-4
-400
-401
-402
-403
-404
-406
-407
-409
-41
-411
-413
-414
-417
-418
-419
-42
-421
-424
-427
-429
-43
-430
-431
-432
-435
-436
-437
-438
-439
-44
-443
-444
-446
-448
-449
-452
-453
-454
-455
-457
-458
-459
-460
-462
-463
-466
-467
-468
-469
-47
-470
-472
-475
-477
-478
-479
-480
-481
-482
-483
-484
-485
-487
-489
-490
-491
-492
-493
-494
-495
-496
-497
-498
-5
-51
-53
-54
-57
-58
-64
-65
-66
-67
-69
-70
-72
-74
-76
-77
-78
-8
-80
-82
-83
-84
-85
-86
-87
-9
-90
-92
-95
-96
-97
-98
+0 val_0 3
+10 val_10 1
+100 val_100 2
+103 val_103 2
+104 val_104 2
+105 val_105 1
+11 val_11 1
+111 val_111 1
+113 val_113 2
+114 val_114 1
+116 val_116 1
+118 val_118 2
+119 val_119 3
+12 val_12 2
+120 val_120 2
+125 val_125 2
+126 val_126 1
+128 val_128 3
+129 val_129 2
+131 val_131 1
+133 val_133 1
+134 val_134 2
+136 val_136 1
+137 val_137 2
+138 val_138 4
+143 val_143 1
+145 val_145 1
+146 val_146 2
+149 val_149 2
+15 val_15 2
+150 val_150 1
+152 val_152 2
+153 val_153 1
+155 val_155 1
+156 val_156 1
+157 val_157 1
+158 val_158 1
+160 val_160 1
+162 val_162 1
+163 val_163 1
+164 val_164 2
+165 val_165 2
+166 val_166 1
+167 val_167 3
+168 val_168 1
+169 val_169 4
+17 val_17 1
+170 val_170 1
+172 val_172 2
+174 val_174 2
+175 val_175 2
+176 val_176 2
+177 val_177 1
+178 val_178 1
+179 val_179 2
+18 val_18 2
+180 val_180 1
+181 val_181 1
+183 val_183 1
+186 val_186 1
+187 val_187 3
+189 val_189 1
+19 val_19 1
+190 val_190 1
+191 val_191 2
+192 val_192 1
+193 val_193 3
+194 val_194 1
+195 val_195 2
+196 val_196 1
+197 val_197 2
+199 val_199 3
+2 val_2 1
+20 val_20 1
+200 val_200 2
+201 val_201 1
+202 val_202 1
+203 val_203 2
+205 val_205 2
+207 val_207 2
+208 val_208 3
+209 val_209 2
+213 val_213 2
+214 val_214 1
+216 val_216 2
+217 val_217 2
+218 val_218 1
+219 val_219 2
+221 val_221 2
+222 val_222 1
+223 val_223 2
+224 val_224 2
+226 val_226 1
+228 val_228 1
+229 val_229 2
+230 val_230 5
+233 val_233 2
+235 val_235 1
+237 val_237 2
+238 val_238 2
+239 val_239 2
+24 val_24 2
+241 val_241 1
+242 val_242 2
+244 val_244 1
+247 val_247 1
+248 val_248 1
+249 val_249 1
+252 val_252 1
+255 val_255 2
+256 val_256 2
+257 val_257 1
+258 val_258 1
+26 val_26 2
+260 val_260 1
+262 val_262 1
+263 val_263 1
+265 val_265 2
+266 val_266 1
+27 val_27 1
+272 val_272 2
+273 val_273 3
+274 val_274 1
+275 val_275 1
+277 val_277 4
+278 val_278 2
+28 val_28 1
+280 val_280 2
+281 val_281 2
+282 val_282 2
+283 val_283 1
+284 val_284 1
+285 val_285 1
+286 val_286 1
+287 val_287 1
+288 val_288 2
+289 val_289 1
+291 val_291 1
+292 val_292 1
+296 val_296 1
+298 val_298 3
+30 val_30 1
+302 val_302 1
+305 val_305 1
+306 val_306 1
+307 val_307 2
+308 val_308 1
+309 val_309 2
+310 val_310 1
+311 val_311 3
+315 val_315 1
+316 val_316 3
+317 val_317 2
+318 val_318 3
+321 val_321 2
+322 val_322 2
+323 val_323 1
+325 val_325 2
+327 val_327 3
+33 val_33 1
+331 val_331 2
+332 val_332 1
+333 val_333 2
+335 val_335 1
+336 val_336 1
+338 val_338 1
+339 val_339 1
+34 val_34 1
+341 val_341 1
+342 val_342 2
+344 val_344 2
+345 val_345 1
+348 val_348 5
+35 val_35 3
+351 val_351 1
+353 val_353 2
+356 val_356 1
+360 val_360 1
+362 val_362 1
+364 val_364 1
+365 val_365 1
+366 val_366 1
+367 val_367 2
+368 val_368 1
+369 val_369 3
+37 val_37 2
+373 val_373 1
+374 val_374 1
+375 val_375 1
+377 val_377 1
+378 val_378 1
+379 val_379 1
+382 val_382 2
+384 val_384 3
+386 val_386 1
+389 val_389 1
+392 val_392 1
+393 val_393 1
+394 val_394 1
+395 val_395 2
+396 val_396 3
+397 val_397 2
+399 val_399 2
+4 val_4 1
+400 val_400 1
+401 val_401 5
+402 val_402 1
+403 val_403 3
+404 val_404 2
+406 val_406 4
+407 val_407 1
+409 val_409 3
+41 val_41 1
+411 val_411 1
+413 val_413 2
+414 val_414 2
+417 val_417 3
+418 val_418 1
+419 val_419 1
+42 val_42 2
+421 val_421 1
+424 val_424 2
+427 val_427 1
+429 val_429 2
+43 val_43 1
+430 val_430 3
+431 val_431 3
+432 val_432 1
+435 val_435 1
+436 val_436 1
+437 val_437 1
+438 val_438 3
+439 val_439 2
+44 val_44 1
+443 val_443 1
+444 val_444 1
+446 val_446 1
+448 val_448 1
+449 val_449 1
+452 val_452 1
+453 val_453 1
+454 val_454 3
+455 val_455 1
+457 val_457 1
+458 val_458 2
+459 val_459 2
+460 val_460 1
+462 val_462 2
+463 val_463 2
+466 val_466 3
+467 val_467 1
+468 val_468 4
+469 val_469 5
+47 val_47 1
+470 val_470 1
+472 val_472 1
+475 val_475 1
+477 val_477 1
+478 val_478 2
+479 val_479 1
+480 val_480 3
+481 val_481 1
+482 val_482 1
+483 val_483 1
+484 val_484 1
+485 val_485 1
+487 val_487 1
+489 val_489 4
+490 val_490 1
+491 val_491 1
+492 val_492 2
+493 val_493 1
+494 val_494 1
+495 val_495 1
+496 val_496 1
+497 val_497 1
+498 val_498 3
+5 val_5 3
+51 val_51 2
+53 val_53 1
+54 val_54 1
+57 val_57 1
+58 val_58 2
+64 val_64 1
+65 val_65 1
+66 val_66 1
+67 val_67 2
+69 val_69 1
+70 val_70 3
+72 val_72 2
+74 val_74 1
+76 val_76 2
+77 val_77 1
+78 val_78 1
+8 val_8 1
+80 val_80 1
+82 val_82 1
+83 val_83 2
+84 val_84 2
+85 val_85 1
+86 val_86 1
+87 val_87 1
+9 val_9 1
+90 val_90 3
+92 val_92 1
+95 val_95 2
+96 val_96 1
+97 val_97 2
+98 val_98 2
PREHOOK: query: explain
-create table x8 as select * from (select * from src group by key having key <
9) a
+create table x8 as select * from (select *, count(value) from src group by
key, value having key < 9) a
PREHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: query: explain
-create table x8 as select * from (select * from src group by key having key <
9) a
+create table x8 as select * from (select *, count(value) from src group by
key, value having key < 9) a
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1040,28 +1095,31 @@ STAGE PLANS:
predicate: (key < 9) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE
Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- keys: key (type: string)
+ aggregations: count(value)
+ keys: key (type: string), value (type: string)
mode: hash
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column
stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ expressions: _col0 (type: string), _col1 (type: string), _col2
(type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
@@ -1081,7 +1139,7 @@ STAGE PLANS:
Stage: Stage-3
Create Table Operator:
Create Table
- columns: _col0 string
+ columns: _col0 string, _col1 string, _c1 bigint
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1090,12 +1148,12 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
-PREHOOK: query: create table x8 as select * from (select * from src group by
key having key < 9) a
+PREHOOK: query: create table x8 as select * from (select *, count(value) from
src group by key, value having key < 9) a
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
PREHOOK: Output: database:default
PREHOOK: Output: default@x8
-POSTHOOK: query: create table x8 as select * from (select * from src group by
key having key < 9) a
+POSTHOOK: query: create table x8 as select * from (select *, count(value) from
src group by key, value having key < 9) a
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
@@ -1109,6 +1167,8 @@ POSTHOOK: Input: default@x8
# col_name data_type comment
_col0 string
+_col1 string
+_c1 bigint
# Detailed Table Information
Database: default
@@ -1121,8 +1181,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
numRows 5
- rawDataSize 5
- totalSize 10
+ rawDataSize 45
+ totalSize 50
#### A masked pattern was here ####
# Storage Information
@@ -1143,11 +1203,11 @@ POSTHOOK: query: select * from x8
POSTHOOK: type: QUERY
POSTHOOK: Input: default@x8
#### A masked pattern was here ####
-0
-2
-4
-5
-8
+0 val_0 3
+2 val_2 1
+4 val_4 1
+5 val_5 3
+8 val_8 1
PREHOOK: query: explain
create table x9 as select * from (select max(value),key from src group by key
having key < 9 AND max(value) IS NOT NULL) a
PREHOOK: type: CREATETABLE_AS_SELECT
Added: hive/trunk/ql/src/test/results/clientpositive/gby_star.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/gby_star.q.out?rev=1639929&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/gby_star.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/gby_star.q.out Sat Nov 15
21:45:51 2014
@@ -0,0 +1,370 @@
+PREHOOK: query: explain
+select *, sum(key) from src group by key, value limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *, sum(key) from src group by key, value limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ aggregations: sum(key)
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2
(type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE
Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *, sum(key) from src group by key, value limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *, sum(key) from src group by key, value limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 0.0
+10 val_10 10.0
+100 val_100 200.0
+103 val_103 206.0
+104 val_104 208.0
+105 val_105 105.0
+11 val_11 11.0
+111 val_111 111.0
+113 val_113 226.0
+114 val_114 114.0
+PREHOOK: query: explain
+select *, sum(key) from src where key < 100 group by key, value limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *, sum(key) from src where key < 100 group by key, value limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (key < 100) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(key)
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column
stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2
(type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *, sum(key) from src where key < 100 group by key,
value limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *, sum(key) from src where key < 100 group by key,
value limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 0.0
+10 val_10 10.0
+11 val_11 11.0
+12 val_12 24.0
+15 val_15 30.0
+17 val_17 17.0
+18 val_18 36.0
+19 val_19 19.0
+2 val_2 2.0
+20 val_20 20.0
+PREHOOK: query: explain
+select *, sum(key) from (select key from src where key < 100) a group by key
limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *, sum(key) from (select key from src where key < 100) a group by key
limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (key < 100) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column
stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *, sum(key) from (select key from src where key < 100)
a group by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *, sum(key) from (select key from src where key < 100)
a group by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+11 11.0
+12 24.0
+15 30.0
+17 17.0
+18 36.0
+19 19.0
+2 2.0
+20 20.0
+PREHOOK: query: explain
+select a.*, sum(src.key) from (select key from src where key < 100) a
+inner join src on a.key = src.key group by a.key limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.*, sum(src.key) from (select key from src where key < 100) a
+inner join src on a.key = src.key group by a.key limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (key < 100) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats:
COMPLETE Column stats: NONE
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {KEY.reducesinkkey0}
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE
Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select a.*, sum(src.key) from (select key from src where key <
100) a
+inner join src on a.key = src.key group by a.key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select a.*, sum(src.key) from (select key from src where key
< 100) a
+inner join src on a.key = src.key group by a.key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+11 11.0
+12 48.0
+15 60.0
+17 17.0
+18 72.0
+19 19.0
+2 2.0
+20 20.0
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_ppd.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_ppd.q.out?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_ppd.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_ppd.q.out Sat Nov 15
21:45:51 2014
@@ -79,7 +79,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats:
NONE
Select Operator
- expressions: 1 (type: int), _col1 (type: int)
+ expressions: _col1 (type: int), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
File Output Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_create.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_create.q.out?rev=1639929&r1=1639928&r2=1639929&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_create.q.out
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_create.q.out Sat Nov
15 21:45:51 2014
@@ -118,17 +118,6 @@ POSTHOOK: Lineage: parquet_create.lst SI
POSTHOOK: Lineage: parquet_create.mp SIMPLE
[(parquet_create_staging)parquet_create_staging.FieldSchema(name:mp,
type:map<string,string>, comment:null), ]
POSTHOOK: Lineage: parquet_create.str SIMPLE
[(parquet_create_staging)parquet_create_staging.FieldSchema(name:str,
type:string, comment:null), ]
POSTHOOK: Lineage: parquet_create.strct SIMPLE
[(parquet_create_staging)parquet_create_staging.FieldSchema(name:strct,
type:struct<A:string,B:string>, comment:null), ]
-PREHOOK: query: SELECT * FROM parquet_create group by id
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parquet_create
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM parquet_create group by id
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parquet_create
-#### A masked pattern was here ####
-1
-2
-3
PREHOOK: query: SELECT id, count(0) FROM parquet_create group by id
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_create