Author: jpullokk
Date: Thu Jan 15 22:15:40 2015
New Revision: 1652286

URL: http://svn.apache.org/r1652286
Log:
HIVE-9194: Support select distinct * (Pengcheng Xiong via Laljo John 
Pullokkaran)

Added:
    hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q
    hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q
    hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q
    hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out
    hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out
    hive/trunk/ql/src/test/results/clientpositive/selectDistinctStar.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/selectDistinctStar.q.out
Modified:
    hive/trunk/itests/src/test/resources/testconfiguration.properties
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java

Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: 
http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1652286&r1=1652285&r2=1652286&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Thu Jan 
15 22:15:40 2015
@@ -140,6 +140,7 @@ minitez.query.files.shared=alter_merge_2
   parallel.q,\
   ptf.q,\
   sample1.q,\
+  selectDistinctStar.q,\
   script_env_var1.q,\
   script_env_var2.q,\
   script_pipe.q,\

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java?rev=1652286&r1=1652285&r2=1652286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
Thu Jan 15 22:15:40 2015
@@ -1734,6 +1734,23 @@ public class CalcitePlanner extends Sema
       // 1. Gather GB Expressions (AST) (GB + Aggregations)
       // NOTE: Multi Insert is not supported
       String detsClauseName = qbp.getClauseNames().iterator().next();
+      // Check and transform group by *. This will only happen for select 
distinct *.
+      // Here the "genSelectPlan" is being leveraged.
+      // The main benefits are (1) remove virtual columns that should
+      // not be included in the group by; (2) add the fully qualified column 
names to unParseTranslator
+      // so that view is supported. The drawback is that an additional SEL op 
is added. If it is
+      // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer 
because it will match
+      // SEL%SEL% rule.
+      ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
+      if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
+          && selExprList.getChildCount() == 1 && 
selExprList.getChild(0).getChildCount() == 1) {
+        ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
+        if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+          srcRel = genSelectLogicalPlan(qb, srcRel, srcRel);
+          RowResolver rr = this.relToHiveRR.get(srcRel);
+          qbp.setSelExprForClause(detsClauseName, 
SemanticAnalyzer.genSelectDIAST(rr));
+        }
+      }
       List<ASTNode> grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, 
detsClauseName);
       HashMap<String, ASTNode> aggregationTrees = 
qbp.getAggregationExprsForClause(detsClauseName);
       boolean hasGrpByAstExprs = (grpByAstExprs != null && 
!grpByAstExprs.isEmpty()) ? true : false;

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1652286&r1=1652285&r2=1652286&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
Thu Jan 15 22:15:40 2015
@@ -38,6 +38,7 @@ import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
 import org.antlr.runtime.ClassicToken;
+import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.Tree;
 import org.antlr.runtime.tree.TreeWizard;
@@ -8752,6 +8753,23 @@ public class SemanticAnalyzer extends Ba
                 // insert a select operator here used by the ColumnPruner to 
reduce
                 // the data to shuffle
                 curr = insertSelectAllPlanForGroupBy(curr);
+                // Check and transform group by *. This will only happen for 
select distinct *.
+                // Here the "genSelectPlan" is being leveraged.
+                // The main benefits are (1) remove virtual columns that should
+                // not be included in the group by; (2) add the fully 
qualified column names to unParseTranslator
+                // so that view is supported. The drawback is that an 
additional SEL op is added. If it is
+                // not necessary, it will be removed by NonBlockingOpDeDupProc 
Optimizer because it will match
+                // SEL%SEL% rule.
+                ASTNode selExprList = qbp.getSelForClause(dest);
+                if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
+                    && selExprList.getChildCount() == 1 && 
selExprList.getChild(0).getChildCount() == 1) {
+                  ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
+                  if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+                    curr = genSelectPlan(dest, qb, curr, curr);
+                    RowResolver rr = opParseCtx.get(curr).getRowResolver();
+                    qbp.setSelExprForClause(dest, 
SemanticAnalyzer.genSelectDIAST(rr));
+                  }
+                }
                 if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
                   if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
                     curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr);
@@ -12208,4 +12226,25 @@ public class SemanticAnalyzer extends Ba
   protected boolean deleting() {
     return false;
   }
+  public static ASTNode genSelectDIAST(RowResolver rr) {
+    HashMap<String, LinkedHashMap<String, ColumnInfo>> map = rr.getRslvMap();
+    ASTNode selectDI = new ASTNode(new CommonToken(HiveParser.TOK_SELECTDI, 
"TOK_SELECTDI"));
+    for (String tabAlias : map.keySet()) {
+      for (Entry<String, ColumnInfo> entry : map.get(tabAlias).entrySet()) {
+        selectDI.addChild(buildSelExprSubTree(tabAlias, entry.getKey()));
+      }
+    }
+    return selectDI;
+  }
+  private static ASTNode buildSelExprSubTree(String tableAlias, String col) {
+    ASTNode selexpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, 
"TOK_SELEXPR"));
+    ASTNode tableOrCol = new ASTNode(new 
CommonToken(HiveParser.TOK_TABLE_OR_COL,
+        "TOK_TABLE_OR_COL"));
+    ASTNode dot = new ASTNode(new CommonToken(HiveParser.DOT, "."));
+    tableOrCol.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, 
tableAlias)));
+    dot.addChild(tableOrCol);
+    dot.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, col)));
+    selexpr.addChild(dot);
+    return selexpr;
+  }
 }

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java?rev=1652286&r1=1652285&r2=1652286&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java 
Thu Jan 15 22:15:40 2015
@@ -97,6 +97,10 @@ class UnparseTranslator {
 
     int tokenStartIndex = node.getTokenStartIndex();
     int tokenStopIndex = node.getTokenStopIndex();
+    if (tokenStopIndex < 0) {
+      // this is for artificially added tokens
+      return;
+    }
     Translation translation = new Translation();
     translation.tokenStopIndex = tokenStopIndex;
     translation.replacementText = replacementText;

Added: hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q?rev=1652286&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q Thu 
Jan 15 22:15:40 2015
@@ -0,0 +1,4 @@
+-- Duplicate column name: key
+
+drop view if exists v;
+create view v as select distinct * from src join src1 on src.key=src1.key;
\ No newline at end of file

Added: hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q?rev=1652286&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q Thu 
Jan 15 22:15:40 2015
@@ -0,0 +1,3 @@
+-- SELECT DISTINCT and GROUP BY can not be in the same query. Error 
encountered near token ‘key’
+
+select distinct * from src group by key;
\ No newline at end of file

Added: hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q?rev=1652286&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q Thu Jan 
15 22:15:40 2015
@@ -0,0 +1,101 @@
+explain select distinct src.* from src;
+
+select distinct src.* from src;
+
+select distinct * from src;
+
+explain select distinct * from src where key < '3';
+
+select distinct * from src where key < '3';
+
+from src a select distinct a.* where a.key = '238';
+
+explain
+SELECT distinct * from (
+select * from src1
+union all
+select * from src )subq; 
+
+SELECT distinct * from (
+select * from src1
+union all
+select * from src )subq; 
+
+drop view if exists sdi;
+
+explain create view sdi as select distinct * from src order by key limit 2;
+
+create view sdi as select distinct * from src order by key limit 2;
+
+describe extended sdi;
+
+describe formatted sdi;
+
+select * from sdi;
+
+select distinct * from src union all select distinct * from src1;
+
+select distinct * from src join src1 on src.key=src1.key;
+
+SELECT distinct *
+FROM src1 x JOIN src y ON (x.key = y.key) 
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
+
+select * from (select distinct * from src)src1 
+join 
+(select distinct * from src)src2 
+on src1.key=src2.key;
+
+select distinct * from (select distinct * from src)src1;
+
+set hive.cbo.enable=false;
+
+explain select distinct src.* from src;
+
+select distinct src.* from src;
+
+select distinct * from src;
+
+explain select distinct * from src where key < '3';
+
+select distinct * from src where key < '3';
+
+from src a select distinct a.* where a.key = '238';
+
+explain
+SELECT distinct * from (
+select * from src1
+union all
+select * from src )subq; 
+
+SELECT distinct * from (
+select * from src1
+union all
+select * from src )subq; 
+
+drop view if exists sdi;
+
+explain create view sdi as select distinct * from src order by key limit 2;
+
+create view sdi as select distinct * from src order by key limit 2;
+
+describe extended sdi;
+
+describe formatted sdi;
+
+select * from sdi;
+
+select distinct * from src union all select distinct * from src1;
+
+select distinct * from src join src1 on src.key=src1.key;
+
+SELECT distinct *
+FROM src1 x JOIN src y ON (x.key = y.key) 
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
+
+select * from (select distinct * from src)src1 
+join 
+(select distinct * from src)src2 
+on src1.key=src2.key;
+
+select distinct * from (select distinct * from src)src1;

Added: 
hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out?rev=1652286&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out 
Thu Jan 15 22:15:40 2015
@@ -0,0 +1,9 @@
+PREHOOK: query: -- Duplicate column name: key
+
+drop view if exists v
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: -- Duplicate column name: key
+
+drop view if exists v
+POSTHOOK: type: DROPVIEW
+FAILED: SemanticException [Error 10036]: Duplicate column name: key

Added: 
hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out?rev=1652286&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out 
Thu Jan 15 22:15:40 2015
@@ -0,0 +1 @@
+FAILED: SemanticException 3:36 SELECT DISTINCT and GROUP BY can not be in the 
same query. Error encountered near token 'key'


Reply via email to