Author: jpullokk Date: Thu Jan 15 22:15:40 2015 New Revision: 1652286 URL: http://svn.apache.org/r1652286 Log: HIVE-9194: Support select distinct * (Pengcheng Xiong via Laljo John Pullokkaran)
Added: hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out hive/trunk/ql/src/test/results/clientpositive/selectDistinctStar.q.out hive/trunk/ql/src/test/results/clientpositive/tez/selectDistinctStar.q.out Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1652286&r1=1652285&r2=1652286&view=diff ============================================================================== --- hive/trunk/itests/src/test/resources/testconfiguration.properties (original) +++ hive/trunk/itests/src/test/resources/testconfiguration.properties Thu Jan 15 22:15:40 2015 @@ -140,6 +140,7 @@ minitez.query.files.shared=alter_merge_2 parallel.q,\ ptf.q,\ sample1.q,\ + selectDistinctStar.q,\ script_env_var1.q,\ script_env_var2.q,\ script_pipe.q,\ Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java?rev=1652286&r1=1652285&r2=1652286&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java Thu Jan 15 22:15:40 2015 @@ -1734,6 +1734,23 @@ public class CalcitePlanner extends Sema // 1. Gather GB Expressions (AST) (GB + Aggregations) // NOTE: Multi Insert is not supported String detsClauseName = qbp.getClauseNames().iterator().next(); + // Check and transform group by *. This will only happen for select distinct *. + // Here the "genSelectPlan" is being leveraged. + // The main benefits are (1) remove virtual columns that should + // not be included in the group by; (2) add the fully qualified column names to unParseTranslator + // so that view is supported. The drawback is that an additional SEL op is added. If it is + // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match + // SEL%SEL% rule. + ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName); + if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI + && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { + ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0); + if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { + srcRel = genSelectLogicalPlan(qb, srcRel, srcRel); + RowResolver rr = this.relToHiveRR.get(srcRel); + qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr)); + } + } List<ASTNode> grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1652286&r1=1652285&r2=1652286&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Jan 15 22:15:40 2015 @@ -38,6 +38,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.antlr.runtime.ClassicToken; +import org.antlr.runtime.CommonToken; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeWizard; @@ -8752,6 +8753,23 @@ public class SemanticAnalyzer extends Ba // insert a select operator here used by the ColumnPruner to reduce // the data to shuffle curr = insertSelectAllPlanForGroupBy(curr); + // Check and transform group by *. This will only happen for select distinct *. + // Here the "genSelectPlan" is being leveraged. + // The main benefits are (1) remove virtual columns that should + // not be included in the group by; (2) add the fully qualified column names to unParseTranslator + // so that view is supported. The drawback is that an additional SEL op is added. If it is + // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match + // SEL%SEL% rule. + ASTNode selExprList = qbp.getSelForClause(dest); + if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI + && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { + ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0); + if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { + curr = genSelectPlan(dest, qb, curr, curr); + RowResolver rr = opParseCtx.get(curr).getRowResolver(); + qbp.setSelExprForClause(dest, SemanticAnalyzer.genSelectDIAST(rr)); + } + } if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr); @@ -12208,4 +12226,25 @@ public class SemanticAnalyzer extends Ba protected boolean deleting() { return false; } + public static ASTNode genSelectDIAST(RowResolver rr) { + HashMap<String, LinkedHashMap<String, ColumnInfo>> map = rr.getRslvMap(); + ASTNode selectDI = new ASTNode(new CommonToken(HiveParser.TOK_SELECTDI, "TOK_SELECTDI")); + for (String tabAlias : map.keySet()) { + for (Entry<String, ColumnInfo> entry : map.get(tabAlias).entrySet()) { + selectDI.addChild(buildSelExprSubTree(tabAlias, entry.getKey())); + } + } + return selectDI; + } + private static ASTNode buildSelExprSubTree(String tableAlias, String col) { + ASTNode selexpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR")); + ASTNode tableOrCol = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, + "TOK_TABLE_OR_COL")); + ASTNode dot = new ASTNode(new CommonToken(HiveParser.DOT, ".")); + tableOrCol.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, tableAlias))); + dot.addChild(tableOrCol); + dot.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, col))); + selexpr.addChild(dot); + return selexpr; + } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java?rev=1652286&r1=1652285&r2=1652286&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java Thu Jan 15 22:15:40 2015 @@ -97,6 +97,10 @@ class UnparseTranslator { int tokenStartIndex = node.getTokenStartIndex(); int tokenStopIndex = node.getTokenStopIndex(); + if (tokenStopIndex < 0) { + // this is for artificially added tokens + return; + } Translation translation = new Translation(); translation.tokenStopIndex = tokenStopIndex; translation.replacementText = replacementText; Added: hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q?rev=1652286&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q (added) +++ hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_1.q Thu Jan 15 22:15:40 2015 @@ -0,0 +1,4 @@ +-- Duplicate column name: key + +drop view if exists v; +create view v as select distinct * from src join src1 on src.key=src1.key; \ No newline at end of file Added: hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q?rev=1652286&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q (added) +++ hive/trunk/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q Thu Jan 15 22:15:40 2015 @@ -0,0 +1,3 @@ +-- SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token âkeyâ + +select distinct * from src group by key; \ No newline at end of file Added: hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q?rev=1652286&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/selectDistinctStar.q Thu Jan 15 22:15:40 2015 @@ -0,0 +1,101 @@ +explain select distinct src.* from src; + +select distinct src.* from src; + +select distinct * from src; + +explain select distinct * from src where key < '3'; + +select distinct * from src where key < '3'; + +from src a select distinct a.* where a.key = '238'; + +explain +SELECT distinct * from ( +select * from src1 +union all +select * from src )subq; + +SELECT distinct * from ( +select * from src1 +union all +select * from src )subq; + +drop view if exists sdi; + +explain create view sdi as select distinct * from src order by key limit 2; + +create view sdi as select distinct * from src order by key limit 2; + +describe extended sdi; + +describe formatted sdi; + +select * from sdi; + +select distinct * from src union all select distinct * from src1; + +select distinct * from src join src1 on src.key=src1.key; + +SELECT distinct * +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +select * from (select distinct * from src)src1 +join +(select distinct * from src)src2 +on src1.key=src2.key; + +select distinct * from (select distinct * from src)src1; + +set hive.cbo.enable=false; + +explain select distinct src.* from src; + +select distinct src.* from src; + +select distinct * from src; + +explain select distinct * from src where key < '3'; + +select distinct * from src where key < '3'; + +from src a select distinct a.* where a.key = '238'; + +explain +SELECT distinct * from ( +select * from src1 +union all +select * from src )subq; + +SELECT distinct * from ( +select * from src1 +union all +select * from src )subq; + +drop view if exists sdi; + +explain create view sdi as select distinct * from src order by key limit 2; + +create view sdi as select distinct * from src order by key limit 2; + +describe extended sdi; + +describe formatted sdi; + +select * from sdi; + +select distinct * from src union all select distinct * from src1; + +select distinct * from src join src1 on src.key=src1.key; + +SELECT distinct * +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +select * from (select distinct * from src)src1 +join +(select distinct * from src)src2 +on src1.key=src2.key; + +select distinct * from (select distinct * from src)src1; Added: hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out?rev=1652286&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out (added) +++ hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_1.q.out Thu Jan 15 22:15:40 2015 @@ -0,0 +1,9 @@ +PREHOOK: query: -- Duplicate column name: key + +drop view if exists v +PREHOOK: type: DROPVIEW +POSTHOOK: query: -- Duplicate column name: key + +drop view if exists v +POSTHOOK: type: DROPVIEW +FAILED: SemanticException [Error 10036]: Duplicate column name: key Added: hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out?rev=1652286&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out (added) +++ hive/trunk/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out Thu Jan 15 22:15:40 2015 @@ -0,0 +1 @@ +FAILED: SemanticException 3:36 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key'