Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1627140&r1=1627139&r2=1627140&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Sep 23 20:00:10 2014 @@ -22,8 +22,10 @@ import static org.apache.hadoop.hive.con import java.io.IOException; import java.io.Serializable; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -34,12 +36,18 @@ import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import net.hydromatic.optiq.SchemaPlus; +import net.hydromatic.optiq.tools.Frameworks; + import org.antlr.runtime.ClassicToken; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; +import org.antlr.runtime.tree.TreeVisitor; +import org.antlr.runtime.tree.TreeVisitorAction; import org.antlr.runtime.tree.TreeWizard; import org.antlr.runtime.tree.TreeWizard.ContextVisitor; import org.apache.commons.lang.StringUtils; @@ -107,6 +115,29 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; +import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil; +import org.apache.hadoop.hive.ql.optimizer.optiq.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException; +import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel; +import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule; +import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule; +import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory; +import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx; +import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; @@ -192,12 +223,72 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapred.InputFormat; +import org.eigenbase.rel.AggregateCall; +import org.eigenbase.rel.Aggregation; +import org.eigenbase.rel.FilterRelBase; +import org.eigenbase.rel.InvalidRelException; +import org.eigenbase.rel.JoinRelBase; +import org.eigenbase.rel.JoinRelType; +import org.eigenbase.rel.RelCollation; +import org.eigenbase.rel.RelCollationImpl; +import org.eigenbase.rel.RelFactories; +import org.eigenbase.rel.RelFieldCollation; +import org.eigenbase.rel.RelNode; +import org.eigenbase.rel.metadata.CachingRelMetadataProvider; +import org.eigenbase.rel.metadata.ChainedRelMetadataProvider; +import org.eigenbase.rel.metadata.RelMetadataProvider; +import org.eigenbase.rel.rules.ConvertMultiJoinRule; +import org.eigenbase.rel.rules.LoptOptimizeJoinRule; +import org.eigenbase.rel.rules.MergeFilterRule; +import org.eigenbase.rel.rules.PushFilterPastProjectRule; +import org.eigenbase.rel.rules.PushFilterPastSetOpRule; +import org.eigenbase.rel.rules.SemiJoinRel; +import org.eigenbase.rel.rules.TransitivePredicatesOnJoinRule; +import org.eigenbase.relopt.RelOptCluster; +import org.eigenbase.relopt.RelOptPlanner; +import org.eigenbase.relopt.RelOptQuery; +import org.eigenbase.relopt.RelOptRule; +import org.eigenbase.relopt.RelOptSchema; +import org.eigenbase.relopt.RelOptUtil; +import org.eigenbase.relopt.RelTraitSet; +import org.eigenbase.relopt.hep.HepMatchOrder; +import org.eigenbase.relopt.hep.HepPlanner; +import org.eigenbase.relopt.hep.HepProgram; +import org.eigenbase.relopt.hep.HepProgramBuilder; +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.reltype.RelDataTypeFactory; +import org.eigenbase.reltype.RelDataTypeField; +import org.eigenbase.rex.RexBuilder; +import org.eigenbase.rex.RexInputRef; +import org.eigenbase.rex.RexNode; +import org.eigenbase.rex.RexWindowBound; +import org.eigenbase.rex.RexFieldCollation; +import org.eigenbase.sql.SqlAggFunction; +import org.eigenbase.sql.SqlWindow; +import org.eigenbase.sql.parser.SqlParserPos; +import org.eigenbase.sql.type.SqlTypeName; +import org.eigenbase.sql2rel.RelFieldTrimmer; +import org.eigenbase.sql.SqlCall; +import org.eigenbase.sql.SqlExplainLevel; +import org.eigenbase.sql.SqlKind; +import org.eigenbase.sql.SqlNode; +import org.eigenbase.sql.SqlLiteral; +import org.eigenbase.util.CompositeList; +import org.eigenbase.util.ImmutableIntList; +import org.eigenbase.util.Pair; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; /** * Implementation of the semantic analyzer. It generates the query plan. @@ -265,6 +356,9 @@ public class SemanticAnalyzer extends Ba //flag for partial scan during analyze ... compute statistics protected boolean partialscan; + private volatile boolean runCBO = true; + private volatile boolean disableJoinMerge = false; + /* * Capture the CTE definitions in a Query. */ @@ -279,6 +373,11 @@ public class SemanticAnalyzer extends Ba int nextNum; } + protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException { + this(conf); + this.runCBO = runCBO; + } + public SemanticAnalyzer(HiveConf conf) throws SemanticException { super(conf); opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>(); @@ -315,8 +414,11 @@ public class SemanticAnalyzer extends Ba } @Override - protected void reset() { - super.reset(); + protected void reset(boolean clearPartsCache) { + super.reset(true); + if(clearPartsCache) { + prunedPartitions.clear(); + } loadTableWork.clear(); loadFileWork.clear(); topOps.clear(); @@ -330,7 +432,7 @@ public class SemanticAnalyzer extends Ba smbMapJoinContext.clear(); opParseCtx.clear(); groupOpToInputTables.clear(); - prunedPartitions.clear(); + disableJoinMerge = false; aliasToCTEs.clear(); topToTable.clear(); opToPartPruner.clear(); @@ -344,8 +446,6 @@ public class SemanticAnalyzer extends Ba viewsExpanded = null; viewSelect = null; ctesExpanded = null; - noscan = false; - partialscan = false; globalLimitCtx.disableOpt(); viewAliasToInput.clear(); reduceSinkOperatorsAddedByEnforceBucketingSorting.clear(); @@ -354,7 +454,6 @@ public class SemanticAnalyzer extends Ba unparseTranslator.clear(); queryProperties.clear(); outputs.clear(); - globalLimitCtx.reset(); } public void initParseCtx(ParseContext pctx) { @@ -957,9 +1056,7 @@ public class SemanticAnalyzer extends Ba private boolean isJoinToken(ASTNode node) { if ((node.getToken().getType() == HiveParser.TOK_JOIN) || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN) - || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) + || isOuterJoinToken(node) || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) { return true; @@ -968,6 +1065,12 @@ public class SemanticAnalyzer extends Ba return false; } + private boolean isOuterJoinToken(ASTNode node) { + return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN); + } + /** * Given the AST with TOK_JOIN as the root, get all the aliases for the tables * or subqueries in the join. @@ -985,6 +1088,7 @@ public class SemanticAnalyzer extends Ba "Join with multiple children")); } + queryProperties.incrementJoinCount(isOuterJoinToken(join)); for (int num = 0; num < numChildren; num++) { ASTNode child = (ASTNode) join.getChild(num); if (child.getToken().getType() == HiveParser.TOK_TABREF) { @@ -1091,10 +1195,15 @@ public class SemanticAnalyzer extends Ba qb.countSel(); qbp.setSelExprForClause(ctx_1.dest, ast); + int posn = 0; if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) { qbp.setHints((ASTNode) ast.getChild(0)); + posn++; } + if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) + queryProperties.setUsesScript(true); + LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest); doPhase1GetColumnAliasesFromSelect(ast, qbp); @@ -1105,6 +1214,8 @@ public class SemanticAnalyzer extends Ba case HiveParser.TOK_WHERE: qbp.setWhrExprForClause(ctx_1.dest, ast); + if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) + queryProperties.setFilterWithSubQuery(true); break; case HiveParser.TOK_INSERT_INTO: @@ -1127,6 +1238,9 @@ public class SemanticAnalyzer extends Ba } } qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); + + if (qbp.getClauseNamesForDest().size() > 1) + queryProperties.setMultiDestQuery(true); break; case HiveParser.TOK_FROM: @@ -1150,9 +1264,9 @@ public class SemanticAnalyzer extends Ba processSubQuery(qb, frm); } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) { + queryProperties.setHasLateralViews(true); processLateralView(qb, frm); } else if (isJoinToken(frm)) { - queryProperties.setHasJoin(true); processJoin(qb, frm); qbp.setJoinExpr(frm); }else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){ @@ -1365,6 +1479,10 @@ public class SemanticAnalyzer extends Ba } } + public Table getTable(TableScanOperator ts) { + return topToTable.get(ts); + } + public void getMetaData(QB qb) throws SemanticException { getMetaData(qb, null); } @@ -2310,7 +2428,7 @@ public class SemanticAnalyzer extends Ba output = putOpInsertMap(output, inputRR); return output; } - + private Operator genPlanForSubQueryPredicate( QB qbSQ, ISubQueryJoinInfo subQueryPredicate) throws SemanticException { @@ -9444,7 +9562,9 @@ public class SemanticAnalyzer extends Ba aliasToOpInfo ); } } - mergeJoinTree(qb); + + if (!disableJoinMerge) + mergeJoinTree(qb); } // if any filters are present in the join tree, push them on top of the @@ -9652,9 +9772,9 @@ public class SemanticAnalyzer extends Ba } @Override - public void init() { + public void init(boolean clearPartsCache) { // clear most members - reset(); + reset(clearPartsCache); // init QB qb = new QB(null, null, false); @@ -9709,11 +9829,82 @@ public class SemanticAnalyzer extends Ba getMetaData(qb); LOG.info("Completed getting MetaData in Semantic Analysis"); + + if (runCBO) { + boolean tokenTypeIsQuery = ast.getToken().getType() == HiveParser.TOK_QUERY + || ast.getToken().getType() == HiveParser.TOK_EXPLAIN; + if (!tokenTypeIsQuery || createVwDesc != null + || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) + || !canHandleQuery(qb, true) || !HiveOptiqUtil.validateASTForCBO(ast)) { + runCBO = false; + } + + if (runCBO) { + disableJoinMerge = true; + } + } + // Save the result schema derived from the sink operator produced // by genPlan. This has the correct column names, which clients // such as JDBC would prefer instead of the c0, c1 we'll end // up with later. - Operator sinkOp = genPlan(qb); + Operator sinkOp = null; + + if (runCBO) { + OptiqBasedPlanner optiqPlanner = new OptiqBasedPlanner(); + boolean reAnalyzeAST = false; + + try { + // 1. Gen Optimized AST + ASTNode newAST = optiqPlanner.getOptimizedAST(prunedPartitions); + + // 2. Regen OP plan from optimized AST + init(false); + ctx_1 = initPhase1Ctx(); + if (!doPhase1(newAST, qb, ctx_1)) { + throw new RuntimeException( + "Couldn't do phase1 on CBO optimized query plan"); + } + // unfortunately making prunedPartitions immutable is not possible here + // with SemiJoins not all tables are costed in CBO, + // so their PartitionList is not evaluated until the run phase. + //prunedPartitions = ImmutableMap.copyOf(prunedPartitions); + getMetaData(qb); + + disableJoinMerge = true; + sinkOp = genPlan(qb); + LOG.info("CBO Succeeded; optimized logical plan."); + LOG.debug(newAST.dump()); + + /* + * Use non CBO Result Set Schema so as to preserve user specified names. + * Hive seems to have bugs with OB/LIMIT in sub queries. // 3. Reset + * result set schema resultSchema = + * convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp) + * .getRowResolver(), true); + */ + } catch (Exception e) { + LOG.error("CBO failed, skipping CBO. ", e); + if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || + (optiqPlanner.noColsMissingStats.get() > 0) || + e instanceof OptiqSemanticException) { + reAnalyzeAST = true; + } else { + throw e instanceof SemanticException ? (SemanticException) e : new SemanticException(e); + } + } finally { + runCBO = false; + disableJoinMerge = false; + if (reAnalyzeAST) { + init(true); + prunedPartitions.clear(); + analyzeInternal(ast); + return; + } + } + } else { + sinkOp = genPlan(qb); + } if (createVwDesc != null) resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver()); @@ -11953,4 +12144,1931 @@ public class SemanticAnalyzer extends Ba return false; } + /**** Temporary Place Holder For Optiq plan Gen, Optimizer ****/ + + /* + * Entry point to Optimizations using Optiq. + */ + private boolean canHandleQuery(QB qbToChk, boolean topLevelQB) { + boolean runOptiqPlanner = false; + // Assumption: + // 1. If top level QB is query then everything below it must also be Query + // 2. Nested Subquery will return false for qbToChk.getIsQuery() + if ((!topLevelQB || qbToChk.getIsQuery()) + && (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict")) + && (!topLevelQB || (queryProperties.getJoinCount() > 1) || conf.getBoolVar(ConfVars.HIVE_IN_TEST)) + && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() + && !queryProperties.hasSortBy() && !queryProperties.hasPTF() + && !queryProperties.usesScript() && !queryProperties.hasMultiDestQuery() + && !queryProperties.hasLateralViews()) { + runOptiqPlanner = true; + } else { + LOG.info("Can not invoke CBO; query contains operators not supported for CBO."); + } + + return runOptiqPlanner; + } + + private class OptiqBasedPlanner implements Frameworks.PlannerAction<RelNode> { + RelOptCluster cluster; + RelOptSchema relOptSchema; + SemanticException semanticException; + Map<String, PrunedPartitionList> partitionCache; + AtomicInteger noColsMissingStats = new AtomicInteger(0); + List<FieldSchema> topLevelFieldSchema; + + // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or + // just last one. + LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>(); + LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameOptiqPosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>(); + + private ASTNode getOptimizedAST(Map<String, PrunedPartitionList> partitionCache) + throws SemanticException { + ASTNode optiqOptimizedAST = null; + RelNode optimizedOptiqPlan = null; + this.partitionCache = partitionCache; + + try { + optimizedOptiqPlan = Frameworks.withPlanner(this, + Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); + } catch (Exception e) { + if (semanticException != null) + throw semanticException; + else + throw new RuntimeException(e); + } + optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema); + + return optiqOptimizedAST; + } + + @Override + public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) { + RelNode optiqGenPlan = null; + RelNode optiqPreCboPlan = null; + RelNode optiqOptimizedPlan = null; + + /* + * recreate cluster, so that it picks up the additional traitDef + */ + RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); + final RelOptQuery query = new RelOptQuery(planner); + final RexBuilder rexBuilder = cluster.getRexBuilder(); + cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); + + this.cluster = cluster; + this.relOptSchema = relOptSchema; + + try { + optiqGenPlan = genLogicalPlan(qb); + topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(optiqGenPlan), + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); + } catch (SemanticException e) { + semanticException = e; + throw new RuntimeException(e); + } + + optiqPreCboPlan = applyPreCBOTransforms(optiqGenPlan, HiveDefaultRelMetadataProvider.INSTANCE); + List<RelMetadataProvider> list = Lists.newArrayList(); + list.add(HiveDefaultRelMetadataProvider.INSTANCE); + RelTraitSet desiredTraits = cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY); + + HepProgram hepPgm = null; + HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleInstance(new ConvertMultiJoinRule(HiveJoinRel.class)); + hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoinRel.HIVE_JOIN_FACTORY, + HiveProjectRel.DEFAULT_PROJECT_FACTORY, HiveFilterRel.DEFAULT_FILTER_FACTORY)); + + hepPgm = hepPgmBldr.build(); + HepPlanner hepPlanner = new HepPlanner(hepPgm); + + hepPlanner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + + RelNode rootRel = optiqPreCboPlan; + hepPlanner.setRoot(rootRel); + if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) { + rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits); + } + hepPlanner.setRoot(rootRel); + + optiqOptimizedPlan = hepPlanner.findBestExp(); + + if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { + LOG.debug("CBO Planning details:\n"); + LOG.debug("Original Plan:\n"); + LOG.debug(RelOptUtil.toString(optiqGenPlan)); + LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n"); + LOG.debug(RelOptUtil.toString(optiqPreCboPlan)); + LOG.debug("Plan After Join Reordering:\n"); + LOG.debug(RelOptUtil.toString(optiqOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); + } + + return optiqOptimizedPlan; + } + + public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { + + // TODO: Decorelation of subquery should be done before attempting + // Partition Pruning; otherwise Expression evaluation may try to execute + // corelated sub query. + basePlan = hepPlan(basePlan, true, mdProvider, new PushFilterPastProjectRule( + FilterRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class, + HiveProjectRel.DEFAULT_PROJECT_FACTORY), new PushFilterPastSetOpRule( + HiveFilterRel.DEFAULT_FILTER_FACTORY), new MergeFilterRule( + HiveFilterRel.DEFAULT_FILTER_FACTORY), HivePushFilterPastJoinRule.JOIN, + HivePushFilterPastJoinRule.FILTER_ON_JOIN); + + basePlan = hepPlan(basePlan, false, mdProvider, new TransitivePredicatesOnJoinRule( + JoinRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY), + // TODO: Enable it after OPTIQ-407 is fixed + //RemoveTrivialProjectRule.INSTANCE, + new HivePartitionPrunerRule(SemanticAnalyzer.this.conf)); + + RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProjectRel.DEFAULT_PROJECT_FACTORY, + HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveJoinRel.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, + HiveSortRel.HIVE_SORT_REL_FACTORY, HiveAggregateRel.HIVE_AGGR_REL_FACTORY, HiveUnionRel.UNION_REL_FACTORY); + basePlan = fieldTrimmer.trim(basePlan); + + return basePlan; + } + + private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, RelOptRule... rules) { + + RelNode optimizedRelNode = basePlan; + HepProgramBuilder programBuilder = new HepProgramBuilder(); + if (followPlanChanges) { + programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN); + programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); + } else { + // TODO: Should this be also TOP_DOWN? + for (RelOptRule r : rules) + programBuilder.addRuleInstance(r); + } + + HepPlanner planner = new HepPlanner(programBuilder.build()); + List<RelMetadataProvider> list = Lists.newArrayList(); + list.add(mdProvider); + planner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + basePlan.getCluster().setMetadataProvider( + new CachingRelMetadataProvider(chainedProvider, planner)); + + planner.setRoot(basePlan); + optimizedRelNode = planner.findBestExp(); + + return optimizedRelNode; + } + + @SuppressWarnings("nls") + private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, + String rightalias, RelNode rightRel) throws SemanticException { + HiveUnionRel unionRel = null; + + // 1. Get Row Resolvers, Column map for original left and right input of + // Union Rel + RowResolver leftRR = this.relToHiveRR.get(leftRel); + RowResolver rightRR = this.relToHiveRR.get(rightRel); + HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias); + HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias); + + // 2. Validate that Union is feasible according to Hive (by using type + // info from RR) + if (leftmap.size() != rightmap.size()) { + throw new SemanticException("Schema of both sides of union should match."); + } + + ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias( + qb.getAliases().get(0)); + for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + if (rInfo == null) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match. " + rightalias + + " does not have the field " + field)); + } + if (lInfo == null) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match. " + leftalias + + " does not have the field " + field)); + } + if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match: field " + field + ":" + + " appears on the left side of the UNION at column position: " + + getPositionFromInternalName(lInfo.getInternalName()) + + ", and on the right side of the UNION at column position: " + + getPositionFromInternalName(rInfo.getInternalName()) + + ". Column positions should match for a UNION")); + } + // try widening coversion, otherwise fail union + TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType()); + if (commonTypeInfo == null) { + throw new SemanticException(generateErrorMessage(tabref, + "Schema of both sides of union should match: Column " + field + " is of type " + + lInfo.getType().getTypeName() + " on first table and type " + + rInfo.getType().getTypeName() + " on second table")); + } + } + + // 3. construct Union Output RR using original left & right Input + RowResolver unionoutRR = new RowResolver(); + for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + ColumnInfo unionColInfo = new ColumnInfo(lInfo); + unionColInfo.setTabAlias(unionalias); + unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType())); + unionoutRR.put(unionalias, field, unionColInfo); + } + + // 4. Determine which columns requires cast on left/right input (Optiq + // requires exact types on both sides of union) + boolean leftNeedsTypeCast = false; + boolean rightNeedsTypeCast = false; + List<RexNode> leftProjs = new ArrayList<RexNode>(); + List<RexNode> rightProjs = new ArrayList<RexNode>(); + List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList(); + List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList(); + + RelDataType leftFieldDT; + RelDataType rightFieldDT; + RelDataType unionFieldDT; + for (int i = 0; i < leftRowDT.size(); i++) { + leftFieldDT = leftRowDT.get(i).getType(); + rightFieldDT = rightRowDT.get(i).getType(); + if (!leftFieldDT.equals(rightFieldDT)) { + unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(), + cluster.getTypeFactory()); + if (!unionFieldDT.equals(leftFieldDT)) { + leftNeedsTypeCast = true; + } + leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, + cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + + if (!unionFieldDT.equals(rightFieldDT)) { + rightNeedsTypeCast = true; + } + rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, + cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } else { + leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT, + cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT, + cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } + } + + // 5. Introduce Project Rel above original left/right inputs if cast is + // needed for type parity + RelNode unionLeftInput = leftRel; + RelNode unionRightInput = rightRel; + if (leftNeedsTypeCast) { + unionLeftInput = HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType() + .getFieldNames()); + } + if (rightNeedsTypeCast) { + unionRightInput = HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType() + .getFieldNames()); + } + + // 6. Construct Union Rel + ImmutableList.Builder bldr = new ImmutableList.Builder<RelNode>(); + bldr.add(unionLeftInput); + bldr.add(unionRightInput); + unionRel = new HiveUnionRel(cluster, TraitsUtil.getDefaultTraitSet(cluster), + bldr.build()); + + relToHiveRR.put(unionRel, unionoutRR); + relToHiveColNameOptiqPosMap.put(unionRel, + this.buildHiveToOptiqColumnMap(unionoutRR, unionRel)); + + return unionRel; + } + + private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType, + ASTNode joinCond) throws SemanticException { + RelNode joinRel = null; + + // 1. construct the RowResolver for the new Join Node by combining row + // resolvers from left, right + RowResolver leftRR = this.relToHiveRR.get(leftRel); + RowResolver rightRR = this.relToHiveRR.get(rightRel); + RowResolver joinRR = null; + + if (hiveJoinType != JoinType.LEFTSEMI) { + joinRR = RowResolver.getCombinedRR(leftRR, rightRR); + } else { + joinRR = new RowResolver(); + RowResolver.add(joinRR, leftRR, 0); + } + + // 2. Construct ExpressionNodeDesc representing Join Condition + RexNode optiqJoinCond = null; + if (joinCond != null) { + JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType); + Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond, + jCtx); + if (jCtx.getError() != null) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), + jCtx.getError())); + + ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); + + List<RelNode> inputRels = new ArrayList<RelNode>(); + inputRels.add(leftRel); + inputRels.add(rightRel); + optiqJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, + relToHiveRR, relToHiveColNameOptiqPosMap, false); + } else { + optiqJoinCond = cluster.getRexBuilder().makeLiteral(true); + } + + // 3. Validate that join condition is legal (i.e no function refering to + // both sides of join, only equi join) + // TODO: Join filter handling (only supported for OJ by runtime or is it + // supported for IJ as well) + + // 4. Construct Join Rel Node + boolean leftSemiJoin = false; + JoinRelType optiqJoinType; + switch (hiveJoinType) { + case LEFTOUTER: + optiqJoinType = JoinRelType.LEFT; + break; + case RIGHTOUTER: + optiqJoinType = JoinRelType.RIGHT; + break; + case FULLOUTER: + optiqJoinType = JoinRelType.FULL; + break; + case LEFTSEMI: + optiqJoinType = JoinRelType.INNER; + leftSemiJoin = true; + break; + case INNER: + default: + optiqJoinType = JoinRelType.INNER; + break; + } + + if (leftSemiJoin) { + List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>(); + List<RexNode> leftJoinKeys = new ArrayList<RexNode>(); + List<RexNode> rightJoinKeys = new ArrayList<RexNode>(); + + RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, + optiqJoinCond, leftJoinKeys, rightJoinKeys, null, null); + + if (!nonEquiConds.isAlwaysTrue()) { + throw new SemanticException("Non equality condition not supported in Semi-Join" + + nonEquiConds); + } + + RelNode[] inputRels = new RelNode[] { leftRel, rightRel }; + final List<Integer> leftKeys = new ArrayList<Integer>(); + final List<Integer> rightKeys = new ArrayList<Integer>(); + optiqJoinCond = HiveOptiqUtil.projectNonColumnEquiConditions( + HiveProjectRel.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0, + leftKeys, rightKeys); + + joinRel = new SemiJoinRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), + inputRels[0], inputRels[1], optiqJoinCond, ImmutableIntList.copyOf(leftKeys), + ImmutableIntList.copyOf(rightKeys)); + } else { + joinRel = HiveJoinRel.getJoin(cluster, leftRel, rightRel, optiqJoinCond, optiqJoinType, + leftSemiJoin); + } + // 5. Add new JoinRel & its RR to the maps + relToHiveColNameOptiqPosMap.put(joinRel, this.buildHiveToOptiqColumnMap(joinRR, joinRel)); + relToHiveRR.put(joinRel, joinRR); + + return joinRel; + } + + /** + * Generate Join Logical Plan Relnode by walking through the join AST. + * + * @param qb + * @param aliasToRel + * Alias(Table/Relation alias) to RelNode; only read and not + * written in to by this method + * @return + * @throws SemanticException + */ + private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel) + throws SemanticException { + RelNode leftRel = null; + RelNode rightRel = null; + JoinType hiveJoinType = null; + + if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { + String msg = String.format("UNIQUE JOIN is currently not supported in CBO," + + " turn off cbo to use UNIQUE JOIN."); + LOG.debug(msg); + throw new OptiqSemanticException(msg); + } + + // 1. Determine Join Type + // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN + switch (joinParseTree.getToken().getType()) { + case HiveParser.TOK_LEFTOUTERJOIN: + hiveJoinType = JoinType.LEFTOUTER; + break; + case HiveParser.TOK_RIGHTOUTERJOIN: + hiveJoinType = JoinType.RIGHTOUTER; + break; + case HiveParser.TOK_FULLOUTERJOIN: + hiveJoinType = JoinType.FULLOUTER; + break; + case HiveParser.TOK_LEFTSEMIJOIN: + hiveJoinType = JoinType.LEFTSEMI; + break; + default: + hiveJoinType = JoinType.INNER; + break; + } + + // 2. Get Left Table Alias + ASTNode left = (ASTNode) joinParseTree.getChild(0); + if ((left.getToken().getType() == HiveParser.TOK_TABREF) + || (left.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)) + .toLowerCase(); + String leftTableAlias = left.getChildCount() == 1 ? tableName : unescapeIdentifier(left + .getChild(left.getChildCount() - 1).getText().toLowerCase()); + // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? + // partitionTableFunctionSource partitioningSpec? expression*) + // guranteed to have an lias here: check done in processJoin + leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(left + .getChild(1).getText().toLowerCase()) + : leftTableAlias; + leftRel = aliasToRel.get(leftTableAlias); + } else if (isJoinToken(left)) { + leftRel = genJoinLogicalPlan(left, aliasToRel); + } else { + assert (false); + } + + // 3. Get Right Table Alias + ASTNode right = (ASTNode) joinParseTree.getChild(1); + if ((right.getToken().getType() == HiveParser.TOK_TABREF) + || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)) + .toLowerCase(); + String rightTableAlias = right.getChildCount() == 1 ? tableName : unescapeIdentifier(right + .getChild(right.getChildCount() - 1).getText().toLowerCase()); + // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? + // partitionTableFunctionSource partitioningSpec? expression*) + // guranteed to have an lias here: check done in processJoin + rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(right + .getChild(1).getText().toLowerCase()) + : rightTableAlias; + rightRel = aliasToRel.get(rightTableAlias); + } else { + assert (false); + } + + // 4. Get Join Condn + ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); + + // 5. Create Join rel + return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond); + } + + private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException { + RowResolver rr = new RowResolver(); + HiveTableScanRel tableRel = null; + + try { + + // 1. If the table has a Sample specified, bail from Optiq path. + if ( qb.getParseInfo().getTabSample(tableAlias) != null || + SemanticAnalyzer.this.nameToSplitSample.containsKey(tableAlias)) { + String msg = String.format("Table Sample specified for %s." + + " Currently we don't support Table Sample clauses in CBO," + + " turn off cbo for queries on tableSamples.", tableAlias); + LOG.debug(msg); + throw new OptiqSemanticException(msg); + } + + // 2. Get Table Metadata + Table tab = qb.getMetaData().getSrcForAlias(tableAlias); + + // 3. Get Table Logical Schema (Row Type) + // NOTE: Table logical schema = Non Partition Cols + Partition Cols + + // Virtual Cols + + // 3.1 Add Column info for non partion cols (Object Inspector fields) + StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() + .getObjectInspector(); + List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs(); + ColumnInfo colInfo; + String colName; + ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>(); + for (int i = 0; i < fields.size(); i++) { + colName = fields.get(i).getFieldName(); + colInfo = new ColumnInfo( + fields.get(i).getFieldName(), + TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), + tableAlias, false); + colInfo.setSkewedCol((isSkewedCol(tableAlias, qb, colName)) ? true : false); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + } + // TODO: Fix this + ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst); + ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>(); + + // 3.2 Add column info corresponding to partition columns + for (FieldSchema part_col : tab.getPartCols()) { + colName = part_col.getName(); + colInfo = new ColumnInfo(colName, + TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + partitionColumns.add(colInfo); + } + + // 3.3 Add column info corresponding to virtual columns + Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator(); + while (vcs.hasNext()) { + VirtualColumn vc = vcs.next(); + colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, + vc.getIsHidden()); + rr.put(tableAlias, vc.getName(), colInfo); + cInfoLst.add(colInfo); + } + + // 3.4 Build row type from field <type, name> + RelDataType rowType = TypeConverter.getType(cluster, rr, null); + + // 4. Build RelOptAbstractTable + RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, tableAlias, rowType, tab, + nonPartitionColumns, partitionColumns, conf, partitionCache, noColsMissingStats); + + // 5. Build Hive Table Scan Rel + tableRel = new HiveTableScanRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), + optTable, rowType); + + // 6. Add Schema(RR) to RelNode-Schema map + ImmutableMap<String, Integer> hiveToOptiqColMap = buildHiveToOptiqColumnMap(rr, tableRel); + relToHiveRR.put(tableRel, rr); + relToHiveColNameOptiqPosMap.put(tableRel, hiveToOptiqColMap); + } catch (Exception e) { + if ( e instanceof SemanticException) { + throw (SemanticException) e; + } else { + throw (new RuntimeException(e)); + } + } + + return tableRel; + } + + private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException { + ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel)); + ImmutableMap<String, Integer> hiveColNameOptiqPosMap = this.relToHiveColNameOptiqPosMap + .get(srcRel); + RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), + hiveColNameOptiqPosMap, 0, true).convert(filterCondn); + RelNode filterRel = new HiveFilterRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), + srcRel, convertedFilterExpr); + this.relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap); + relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); + relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap); + + return filterRel; + } + + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException { + /* + * Handle Subquery predicates. + * + * Notes (8/22/14 hb): Why is this a copy of the code from {@link + * #genFilterPlan} - for now we will support the same behavior as non CBO + * route. - but plan to allow nested SubQueries(Restriction.9.m) and + * multiple SubQuery expressions(Restriction.8.m). This requires use to + * utilize Optiq's Decorrelation mechanics, and for Optiq to fix/flush out + * Null semantics(OPTIQ-373) - besides only the driving code has been + * copied. Most of the code which is SubQueryUtils and QBSubQuery is + * reused. + */ + int numSrcColumns = srcRel.getRowType().getFieldCount(); + List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { + + /* + * Restriction.9.m :: disallow nested SubQuery expressions. + */ + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); + } + + /* + * Restriction.8.m :: We allow only 1 SubQuery expression per Query. + */ + if (subQueriesInOriginalTree.size() > 1) { + + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); + } + + /* + * Clone the Search AST; apply all rewrites on the clone. + */ + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outerQBRR = inputRR; + ImmutableMap<String, Integer> outerQBPosMap = + relToHiveColNameOptiqPosMap.get(srcRel); + + for (int i = 0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = qb.incrNumSubQueryPredicates(); + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, + originalSubQueryAST, ctx); + + if (!forHavingClause) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } + String havingInputAlias = null; + + if (forHavingClause) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToRel.put(havingInputAlias, srcRel); + } + + subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, + aliasToRel.keySet()); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); + qbSQ.setSubQueryDef(subQuery.getSubQuery()); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1); + getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ); + aliasToRel.put(subQuery.getAlias(), subQueryRelNode); + RowResolver sqRR = relToHiveRR.get(subQueryRelNode); + + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if (subQuery.getOperator().getType() != SubQueryType.EXISTS + && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, + "SubQuery can contain only 1 item in Select List.")); + } + + /* + * If this is a Not In SubQuery Predicate then Join in the Null Check + * SubQuery. See QBSubQuery.NotInCheck for details on why and how this + * is constructed. + */ + if (subQuery.getNotInCheck() != null) { + QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); + notInCheck.setSQRR(sqRR); + QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); + qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); + ctx_1 = initPhase1Ctx(); + doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1); + getMetaData(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic); + aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); + srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, + // set explicitly to inner until we figure out SemiJoin use + // notInCheck.getJoinType(), + JoinType.INNER, notInCheck.getJoinConditionAST()); + inputRR = relToHiveRR.get(srcRel); + if (forHavingClause) { + aliasToRel.put(havingInputAlias, srcRel); + } + } + + /* + * Gen Join between outer Operator and SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); + srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), + subQuery.getJoinConditionAST()); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + + srcRel = genFilterRelNode(searchCond, srcRel); + + /* + * For Not Exists and Not In, add a projection on top of the Left + * Outer Join. + */ + if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + || subQuery.getOperator().getType() != SubQueryType.NOT_IN) { + srcRel = projectLeftOuterSide(srcRel, numSrcColumns); + } + } + relToHiveRR.put(srcRel, outerQBRR); + relToHiveColNameOptiqPosMap.put(srcRel, outerQBPosMap); + return srcRel; + } + + return genFilterRelNode(searchCond, srcRel); + } + + private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { + RowResolver iRR = relToHiveRR.get(srcRel); + RowResolver oRR = new RowResolver(); + RowResolver.add(oRR, iRR, 0, numColumns); + + List<RexNode> optiqColLst = new ArrayList<RexNode>(); + List<String> oFieldNames = new ArrayList<String>(); + RelDataType iType = srcRel.getRowType(); + + for (int i = 0; i < iType.getFieldCount(); i++) { + RelDataTypeField fType = iType.getFieldList().get(i); + String fName = iType.getFieldNames().get(i); + optiqColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i)); + oFieldNames.add(fName); + } + + HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames); + + this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(oRR, selRel)); + this.relToHiveRR.put(selRel, oRR); + return selRel; + } + + private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel, + boolean forHavingClause) throws SemanticException { + RelNode filterRel = null; + + Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() + .iterator(); + if (whereClauseIterator.hasNext()) { + filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, + aliasToRel, forHavingClause); + } + + return filterRel; + } + + /** + * Class to store GenericUDAF related information. + */ + private class AggInfo { + private final List<ExprNodeDesc> m_aggParams; + private final TypeInfo m_returnType; + private final String m_udfName; + private final boolean m_distinct; + + private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName, + boolean isDistinct) { + m_aggParams = aggParams; + m_returnType = returnType; + m_udfName = udfName; + m_distinct = isDistinct; + } + } + + private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst, + RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap, + Integer childProjLstIndx) throws SemanticException { + + // 1. Get agg fn ret type in Optiq + RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, + this.cluster.getTypeFactory()); + + // 2. Convert Agg Fn args and type of args to Optiq + // TODO: Does HQL allows expressions as aggregate args or can it only be + // projections from child? + Integer inputIndx; + List<Integer> argList = new ArrayList<Integer>(); + RexNode rexNd = null; + RelDataTypeFactory dtFactory = this.cluster.getTypeFactory(); + ImmutableList.Builder<RelDataType> aggArgRelDTBldr = new ImmutableList.Builder<RelDataType>(); + for (ExprNodeDesc expr : agg.m_aggParams) { + rexNd = converter.convert(expr); + inputIndx = rexNodeToPosMap.get(rexNd.toString()); + if (inputIndx == null) { + gbChildProjLst.add(rexNd); + rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx); + inputIndx = childProjLstIndx; + childProjLstIndx++; + } + argList.add(inputIndx); + + // TODO: does arg need type cast? + aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory)); + } + + // 3. Get Aggregation FN from Optiq given name, ret type and input arg + // type + final Aggregation aggregation = SqlFunctionConverter.getOptiqAggFn(agg.m_udfName, + aggArgRelDTBldr.build(), aggFnRetType); + + return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); + } + + private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst, + RelNode srcRel) throws SemanticException { + RowResolver gbInputRR = this.relToHiveRR.get(srcRel); + ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel); + RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), + posMap, 0, false); + + final List<RexNode> gbChildProjLst = Lists.newArrayList(); + final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>(); + final BitSet groupSet = new BitSet(); + Integer gbIndx = 0; + RexNode rnd; + for (ExprNodeDesc key : gbExprs) { + rnd = converter.convert(key); + gbChildProjLst.add(rnd); + groupSet.set(gbIndx); + rexNodeToPosMap.put(rnd.toString(), gbIndx); + gbIndx++; + } + + List<AggregateCall> aggregateCalls = Lists.newArrayList(); + int i = aggInfoLst.size(); + for (AggInfo agg : aggInfoLst) { + aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap, + gbChildProjLst.size())); + } + + if (gbChildProjLst.isEmpty()) { + // This will happen for count(*), in such cases we arbitarily pick + // first element from srcRel + gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0)); + } + RelNode gbInputRel = HiveProjectRel.create(srcRel, gbChildProjLst, null); + + HiveRel aggregateRel = null; + try { + aggregateRel = new HiveAggregateRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), + gbInputRel, groupSet, aggregateCalls); + } catch (InvalidRelException e) { + throw new SemanticException(e); + } + + return aggregateRel; + } + + private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, + RowResolver gByInputRR, RowResolver gByRR) { + if (gByExpr.getType() == HiveParser.DOT + && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { + String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0) + .getText()); + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText()); + gByRR.put(tab_alias, col_alias, colInfo); + } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText()); + String tab_alias = null; + /* + * If the input to the GBy has a tab alias for the column, then add an + * entry based on that tab_alias. For e.g. this query: select b.x, + * count(*) from t1 b group by x needs (tab_alias=b, col_alias=x) in the + * GBy RR. tab_alias=b comes from looking at the RowResolver that is the + * ancestor before any GBy/ReduceSinks added for the GBY operation. + */ + try { + ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias); + tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); + } catch (SemanticException se) { + } + gByRR.put(tab_alias, col_alias, colInfo); + } + } + + private void addToGBExpr(RowResolver groupByOutputRowResolver, + RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc, + List<ExprNodeDesc> gbExprNDescLst, List<String> outputColumnNames) { + // TODO: Should we use grpbyExprNDesc.getTypeInfo()? what if expr is + // UDF + int i = gbExprNDescLst.size(); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + gbExprNDescLst.add(grpbyExprNDesc); + + ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); + groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); + + addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, + groupByOutputRowResolver); + } + + private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) + throws SemanticException { + AggInfo aInfo = null; + + // 1 Convert UDAF Params to ExprNodeDesc + ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); + for (int i = 1; i <= aggFnLstArgIndx; i++) { + ASTNode paraExpr = (ASTNode) aggAst.getChild(i); + ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); + aggParameters.add(paraExprNode); + } + + // 2. Is this distinct UDAF + boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; + + // 3. Determine type of UDAF + TypeInfo udafRetType = null; + + // 3.1 Obtain UDAF name + String aggName = unescapeIdentifier(aggAst.getChild(0).getText()); + + // 3.2 Rank functions type is 'int'/'double' + if (FunctionRegistry.isRankingFunction(aggName)) { + if (aggName.equalsIgnoreCase("percent_rank")) + udafRetType = TypeInfoFactory.doubleTypeInfo; + else + udafRetType = TypeInfoFactory.intTypeInfo; + } else { + // 3.3 Try obtaining UDAF evaluators to determine the ret type + try { + boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; + + // 3.3.1 Get UDAF Evaluator + Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct); + + GenericUDAFEvaluator genericUDAFEvaluator = null; + if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) + || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { + ArrayList<ObjectInspector> originalParameterTypeInfos = + getWritableObjectInspector(aggParameters); + genericUDAFEvaluator = + FunctionRegistry.getGenericWindowingEvaluator(aggName, + originalParameterTypeInfos, isDistinct, isAllColumns); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + udafRetType = ((ListTypeInfo)udaf.returnType).getListElementTypeInfo(); + } else { + genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, + aggParameters, aggAst, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + + // 3.3.2 Get UDAF Info using UDAF Evaluator + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + udafRetType = udaf.returnType; + } + } catch (Exception e) { + LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName + + ", trying to translate to GenericUDF"); + } + + // 3.4 Try GenericUDF translation + if (udafRetType == null) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + tcCtx.setAllowDistinctFunctions(false); + ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx); + udafRetType = exp.getTypeInfo(); + } + } + + // 4. Construct AggInfo + aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); + + return aInfo; + } + + /** + * Generate GB plan. + * + * @param qb + * @param srcRel + * @return TODO: 1. Grouping Sets (roll up..) + * @throws SemanticException + */ + private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + RelNode gbRel = null; + QBParseInfo qbp = getQBParseInfo(qb); + + // 0. for GSets, Cube, Rollup, bail from Optiq path. + if (!qbp.getDestRollups().isEmpty() + || !qbp.getDestGroupingSets().isEmpty() + || !qbp.getDestCubes().isEmpty()) { + String gbyClause = null; + HashMap<String, ASTNode> gbysMap = qbp.getDestToGroupBy(); + if (gbysMap.size() == 1) { + ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue(); + gbyClause = SemanticAnalyzer.this.ctx.getTokenRewriteStream() + .toString(gbyAST.getTokenStartIndex(), + gbyAST.getTokenStopIndex()); + gbyClause = "in '" + gbyClause + "'."; + } else { + gbyClause = "."; + } + String msg = String.format("Encountered Grouping Set/Cube/Rollup%s" + + " Currently we don't support Grouping Set/Cube/Rollup" + + " clauses in CBO," + " turn off cbo for these queries.", + gbyClause); + LOG.debug(msg); + throw new OptiqSemanticException(msg); + } + + // 1. Gather GB Expressions (AST) (GB + Aggregations) + // NOTE: Multi Insert is not supported + String detsClauseName = qbp.getClauseNames().iterator().next(); + List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName); + HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); + boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; + boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true + : false; + + if (hasGrpByAstExprs || hasAggregationTrees) { + ArrayList<ExprNodeDesc> gbExprNDescLst = new ArrayList<ExprNodeDesc>(); + ArrayList<String> outputColumnNames = new ArrayList<String>(); + + // 2. Input, Output Row Resolvers + RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + + if (hasGrpByAstExprs) { + // 3. Construct GB Keys (ExprNode) + for (int i = 0; i < grpByAstExprs.size(); ++i) { + ASTNode grpbyExpr = grpByAstExprs.get(i); + Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode( + grpbyExpr, new TypeCheckCtx(groupByInputRowResolver)); + ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); + if (grpbyExprNDesc == null) + throw new RuntimeException("Invalid Column Reference: " + grpbyExpr.dump()); + + addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, + grpbyExprNDesc, gbExprNDescLst, outputColumnNames); + } + } + + // 4. Construct aggregation function Info + ArrayList<AggInfo> aggregations = new ArrayList<AggInfo>(); + if (hasAggregationTrees) { + assert (aggregationTrees != null); + for (ASTNode value : aggregationTrees.values()) { + // 4.1 Determine type of UDAF + // This is the GenericUDAF name + String aggName = unescapeIdentifier(value.getChild(0).getText()); + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; + + // 4.2 Convert UDAF Params to ExprNodeDesc + ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode paraExpr = (ASTNode) value.getChild(i); + ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); + aggParameters.add(paraExprNode); + } + + Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct); + GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, + aggParameters, value, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); + aggregations.add(aInfo); + String field = getColumnInternalName(gbExprNDescLst.size() + aggregations.size() - 1); + outputColumnNames.add(field); + groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, + "", false)); + } + } + + gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel); + relToHiveColNameOptiqPosMap.put(gbRel, + buildHiveToOptiqColumnMap(groupByOutputRowResolver, gbRel)); + this.relToHiveRR.put(gbRel, groupByOutputRowResolver); + } + + return gbRel; + } + + private RelNode genOBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + RelNode relToRet = null; + + QBParseInfo qbp = getQBParseInfo(qb); + String dest = qbp.getClauseNames().iterator().next(); + ASTNode obAST = qbp.getOrderByForClause(dest); + + if (obAST != null) { + // 1. OB Expr sanity test + // in strict mode, in the presence of order by, limit must be specified + Integer limit = qb.getParseInfo().getDestLimit(dest); + if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") + && limit == null) { + throw new SemanticException(generateErrorMessage(obAST, + ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); + } + + // 2. Walk through OB exprs and extract field collations and additional + // virtual columns needed + final List<RexNode> newVCLst = new ArrayList<RexNode>(); + final List<RelFieldCollation> fieldCollations = Lists.newArrayList(); + int fieldIndex = 0; + + List<Node> obASTExprLst = obAST.getChildren(); + ASTNode obASTExpr; + List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>(); + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outputRR = new RowResolver(); + + RexNode rnd; + RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), + relToHiveColNameOptiqPosMap.get(srcRel), 0, false); + int srcRelRecordSz = srcRel.getRowType().getFieldCount(); + + for (int i = 0; i < obASTExprLst.size(); i++) { + // 2.1 Convert AST Expr to ExprNode + obASTExpr = (ASTNode) obASTExprLst.get(i); + Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode( + obASTExpr, new TypeCheckCtx(inputRR)); + ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0)); + if (obExprNDesc == null) + throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + + // 2.2 Convert ExprNode to RexNode + rnd = converter.convert(obExprNDesc); + + // 2.3 Determine the index of ob expr in child schema + // NOTE: Optiq can not take compound exprs in OB without it being + // present in the child (& hence we add a child Project Rel) + if (rnd instanceof RexInputRef) { + fieldIndex = ((RexInputRef) rnd).getIndex(); + } else { + fieldIndex = srcRelRecordSz + newVCLst.size(); + newVCLst.add(rnd); + vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>((ASTNode) obASTExpr.getChild(0), + obExprNDesc.getTypeInfo())); + } + + // 2.4 Determine the Direction of order by + org.eigenbase.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; + if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + order = RelFieldCollation.Direction.ASCENDING; + } + + // 2.5 Add to field collations + fieldCollations.add(new RelFieldCollation(fieldIndex, order)); + } + + // 3. Add Child Project Rel if needed + RelNode obInputRel = srcRel; + if (!newVCLst.isEmpty()) { + List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), + new Function<RelDataTypeField, RexNode>() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + + obInputRel = HiveProjectRel.create(srcRel, CompositeList.of(originalInputRefs, newVCLst), + null); + } + + // 4. Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); + // TODO: Is it better to introduce a + // project on top to restrict VC from showing up in sortRel type + RelNode sortRel = new HiveSortRel(cluster, traitSet, obInputRel, canonizedCollation, + null, null); + + // 5. Construct OB Parent Rel If needed + // Construct a parent Project if OB has virtual columns(vc) otherwise + // vc would show up in the result + // TODO: If OB is part of sub query & Parent Query select is not of the + // type "select */.*..." then parent project is not needed + relToRet = sortRel; + if (!newVCLst.isEmpty()) { + List<RexNode> obParentRelProjs = Lists.transform(srcRel.getRowType().getFieldList(), + new Function<RelDataTypeField, RexNode>() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + + relToRet = HiveProjectRel.create(sortRel, obParentRelProjs, null); + } + + // 6. Construct output RR + RowResolver.add(outputRR, inputRR, 0); + + // 7. Update the maps + // NOTE: Output RR for SortRel is considered same as its input; we may + // end up not using VC that is present in sort rel. Also note that + // rowtype of sortrel is the type of it child; if child happens to be + // synthetic project that we introduced then that projectrel would + // contain the vc. + ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR, + relToRet); + relToHiveRR.put(relToRet, outputRR); + relToHiveColNameOptiqPosMap.put(relToRet, hiveColNameOptiqPosMap); + } + + return relToRet; + } + + private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + HiveRel sortRel = null; + QBParseInfo qbp = getQBParseInfo(qb); + Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next()); + + if (limit != null) { + RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit)); + RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY); + sortRel = new HiveSortRel(cluster, traitSet, srcRel, canonizedCollation, null, fetch); + + RowResolver outputRR = new RowResolver(); + RowResolver.add(outputRR, relToHiveRR.get(srcRel), 0); + ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR, + sortRel); + relToHiveRR.put(sortRel, outputRR); + relToHiveColNameOptiqPosMap.put(sortRel, hiveColNameOptiqPosMap); + } + + return sortRel; + } + + List<RexNode> getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, RowResolver inputRR) + throws SemanticException { + List<RexNode> pKeys = new ArrayList<RexNode>(); + if (ps != null) { + List<PartitionExpression> pExprs = ps.getExpressions(); + for (PartitionExpression pExpr : pExprs) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx); + pKeys.add(converter.convert(exp)); + } + } + + return pKeys; + } + + List<RexFieldCollation> getOrderKeys(OrderSpec os, RexNodeConverter converter, + RowResolver inputRR) throws SemanticException { + List<RexFieldCollation> oKeys = new ArrayList<RexFieldCollation>(); + if (os != null) { + List<OrderExpression> oExprs = os.getExpressions(); + for (OrderExpression oExpr : oExprs) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx); + RexNode ordExp = converter.convert(exp); + Set<SqlKind> flags = new HashSet<SqlKind>(); + if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) + flags.add(SqlKind.DESCENDING); + oKeys.add(new RexFieldCollation(ordExp, flags)); + } + } + + return oKeys; + } + + RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) { + RexWindowBound rwb = null; + + if (bs != null) { + SqlNode sn = null; + SqlParserPos pos = new SqlParserPos(1, 1); + SqlNode amt = bs.getAmt() == 0 ? null : SqlLiteral.createExactNumeric( + String.valueOf(bs.getAmt()), new SqlParserPos(2, 2)); + RexNode amtLiteral = null; + SqlCall sc = null; + RexNode rn = null; + + if (amt != null) + amtLiteral = cluster.getRexBuilder().makeLiteral(new Integer(bs.getAmt()), + cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true); + + switch (bs.getDirection()) { + case PRECEDING: + if (amt == null) { + rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null); + } else { + sc = (SqlCall) SqlWindow.createPreceding(amt, pos); + rwb = RexWindowBound.create(sc, + cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); + } + break; + + case CURRENT: + rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null); + break; + + case FOLLOWING: + if (amt == null) { + rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)), + null); + } else { + sc = (SqlCall) SqlWindow.createFollowing(amt, pos); + rwb = RexWindowBound.create(sc, + cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); + } + break; + } + } + + return rwb; + } + + int getWindowSpecIndx(ASTNode wndAST) { + int wndASTIndx = -1; + int wi = wndAST.getChildCount() - 1; + if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) { + wi = -1; + } + + return wi; + } + + Pair<RexNode, TypeInfo> genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, RelNode srcRel) + throws SemanticException { + RexNode w = null; + TypeInfo wHiveRetType = null; + + if (wExpSpec instanceof WindowFunctionSpec) { + WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec; + ASTNode windowProjAst = wFnSpec.getExpression(); + // TODO: do we need to get to child? + int wndSpecASTIndx = getWindowSpecIndx(windowProjAst); + // 2. Get Hive Aggregate Info + AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, + this.relToHiveRR.get(srcRel)); + + // 3. Get Optiq Return type for Agg Fn + wHiveRetType = hiveAggInfo.m_returnType; + RelDataType optiqAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType, + this.cluster.getTypeFactory()); + + // 4. Convert Agg Fn args to Optiq + ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel); + RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), + posMap, 0, false); + Builder<RexNode> optiqAggFnArgsBldr = ImmutableList.<RexNode> builder(); + Builder<RelDataType> optiqAggFnArgsTypeBldr = ImmutableList.<RelDataType> builder(); + RexNode rexNd = null; + for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) { + optiqAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i))); + optiqAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i) + .getTypeInfo(), this.cluster.getTypeFactory())); + } + ImmutableList<RexNode> optiqAggFnArgs = optiqAggFnArgsBldr.build(); + ImmutableList<RelDataType> optiqAggFnArgsType = optiqAggFnArgsTypeBldr.build(); + + // 5. Get Optiq Agg Fn + final SqlAggFunction optiqAggFn = SqlFunctionConverter.getOptiqAggFn(hiveAggInfo.m_udfName, + optiqAggFnArgsType, optiqAggFnRetType); + + // 6. Translate Window spec + RowResolver inputRR = relToHiveRR.get(srcRel); + WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec(); + List<RexNode> partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR); + List<RexFieldCollation> orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR); + RexWindowBound upperBound = getBound(wndSpec.windowFrame.start, converter); + RexWindowBound lowerBound = getBound(wndSpec.windowFrame.end, converter); + boolean isRows = ((wndSpec.windowFrame.start instanceof RangeBoundarySpec) || (wndSpec.windowFrame.end instanceof RangeBoundarySpec)) ? true + : false; + + w = cluster.getRexBuilder().makeOver(optiqAggFnRetType, optiqAggFn, optiqAggFnArgs, + partitionKeys, ImmutableList.<RexFieldCollation> copyOf(orderKeys), lowerBound, + upperBound, isRows, true, false); + } else { + // TODO: Convert to Semantic Exception + throw new RuntimeException("Unsupported window Spec"); + } + + return new Pair(w, wHiveRetType); + } + + private RelNode genSelectForWindowing(QB qb, RelNode srcRel) throws SemanticException { + RelNode selOpForWindow = null; + QBParseInfo qbp = getQBParseInfo(qb); + WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs() + .values().iterator().next() : null; + + if (wSpec != null) { + // 1. Get valid Window Function Spec + wSpec.validateAndMakeEffective(); + List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions(); + + if (windowExpressions != null && !windowExpressions.isEmpty()) { + RowResolver inputRR = this.relToHiveRR.get(srcRel); + // 2. Get RexNodes for original Projections from below + List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>( + HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel)); + + // 3. Construct new Row Resolver with everything from below. + RowResolver out_rwsch = new RowResolver(); + RowResolver.add(out_rwsch, inputRR, 0); + + // 4. Walk through Window Expressions & Construct RexNodes for those, + // Update out_rwsch + for (WindowExpressionSpec wExprSpec : windowExpressions) { + if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) { + Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel); + projsForWindowSelOp.add(wtp.getKey()); + + // 6.2.2 Update Output Row Schema + ColumnInfo oColInfo = new ColumnInfo( + getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false); + if (false) { + out_rwsch.checkColumn(null, wExprSpec.getAlias()); + out_rwsch.put(null, wExprSpec.getAlias(), oColInfo); + } else { + out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo); + } + } + } + + selOpForWindow = genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel); + } + } + + return selOpForWindow; + } + + private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch, + RelNode srcRel) throws OptiqSemanticException { + // 1. Build Column Names + // TODO: Should this be external names + ArrayList<String> columnNames = new ArrayList<String>(); + for (int i = 0; i < optiqColLst.size(); i++) { + columnNames.add(getColumnInternalName(i)); + } + + // 2. Prepend column names with '_o_' + /* + * Hive treats names that start with '_c' as internalNames; so change the + * names so we don't run into this issue when converting back to Hive AST. + */ + List<String> oFieldNames = Lists.transform(columnNames, new Function<String, String>() { + @Override + public String apply(String hName) { + return "_o_" + hName; + } + }); + + // 3 Build Optiq Rel Node for project using converted projections & col + // names + HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames); + + // 4. Keep track of colname-to-posmap && RR for new select + this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(out_rwsch, selRel)); + this.relToHiveRR.put(selRel, out_rwsch); + + return selRel; + } + + /** + * NOTE: there can only be one select caluse since we don't handle multi + * destination insert. + * + * @throws SemanticException + */ + private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + + // 0. Generate a Select Node for Windowing + RelNode selForWindow = genSelectForWindowing(qb, srcRel); + srcRel = (selForWindow == null) ? srcRel : selForWindow; + + boolean subQuery; + ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>(); + ArrayList<Pair<Integer, RexNode>> windowingRexNodes = new ArrayList<Pair<Integer, RexNode>>(); + + // 1. Get Select Expression List + QBParseInfo qbp = getQBParseInfo(qb); + String selClauseName = qbp.getClauseNames().iterator().next(); + ASTNode selExprList = qbp.getSelForClause(selClauseName); + + // 2.Row resolvers for input, output + RowResolver out_rwsch = new RowResolver(); + ASTNode trfm = null; + Integer pos = Integer.valueOf(0); + RowResolver inputRR = this.relToHiveRR.get(srcRel); + + // 3. Query Hints + // TODO: Handle Query Hints; currently we ignore them + boolean selectStar = false; + int posn = 0; + boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST); + if (hintPresent) { + String hint = SemanticAnalyzer.this.ctx.getTokenRewriteStream(). + toString( + selExprList.getChild(0).getTokenStartIndex(), + selExprList.getChild(0).getTokenStopIndex()); + String msg = String.format("Hint specified for %s." + + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint); + LOG.debug(msg); + throw new OptiqSemanticException(msg); + } + + // 4. Determine if select corresponds to a subquery + subQuery = qb.getParseInfo().getIsSubQ(); + + // 4. Bailout if select involves Transform + boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM); + if (isInTransform) { + String msg = String.format("SELECT TRANSFORM is currently not supported in CBO," + + " turn off cbo to use TRANSFORM."); + LOG.debug(msg); + throw new OptiqSemanticException(msg); + } + + // 5. Bailout if select involves UDTF + ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0); + GenericUDTF genericUDTF = null; + int udtfExprType = udtfExpr.getType(); + if (udtfExprType == HiveParser.TOK_FUNCTION || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) { + String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(udtfExpr, true); + FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName); + if (fi != null) { + genericUDTF = fi.getGenericUDTF(); + } + if (genericUDTF != null) { + String msg = String.format("UDTF " + funcName + " is currently not supported in CBO," + + " turn off cbo to use UDTF " + funcName); + LOG.debug(msg); + throw new OptiqSemanticException(msg); + } + } + + // 6. Iterate over all expression (after SELECT) + ASTNode exprList = selExprList; + int startPosn = posn; + int wndProjPos = 0; + List<String> tabAliasesForAllProjs = getTabAliases(inputRR); + for (int i = startPosn; i < exprList.getChildCount(); ++i) { + + // 6.1 child can be EXPR AS ALIAS, or EXPR. + ASTNode child = (ASTNode) exprList.getChild(i); + boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2); + + // 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's + // This check is not needed and invalid when there is a transform b/c + // the + // AST's are slightly different. + if (child.getChildCount() > 2) { + throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2), + ErrorMsg.INVALID_AS.getMsg())); + } + + ASTNode expr; + String tabAlias; + String colAlias; + + // 6.3 Get rid of TOK_SELEXPR + expr = (ASTNode) child.getChild(0); + String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, + autogenColAliasPrfxIncludeFuncName, i); + tabAlias = colRef[0]; + colAlias = colRef[1]; + + // 6.4 Build ExprNode corresponding to colums + if (expr.getType() == HiveParser.TOK_ALLCOLREF) { + pos = genColListRegex(".*", + expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)) + .toLowerCase(), expr, col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs, + subQuery); + selectStar = true; + } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause + && !inputRR.getIsExprResolver() + && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) { + // In case the expression is a regex COL. + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, + col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs, subQuery); + } else if (expr.getType() == HiveParser.DOT + && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL + && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
[... 309 lines stripped ...]
