Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Oct 14 19:06:45 2014
@@ -22,8 +22,10 @@ import static org.apache.hadoop.hive.con
 import java.io.IOException;
 import java.io.Serializable;
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.BitSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -34,12 +36,20 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
+import com.google.common.annotations.VisibleForTesting;
+import net.hydromatic.optiq.SchemaPlus;
+import net.hydromatic.optiq.tools.Frameworks;
+
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.Tree;
+import org.antlr.runtime.tree.TreeVisitor;
+import org.antlr.runtime.tree.TreeVisitorAction;
 import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
 import org.apache.commons.lang.StringUtils;
@@ -47,7 +57,6 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
@@ -60,6 +69,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
@@ -103,11 +113,35 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule;
+import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
@@ -193,12 +227,74 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapred.InputFormat;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.Aggregation;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelCollationImpl;
+import org.eigenbase.rel.RelFactories;
+import org.eigenbase.rel.RelFieldCollation;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.CachingRelMetadataProvider;
+import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rel.rules.ConvertMultiJoinRule;
+import org.eigenbase.rel.rules.FilterAggregateTransposeRule;
+import org.eigenbase.rel.rules.LoptOptimizeJoinRule;
+import org.eigenbase.rel.rules.MergeFilterRule;
+import org.eigenbase.rel.rules.PushFilterPastProjectRule;
+import org.eigenbase.rel.rules.PushFilterPastSetOpRule;
+import org.eigenbase.rel.rules.SemiJoinRel;
+import org.eigenbase.rel.rules.TransitivePredicatesOnJoinRule;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelOptQuery;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptSchema;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.relopt.hep.HepMatchOrder;
+import org.eigenbase.relopt.hep.HepPlanner;
+import org.eigenbase.relopt.hep.HepProgram;
+import org.eigenbase.relopt.hep.HepProgramBuilder;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexWindowBound;
+import org.eigenbase.rex.RexFieldCollation;
+import org.eigenbase.sql.SqlAggFunction;
+import org.eigenbase.sql.SqlWindow;
+import org.eigenbase.sql.parser.SqlParserPos;
+import org.eigenbase.sql.type.SqlTypeName;
+import org.eigenbase.sql2rel.RelFieldTrimmer;
+import org.eigenbase.sql.SqlCall;
+import org.eigenbase.sql.SqlExplainLevel;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlNode;
+import org.eigenbase.sql.SqlLiteral;
+import org.eigenbase.util.CompositeList;
+import org.eigenbase.util.ImmutableIntList;
+import org.eigenbase.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 
 /**
  * Implementation of the semantic analyzer. It generates the query plan.
@@ -215,6 +311,9 @@ public class SemanticAnalyzer extends Ba
 
   private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
 
+  @VisibleForTesting
+  static final String ACID_TABLE_PROPERTY = "transactional";
+
   private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
   private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   private HashMap<String, Operator<? extends OperatorDesc>> topOps;
@@ -266,6 +365,9 @@ public class SemanticAnalyzer extends Ba
   //flag for partial scan during analyze ... compute statistics
   protected boolean partialscan;
 
+  private volatile boolean runCBO = true;
+  private volatile boolean disableJoinMerge = false;
+
   /*
    * Capture the CTE definitions in a Query.
    */
@@ -280,6 +382,11 @@ public class SemanticAnalyzer extends Ba
     int nextNum;
   }
 
+  protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException {
+    this(conf);
+    this.runCBO = runCBO;
+  }
+
   public SemanticAnalyzer(HiveConf conf) throws SemanticException {
     super(conf);
     opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
@@ -316,8 +423,11 @@ public class SemanticAnalyzer extends Ba
   }
 
   @Override
-  protected void reset() {
-    super.reset();
+  protected void reset(boolean clearPartsCache) {
+    super.reset(true);
+    if (clearPartsCache) {
+      prunedPartitions.clear();
+    }
     loadTableWork.clear();
     loadFileWork.clear();
     topOps.clear();
@@ -331,7 +441,7 @@ public class SemanticAnalyzer extends Ba
     smbMapJoinContext.clear();
     opParseCtx.clear();
     groupOpToInputTables.clear();
-    prunedPartitions.clear();
+    disableJoinMerge = false;
    aliasToCTEs.clear();
     topToTable.clear();
     opToPartPruner.clear();
@@ -345,8 +455,6 @@ public class SemanticAnalyzer extends Ba
     viewsExpanded = null;
     viewSelect = null;
     ctesExpanded = null;
-    noscan = false;
-    partialscan = false;
     globalLimitCtx.disableOpt();
     viewAliasToInput.clear();
     reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
@@ -355,7 +463,6 @@ public class SemanticAnalyzer extends Ba
     unparseTranslator.clear();
     queryProperties.clear();
     outputs.clear();
-    globalLimitCtx.reset();
   }
 
   public void initParseCtx(ParseContext pctx) {
@@ -544,6 +651,10 @@ public class SemanticAnalyzer extends Ba
 
   public static String generateErrorMessage(ASTNode ast, String message) {
     StringBuilder sb = new StringBuilder();
+    if (ast == null) {
+      sb.append("The abstract syntax tree is null");
+      return sb.toString();
+    }
     sb.append(ast.getLine());
     sb.append(":");
     sb.append(ast.getCharPositionInLine());
@@ -805,7 +916,8 @@ public class SemanticAnalyzer extends Ba
       return PlanUtils.stripQuotes(expr.getText());
 
     case HiveParser.KW_FALSE:
-      return "FALSE";
+      // UDFToBoolean casts any non-empty string to true, so set this to false
+      return "";
 
     case HiveParser.KW_TRUE:
       return "TRUE";
@@ -813,6 +925,10 @@ public class SemanticAnalyzer extends Ba
     case HiveParser.MINUS:
      return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
 
+    case HiveParser.TOK_NULL:
+      // Hive's text input will translate this as a null
+      return "\\N";
+
     default:
       throw new SemanticException("Expression of type " + expr.getText() +
          " not supported in insert/values");
@@ -953,9 +1069,7 @@ public class SemanticAnalyzer extends Ba
   private boolean isJoinToken(ASTNode node) {
     if ((node.getToken().getType() == HiveParser.TOK_JOIN)
         || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
-        || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
-        || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
-        || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
+        || isOuterJoinToken(node)
        || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
        || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
       return true;
@@ -964,6 +1078,12 @@ public class SemanticAnalyzer extends Ba
     return false;
   }
 
+  private boolean isOuterJoinToken(ASTNode node) {
+    return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
+      || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
+      || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
+  }
+
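The KW_FALSE and TOK_NULL hunks above change how literals in an INSERT ... VALUES clause are unparsed into the text-serialized temporary table that backs the insert. A minimal standalone sketch of the resulting mapping follows; the ValuesLiteralText class and its String token parameter are hypothetical stand-ins for the HiveParser token constants, and only the TRUE/FALSE/NULL behavior is taken from the hunks above.

// Hypothetical sketch (not Hive code) of the literal-to-text mapping used when
// materializing a VALUES clause into a text-serialized temp table.
public final class ValuesLiteralText {
  public static String toText(String tokenType, String rawText) {
    switch (tokenType) {
      case "KW_TRUE":  return "TRUE";   // any non-empty string casts to true
      case "KW_FALSE": return "";       // empty string is the only text that casts to false
      case "TOK_NULL": return "\\N";    // default null marker for Hive text input
      default:         return rawText;  // other literals pass through (quotes stripped upstream)
    }
  }
}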
   /**
    * Given the AST with TOK_JOIN as the root, get all the aliases for the tables
    * or subqueries in the join.
@@ -981,6 +1101,7 @@ public class SemanticAnalyzer extends Ba
           "Join with multiple children"));
     }
 
+    queryProperties.incrementJoinCount(isOuterJoinToken(join));
     for (int num = 0; num < numChildren; num++) {
       ASTNode child = (ASTNode) join.getChild(num);
       if (child.getToken().getType() == HiveParser.TOK_TABREF) {
@@ -1087,10 +1208,15 @@ public class SemanticAnalyzer extends Ba
       qb.countSel();
       qbp.setSelExprForClause(ctx_1.dest, ast);
 
+      int posn = 0;
       if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
         qbp.setHints((ASTNode) ast.getChild(0));
+        posn++;
       }
 
+      if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
+        queryProperties.setUsesScript(true);
+
       LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
           qb, ctx_1.dest);
       doPhase1GetColumnAliasesFromSelect(ast, qbp);
@@ -1101,6 +1227,8 @@ public class SemanticAnalyzer extends Ba
 
     case HiveParser.TOK_WHERE:
       qbp.setWhrExprForClause(ctx_1.dest, ast);
+      if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
+        queryProperties.setFilterWithSubQuery(true);
       break;
 
     case HiveParser.TOK_INSERT_INTO:
@@ -1123,6 +1251,9 @@ public class SemanticAnalyzer extends Ba
         }
       }
       qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
+
+      if (qbp.getClauseNamesForDest().size() > 1)
+        queryProperties.setMultiDestQuery(true);
       break;
 
     case HiveParser.TOK_FROM:
@@ -1146,9 +1277,9 @@ public class SemanticAnalyzer extends Ba
         processSubQuery(qb, frm);
       } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
          frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+        queryProperties.setHasLateralViews(true);
         processLateralView(qb, frm);
       } else if (isJoinToken(frm)) {
-        queryProperties.setHasJoin(true);
         processJoin(qb, frm);
         qbp.setJoinExpr(frm);
       }else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
@@ -1361,6 +1492,10 @@ public class SemanticAnalyzer extends Ba
     }
   }
 
+  public Table getTable(TableScanOperator ts) {
+    return topToTable.get(ts);
+  }
+
   public void getMetaData(QB qb) throws SemanticException {
     getMetaData(qb, null);
   }
@@ -1419,11 +1554,20 @@ public class SemanticAnalyzer extends Ba
       }
 
       // Disallow INSERT INTO on bucketized tables
+      boolean isAcid = isAcidTable(tab);
       if (qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) &&
-          tab.getNumBuckets() > 0 && !isAcidTable(tab)) {
+          tab.getNumBuckets() > 0 && !isAcid) {
         throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
             getMsg("Table: " + tab_name));
       }
+      // Disallow update and delete on non-acid tables
+      if ((updating() || deleting()) && !isAcid) {
+        // isAcidTable above also checks for whether we are using an acid compliant
+        // transaction manager.  But that has already been caught in
+        // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
+        // here, it means the table itself doesn't support it.
+        throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, tab_name);
+      }
 
       // We check offline of the table, as if people only select from an
       // non-existing partition of an offline table, the partition won't
@@ -1502,6 +1646,10 @@ public class SemanticAnalyzer extends Ba
 
         qb.getParseInfo().addTableSpec(alias, ts);
       }
+
+      ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
+      PlanUtils.addInput(inputs,
+          new ReadEntity(tab, parentViewInfo, parentViewInfo == null));
     }
 
     LOG.info("Get metadata for subqueries");
@@ -5753,6 +5901,7 @@ public class SemanticAnalyzer extends Ba
     Integer dest_type = qbm.getDestTypeForAlias(dest);
 
     Table dest_tab = null; // destination table if any
+    boolean destTableIsAcid = false; // should the destination table be written to using ACID
     Partition dest_part = null;// destination partition if any
     Path queryTmpdir = null; // the intermediate destination directory
     Path dest_path = null; // the final destination directory
@@ -5769,6 +5918,7 @@ public class SemanticAnalyzer extends Ba
     case QBMetaData.DEST_TABLE: {
 
       dest_tab = qbm.getDestTableForAlias(dest);
+      destTableIsAcid = isAcidTable(dest_tab);
 
       // Is the user trying to insert into a external tables
       if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) &&
@@ -5864,9 +6014,10 @@ public class SemanticAnalyzer extends Ba
       // Create the work for moving the table
       // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
       if (!isNonNativeTable) {
-        AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass());
-        if (acidOp != AcidUtils.Operation.NOT_ACID) {
-          checkIfAcidAndOverwriting(qb, table_desc);
+        AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
+        if (destTableIsAcid) {
+          acidOp = getAcidType(table_desc.getOutputFileFormatClass());
+          checkAcidConstraints(qb, table_desc, dest_tab);
         }
         ltd = new LoadTableDesc(queryTmpdir,table_desc, dpCtx, acidOp);
         ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
@@ -5924,6 +6075,7 @@ public class SemanticAnalyzer extends Ba
 
       dest_part = qbm.getDestPartitionForAlias(dest);
       dest_tab = dest_part.getTable();
+      destTableIsAcid = isAcidTable(dest_tab);
       if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) &&
           dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) {
         throw new SemanticException(
@@ -5971,9 +6123,10 @@ public class SemanticAnalyzer extends Ba
       lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(),
           dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(),
           dest_part.isStoredAsSubDirectories(), conf);
-      AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass());
-      if (acidOp != AcidUtils.Operation.NOT_ACID) {
-        checkIfAcidAndOverwriting(qb, table_desc);
+      AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
+      if (destTableIsAcid) {
+        acidOp = getAcidType(table_desc.getOutputFileFormatClass());
+        checkAcidConstraints(qb, table_desc, dest_tab);
       }
       ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
       ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
@@ -6128,9 +6281,7 @@ public class SemanticAnalyzer extends Ba
 
     ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
 
     if (updating() || deleting()) {
-      vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(),
-          //TypeInfoUtils.getTypeInfoFromObjectInspector(VirtualColumn.ROWID.getObjectInspector()),
-          VirtualColumn.ROWID.getTypeInfo(),
+      vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(),
          "", true));
     } else {
       try {
@@ -6159,8 +6310,7 @@ public class SemanticAnalyzer extends Ba
             conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING))));
 
     // If this table is working with ACID semantics, turn off merging
-    boolean acidTable = isAcidTable(dest_tab);
-    canBeMerged &= !acidTable;
+    canBeMerged &= !destTableIsAcid;
 
     FileSinkDesc fileSinkDesc = new FileSinkDesc(
       queryTmpdir,
@@ -6176,7 +6326,7 @@ public class SemanticAnalyzer extends Ba
 
     // If this is an insert, update, or delete on an ACID table then mark that so the
     // FileSinkOperator knows how to properly write to it.
-    if (acidTable) {
+    if (destTableIsAcid) {
       AcidUtils.Operation wt = updating() ? AcidUtils.Operation.UPDATE :
          (deleting() ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
       fileSinkDesc.setWriteType(wt);
@@ -6233,14 +6383,39 @@ public class SemanticAnalyzer extends Ba
     return output;
   }
 
-  // Check if we are overwriting any tables.  If so, throw an exception as that is not allowed
-  // when using an Acid compliant txn manager and operating on an acid table.
-  private void checkIfAcidAndOverwriting(QB qb, TableDesc tableDesc) throws SemanticException {
+  // Check constraints on acid tables.  This includes
+  // * no insert overwrites
+  // * no use of vectorization
+  // * turns off reduce deduplication optimization, as that sometimes breaks acid
+  // * Check that the table is bucketed
+  // * Check that the table is not sorted
+  // This method assumes you have already decided that this is an Acid write.  Don't call it if
+  // that isn't true.
+  private void checkAcidConstraints(QB qb, TableDesc tableDesc,
+                                    Table table) throws SemanticException {
     String tableName = tableDesc.getTableName();
     if (!qb.getParseInfo().isInsertIntoTable(tableName)) {
       LOG.debug("Couldn't find table " + tableName + " in insertIntoTable");
       throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg());
     }
+    if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+      LOG.info("Turning off vectorization for acid write operation");
+      conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
+    }
+    LOG.info("Modifying config values for ACID write");
+    conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true);
+    conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1);
+    conf.setBoolVar(ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true);
+    conf.set(AcidUtils.CONF_ACID_KEY, "true");
+
+    if (table.getNumBuckets() < 1) {
+      throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName());
+    }
+    if (table.getSortCols() != null && table.getSortCols().size() > 0) {
+      throw new SemanticException(ErrorMsg.ACID_NO_SORTED_BUCKETS, table.getTableName());
+    }
+
+  }
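A minimal sketch of the table-side checks performed by checkAcidConstraints above, assuming a hypothetical TableMeta holder in place of Hive's Table; the real method additionally rejects INSERT OVERWRITE and rewrites session config (vectorization off, deduplication settings, the acid flag).

// Hypothetical sketch of the "ACID target must be bucketed and unsorted" rule.
final class AcidTargetCheck {
  static final class TableMeta {
    final int numBuckets;                    // must be >= 1 for an ACID write target
    final java.util.List<String> sortCols;   // must be empty: sorted buckets unsupported
    TableMeta(int numBuckets, java.util.List<String> sortCols) {
      this.numBuckets = numBuckets;
      this.sortCols = sortCols;
    }
  }
  static void validate(TableMeta t) {
    if (t.numBuckets < 1) {
      throw new IllegalArgumentException("ACID write target must be bucketed");
    }
    if (t.sortCols != null && !t.sortCols.isEmpty()) {
      throw new IllegalArgumentException("ACID write target must not be sorted");
    }
  }
}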
@@ -6304,6 +6479,7 @@ public class SemanticAnalyzer extends Ba
     int columnNumber = tableFields.size();
     ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(
         columnNumber);
+
     // MetadataTypedColumnsetSerDe does not need type conversions because it
     // does the conversion to String by itself.
     boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(
@@ -6371,17 +6547,19 @@ public class SemanticAnalyzer extends Ba
     if (converted) {
       // add the select operator
       RowResolver rowResolver = new RowResolver();
-      ArrayList<String> colName = new ArrayList<String>();
+      ArrayList<String> colNames = new ArrayList<String>();
+      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
       for (int i = 0; i < expressions.size(); i++) {
         String name = getColumnInternalName(i);
         rowResolver.put("", name, new ColumnInfo(name, expressions.get(i)
             .getTypeInfo(), "", false));
-        colName.add(name);
+        colNames.add(name);
+        colExprMap.put(name, expressions.get(i));
       }
       Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
-          new SelectDesc(expressions, colName), new RowSchema(rowResolver
+          new SelectDesc(expressions, colNames), new RowSchema(rowResolver
          .getColumnInfos()), input), rowResolver);
-
+      output.setColumnExprMap(colExprMap);
       return output;
     } else {
       // not converted
@@ -7878,7 +8056,7 @@ public class SemanticAnalyzer extends Ba
         List<ASTNode> nodeConds = node.getExpressions().get(i + 1);
         ArrayList<ASTNode> reordereNodeConds = new ArrayList<ASTNode>();
         for(int k=0; k < tgtToNodeExprMap.length; k++) {
-          reordereNodeConds.add(nodeConds.get(k));
+          reordereNodeConds.add(nodeConds.get(tgtToNodeExprMap[k]));
         }
         expr.add(reordereNodeConds);
       }
@@ -9430,7 +9608,9 @@ public class SemanticAnalyzer extends Ba
               aliasToOpInfo );
         }
       }
-      mergeJoinTree(qb);
+
+      if (!disableJoinMerge)
+        mergeJoinTree(qb);
     }
 
     // if any filters are present in the join tree, push them on top of the
@@ -9638,9 +9818,9 @@ public class SemanticAnalyzer extends Ba
   }
 
   @Override
-  public void init() {
+  public void init(boolean clearPartsCache) {
     // clear most members
-    reset();
+    reset(clearPartsCache);
 
     // init QB
     qb = new QB(null, null, false);
@@ -9695,11 +9875,82 @@ public class SemanticAnalyzer extends Ba
     getMetaData(qb);
     LOG.info("Completed getting MetaData in Semantic Analysis");
 
+
+    if (runCBO) {
+      boolean tokenTypeIsQuery = ast.getToken().getType() == HiveParser.TOK_QUERY
+          || ast.getToken().getType() == HiveParser.TOK_EXPLAIN;
+      if (!tokenTypeIsQuery || createVwDesc != null
+          || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)
+          || !canHandleQuery(qb, true) || !HiveOptiqUtil.validateASTForCBO(ast)) {
+        runCBO = false;
+      }
+
+      if (runCBO) {
+        disableJoinMerge = true;
+      }
+    }
+
     // Save the result schema derived from the sink operator produced
     // by genPlan.  This has the correct column names, which clients
     // such as JDBC would prefer instead of the c0, c1 we'll end
     // up with later.
-    Operator sinkOp = genPlan(qb);
+    Operator sinkOp = null;
+
+    if (runCBO) {
+      OptiqBasedPlanner optiqPlanner = new OptiqBasedPlanner();
+      boolean reAnalyzeAST = false;
+
+      try {
+        // 1. Gen Optimized AST
+        ASTNode newAST = optiqPlanner.getOptimizedAST(prunedPartitions);
+
+        // 2. Regen OP plan from optimized AST
+        init(false);
+        ctx_1 = initPhase1Ctx();
+        if (!doPhase1(newAST, qb, ctx_1)) {
+          throw new RuntimeException(
+              "Couldn't do phase1 on CBO optimized query plan");
+        }
+        // Unfortunately, making prunedPartitions immutable is not possible here:
+        // with SemiJoins, not all tables are costed in CBO, so their
+        // PartitionList is not evaluated until the run phase.
+        //prunedPartitions = ImmutableMap.copyOf(prunedPartitions);
+        getMetaData(qb);
+
+        disableJoinMerge = true;
+        sinkOp = genPlan(qb);
+        LOG.info("CBO Succeeded; optimized logical plan.");
+        LOG.debug(newAST.dump());
+
+        /*
+         * Use non CBO Result Set Schema so as to preserve user specified names.
+         * Hive seems to have bugs with OB/LIMIT in sub queries.
+         * // 3. Reset result set schema
+         * resultSchema = convertRowSchemaToResultSetSchema(
+         *     opParseCtx.get(sinkOp).getRowResolver(), true);
+         */
+      } catch (Exception e) {
+        LOG.error("CBO failed, skipping CBO. ", e);
+        if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) ||
+            (optiqPlanner.noColsMissingStats.get() > 0) ||
+            e instanceof OptiqSemanticException) {
+          reAnalyzeAST = true;
+        } else {
+          throw e instanceof SemanticException ? (SemanticException) e : new SemanticException(e);
+        }
+      } finally {
+        runCBO = false;
+        disableJoinMerge = false;
+        if (reAnalyzeAST) {
+          init(true);
+          prunedPartitions.clear();
+          analyzeInternal(ast);
+          return;
+        }
+      }
+    } else {
+      sinkOp = genPlan(qb);
+    }
 
     if (createVwDesc != null)
       resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
@@ -9798,14 +10049,19 @@ public class SemanticAnalyzer extends Ba
     Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap();
     if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) {
       for(ReadEntity entity: inputs) {
+        List<String> cols;
         switch (entity.getType()) {
          case TABLE:
-            entity.getAccessedColumns().addAll(
-                tableToColumnAccessMap.get(entity.getTable().getCompleteName()));
+            cols = tableToColumnAccessMap.get(entity.getTable().getCompleteName());
+            if (cols != null && !cols.isEmpty()) {
+              entity.getAccessedColumns().addAll(cols);
+            }
            break;
          case PARTITION:
-            entity.getAccessedColumns().addAll(
-                tableToColumnAccessMap.get(entity.getPartition().getTable().getCompleteName()));
+            cols = tableToColumnAccessMap.get(entity.getPartition().getTable().getCompleteName());
+            if (cols != null && !cols.isEmpty()) {
+              entity.getAccessedColumns().addAll(cols);
+            }
            break;
          default:
            // no-op
@@ -10831,9 +11087,13 @@ public class SemanticAnalyzer extends Ba
     Table tbl;
     try {
       tbl = db.getTable(tableName);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+    } catch (InvalidTableException e) {
+      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName), e);
     }
+    catch (HiveException e) {
+      throw new SemanticException(e.getMessage(), e);
+    }
+
     /* noscan uses hdfs apis to retrieve such information from Namenode.      */
     /* But that will be specific to hdfs. Through storagehandler mechanism,   */
     /* storage of table could be on any storage system: hbase, cassandra etc. */
@@ -10856,8 +11116,10 @@ public class SemanticAnalyzer extends Ba
     Table tbl;
     try {
       tbl = db.getTable(tableName);
+    } catch (InvalidTableException e) {
+      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName), e);
     } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+      throw new SemanticException(e.getMessage(), e);
    }
     /* partialscan uses hdfs apis to retrieve such information from Namenode. */
     /* But that will be specific to hdfs. Through storagehandler mechanism,   */
     /* storage of table could be on any storage system: hbase, cassandra etc. */
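The analyzeInternal changes above wrap Optiq planning in a try/catch/finally and, on failure, either rethrow or reset state and re-analyze the original AST without CBO. A generic sketch of that optimize-then-fall-back control flow follows; the Planner and Plan types and analyze method are hypothetical stand-ins for the Hive machinery.

// Hypothetical sketch of "try the cost-based path, fall back on failure".
interface Plan {}
interface Planner { Plan optimize(String ast) throws Exception; }
final class FallbackDriver {
  Plan analyze(String ast, Planner cbo, Planner basic) throws Exception {
    try {
      return cbo.optimize(ast);   // 1. attempt the cost-based path
    } catch (Exception e) {
      // 2. on any CBO failure, log and retry on the non-CBO path from a clean slate
      System.err.println("CBO failed, skipping CBO: " + e);
      return basic.optimize(ast);
    }
  }
}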
@@ -11898,9 +12160,9 @@
 
   // Even if the table is of Acid type, if we aren't working with an Acid compliant TxnManager
   // then return false.
   private boolean isAcidTable(Table tab) {
-    if (tab == null || tab.getOutputFormatClass() == null) return false;
+    if (tab == null) return false;
     if (!SessionState.get().getTxnMgr().supportsAcid()) return false;
-    return isAcidOutputFormat(tab.getOutputFormatClass());
+    return tab.getProperty(ACID_TABLE_PROPERTY) != null;
   }
 
   private boolean isAcidOutputFormat(Class<? extends HiveOutputFormat> of) {
@@ -11939,4 +12201,2042 @@
     return false;
   }
 
+  /**** Temporary Place Holder For Optiq plan Gen, Optimizer ****/
+
+  /*
+   * Entry point to Optimizations using Optiq.
+   */
+  private boolean canHandleQuery(QB qbToChk, boolean topLevelQB) {
+    boolean runOptiqPlanner = false;
+    // Assumption:
+    // 1. If top level QB is query then everything below it must also be Query
+    // 2. Nested Subquery will return false for qbToChk.getIsQuery()
+    if ((!topLevelQB || qbToChk.getIsQuery())
+        && (!conf.getBoolVar(ConfVars.HIVE_IN_TEST)
+            || conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"))
+        && (!topLevelQB || (queryProperties.getJoinCount() > 1) || conf.getBoolVar(ConfVars.HIVE_IN_TEST))
+        && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
+        && !queryProperties.hasSortBy() && !queryProperties.hasPTF()
+        && !queryProperties.usesScript() && !queryProperties.hasMultiDestQuery()
+        && !queryProperties.hasLateralViews()) {
+      runOptiqPlanner = true;
+    } else {
+      LOG.info("Can not invoke CBO; query contains operators not supported for CBO.");
+    }
+
+    return runOptiqPlanner;
+  }
+
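canHandleQuery above gates CBO on flags collected during phase 1 by the doPhase1 hunks earlier in this commit (join count, scripts, multi-insert, lateral views, and so on). A simplified sketch of that gate, with a hypothetical QueryFlags holder standing in for QueryProperties:

// Hypothetical sketch of the phase-1 "collect flags, gate the optimizer" pattern.
final class CboGate {
  static final class QueryFlags {
    int joinCount;
    boolean clusterBy, distributeBy, sortBy, ptf, usesScript, multiDest, lateralViews;
  }
  static boolean canRunCbo(QueryFlags f, boolean topLevel) {
    return (!topLevel || f.joinCount > 1)   // only worthwhile for multi-join queries
        && !f.clusterBy && !f.distributeBy && !f.sortBy
        && !f.ptf && !f.usesScript && !f.multiDest && !f.lateralViews;
  }
}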
+  private class OptiqBasedPlanner implements Frameworks.PlannerAction<RelNode> {
+    private RelOptCluster cluster;
+    private RelOptSchema relOptSchema;
+    private SemanticException semanticException;
+    private Map<String, PrunedPartitionList> partitionCache;
+    private final AtomicInteger noColsMissingStats = new AtomicInteger(0);
+    List<FieldSchema> topLevelFieldSchema;
+
+    // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
+    // just the last one.
+    LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
+    LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameOptiqPosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
+
+    private ASTNode getOptimizedAST(Map<String, PrunedPartitionList> partitionCache)
+        throws SemanticException {
+      ASTNode optiqOptimizedAST = null;
+      RelNode optimizedOptiqPlan = null;
+      this.partitionCache = partitionCache;
+
+      try {
+        optimizedOptiqPlan = Frameworks.withPlanner(this,
+            Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
+      } catch (Exception e) {
+        if (semanticException != null)
+          throw semanticException;
+        else
+          throw new RuntimeException(e);
+      }
+      optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema);
+
+      return optiqOptimizedAST;
+    }
+
+    @Override
+    public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) {
+      RelNode optiqGenPlan = null;
+      RelNode optiqPreCboPlan = null;
+      RelNode optiqOptimizedPlan = null;
+
+      /*
+       * recreate cluster, so that it picks up the additional traitDef
+       */
+      RelOptPlanner planner = HiveVolcanoPlanner.createPlanner();
+      final RelOptQuery query = new RelOptQuery(planner);
+      final RexBuilder rexBuilder = cluster.getRexBuilder();
+      cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder);
+
+      this.cluster = cluster;
+      this.relOptSchema = relOptSchema;
+
+      try {
+        optiqGenPlan = genLogicalPlan(qb, true);
+        topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(optiqGenPlan),
+            HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
+      } catch (SemanticException e) {
+        semanticException = e;
+        throw new RuntimeException(e);
+      }
+
+      optiqPreCboPlan = applyPreCBOTransforms(optiqGenPlan, HiveDefaultRelMetadataProvider.INSTANCE);
+      List<RelMetadataProvider> list = Lists.newArrayList();
+      list.add(HiveDefaultRelMetadataProvider.INSTANCE);
+      RelTraitSet desiredTraits = cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+
+      HepProgram hepPgm = null;
+      HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP)
+          .addRuleInstance(new ConvertMultiJoinRule(HiveJoinRel.class));
+      hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoinRel.HIVE_JOIN_FACTORY,
+          HiveProjectRel.DEFAULT_PROJECT_FACTORY, HiveFilterRel.DEFAULT_FILTER_FACTORY));
+
+      hepPgm = hepPgmBldr.build();
+      HepPlanner hepPlanner = new HepPlanner(hepPgm);
+
+      hepPlanner.registerMetadataProviders(list);
+      RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+      cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+
+      RelNode rootRel = optiqPreCboPlan;
+      hepPlanner.setRoot(rootRel);
+      if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) {
+        rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits);
+      }
+      hepPlanner.setRoot(rootRel);
+
+      optiqOptimizedPlan = hepPlanner.findBestExp();
+
+      if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
+        LOG.debug("CBO Planning details:\n");
+        LOG.debug("Original Plan:\n");
+        LOG.debug(RelOptUtil.toString(optiqGenPlan));
+        LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n");
+        LOG.debug(RelOptUtil.toString(optiqPreCboPlan));
+        LOG.debug("Plan After Join Reordering:\n");
+        LOG.debug(RelOptUtil.toString(optiqOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES));
+      }
+
+      return optiqOptimizedPlan;
+    }
+
+    public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) {
+
+      // TODO: Decorrelation of subquery should be done before attempting
+      // Partition Pruning; otherwise Expression evaluation may try to execute a
+      // correlated sub query.
+      basePlan = hepPlan(basePlan, true, mdProvider, new PushFilterPastProjectRule(
+          FilterRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class,
+          HiveProjectRel.DEFAULT_PROJECT_FACTORY), new PushFilterPastSetOpRule(
+          HiveFilterRel.DEFAULT_FILTER_FACTORY), new MergeFilterRule(
+          HiveFilterRel.DEFAULT_FILTER_FACTORY), HivePushFilterPastJoinRule.JOIN,
+          HivePushFilterPastJoinRule.FILTER_ON_JOIN,
+          new FilterAggregateTransposeRule(
+              FilterRelBase.class,
+              HiveFilterRel.DEFAULT_FILTER_FACTORY,
+              AggregateRelBase.class));
+
+      basePlan = hepPlan(basePlan, false, mdProvider, new TransitivePredicatesOnJoinRule(
+          JoinRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY),
+          // TODO: Enable it after OPTIQ-407 is fixed
+          //RemoveTrivialProjectRule.INSTANCE,
+          new HivePartitionPrunerRule(SemanticAnalyzer.this.conf));
+
+      RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProjectRel.DEFAULT_PROJECT_FACTORY,
+          HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveJoinRel.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY,
+          HiveSortRel.HIVE_SORT_REL_FACTORY, HiveAggregateRel.HIVE_AGGR_REL_FACTORY, HiveUnionRel.UNION_REL_FACTORY);
+      basePlan = fieldTrimmer.trim(basePlan);
+
+      basePlan = hepPlan(basePlan, true, mdProvider,
+          new PushFilterPastProjectRule(FilterRelBase.class,
+              HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class,
+              HiveProjectRel.DEFAULT_PROJECT_FACTORY));
+
+      return basePlan;
+    }
+
+    private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
+        RelMetadataProvider mdProvider, RelOptRule... rules) {
+
+      RelNode optimizedRelNode = basePlan;
+      HepProgramBuilder programBuilder = new HepProgramBuilder();
+      if (followPlanChanges) {
+        programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN);
+        programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
+      } else {
+        // TODO: Should this also be TOP_DOWN?
+        for (RelOptRule r : rules)
+          programBuilder.addRuleInstance(r);
+      }
+
+      HepPlanner planner = new HepPlanner(programBuilder.build());
+      List<RelMetadataProvider> list = Lists.newArrayList();
+      list.add(mdProvider);
+      planner.registerMetadataProviders(list);
+      RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+      basePlan.getCluster().setMetadataProvider(
+          new CachingRelMetadataProvider(chainedProvider, planner));
+
+      planner.setRoot(basePlan);
+      optimizedRelNode = planner.findBestExp();
+
+      return optimizedRelNode;
+    }
+
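hepPlan above delegates to Optiq's HepPlanner: build a program from the given rules, attach metadata providers, set the root, and ask for the best expression. A generic, library-free sketch of the underlying idea of applying heuristic rewrite rules to a fixpoint follows; the Rule interface and its apply contract are hypothetical.

// Hypothetical sketch of heuristic rule application to a fixpoint.
import java.util.List;
interface Rule {
  /** Return a rewritten plan, or null if the rule does not fire. */
  Object apply(Object plan);
}
final class FixpointRewriter {
  static Object rewrite(Object plan, List<Rule> rules, int maxPasses) {
    for (int pass = 0; pass < maxPasses; pass++) {
      boolean changed = false;
      for (Rule r : rules) {
        Object next = r.apply(plan);
        if (next != null && next != plan) { plan = next; changed = true; }
      }
      if (!changed) break;  // fixpoint reached
    }
    return plan;
  }
}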
+    @SuppressWarnings("nls")
+    private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel,
+        String rightalias, RelNode rightRel) throws SemanticException {
+      HiveUnionRel unionRel = null;
+
+      // 1. Get Row Resolvers, Column map for original left and right input of
+      // Union Rel
+      RowResolver leftRR = this.relToHiveRR.get(leftRel);
+      RowResolver rightRR = this.relToHiveRR.get(rightRel);
+      HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
+      HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
+
+      // 2. Validate that Union is feasible according to Hive (by using type
+      // info from RR)
+      if (leftmap.size() != rightmap.size()) {
+        throw new SemanticException("Schema of both sides of union should match.");
+      }
+
+      ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(
+          qb.getAliases().get(0));
+      for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
+        String field = lEntry.getKey();
+        ColumnInfo lInfo = lEntry.getValue();
+        ColumnInfo rInfo = rightmap.get(field);
+        if (rInfo == null) {
+          throw new SemanticException(generateErrorMessage(tabref,
+              "Schema of both sides of union should match. " + rightalias
+                  + " does not have the field " + field));
+        }
+        if (lInfo == null) {
+          throw new SemanticException(generateErrorMessage(tabref,
+              "Schema of both sides of union should match. " + leftalias
+                  + " does not have the field " + field));
+        }
+        if (!lInfo.getInternalName().equals(rInfo.getInternalName())) {
+          throw new OptiqSemanticException(generateErrorMessage(tabref,
+              "Schema of both sides of union should match: field " + field + ":"
+                  + " appears on the left side of the UNION at column position: "
+                  + getPositionFromInternalName(lInfo.getInternalName())
+                  + ", and on the right side of the UNION at column position: "
+                  + getPositionFromInternalName(rInfo.getInternalName())
+                  + ". Column positions should match for a UNION"));
+        }
+        // try widening conversion, otherwise fail union
+        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
+            rInfo.getType());
+        if (commonTypeInfo == null) {
+          throw new OptiqSemanticException(generateErrorMessage(tabref,
+              "Schema of both sides of union should match: Column " + field + " is of type "
+                  + lInfo.getType().getTypeName() + " on first table and type "
+                  + rInfo.getType().getTypeName() + " on second table"));
+        }
+      }
+
+      // 3. construct Union Output RR using original left & right Input
+      RowResolver unionoutRR = new RowResolver();
+      for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
+        String field = lEntry.getKey();
+        ColumnInfo lInfo = lEntry.getValue();
+        ColumnInfo rInfo = rightmap.get(field);
+        ColumnInfo unionColInfo = new ColumnInfo(lInfo);
+        unionColInfo.setTabAlias(unionalias);
+        unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
+            rInfo.getType()));
+        unionoutRR.put(unionalias, field, unionColInfo);
+      }
+
+      // 4. Determine which columns require a cast on the left/right input (Optiq
+      // requires exact types on both sides of union)
+      boolean leftNeedsTypeCast = false;
+      boolean rightNeedsTypeCast = false;
+      List<RexNode> leftProjs = new ArrayList<RexNode>();
+      List<RexNode> rightProjs = new ArrayList<RexNode>();
+      List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList();
+      List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList();
+
+      RelDataType leftFieldDT;
+      RelDataType rightFieldDT;
+      RelDataType unionFieldDT;
+      for (int i = 0; i < leftRowDT.size(); i++) {
+        leftFieldDT = leftRowDT.get(i).getType();
+        rightFieldDT = rightRowDT.get(i).getType();
+        if (!leftFieldDT.equals(rightFieldDT)) {
+          unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(),
+              cluster.getTypeFactory());
+          if (!unionFieldDT.equals(leftFieldDT)) {
+            leftNeedsTypeCast = true;
+          }
+          leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
+              cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
+
+          if (!unionFieldDT.equals(rightFieldDT)) {
+            rightNeedsTypeCast = true;
+          }
+          rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
+              cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
+        } else {
+          leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT,
+              cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
+          rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT,
+              cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
+        }
+      }
+
+      // 5. Introduce Project Rel above original left/right inputs if cast is
+      // needed for type parity
+      RelNode unionLeftInput = leftRel;
+      RelNode unionRightInput = rightRel;
+      if (leftNeedsTypeCast) {
+        unionLeftInput = HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType()
+            .getFieldNames());
+      }
+      if (rightNeedsTypeCast) {
+        unionRightInput = HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType()
+            .getFieldNames());
+      }
+
+      // 6. Construct Union Rel
+      ImmutableList.Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
+      bldr.add(unionLeftInput);
+      bldr.add(unionRightInput);
+      unionRel = new HiveUnionRel(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+          bldr.build());
+
+      relToHiveRR.put(unionRel, unionoutRR);
+      relToHiveColNameOptiqPosMap.put(unionRel,
+          this.buildHiveToOptiqColumnMap(unionoutRR, unionRel));
+
+      return unionRel;
+    }
+
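Steps 4 and 5 of genUnionLogicalPlan above widen each column to a common type and insert a Project over whichever union branch disagrees, since Optiq requires exact type parity across union inputs. A standalone sketch of the per-column decision, with a toy ColType enum and widening order standing in for Hive's TypeInfo logic:

// Hypothetical sketch of "widen each column, record which side needs a cast".
import java.util.List;
final class UnionTypeAlign {
  enum ColType { INT, BIGINT, DOUBLE, STRING }
  static ColType commonType(ColType a, ColType b) {
    return a.ordinal() >= b.ordinal() ? a : b;  // toy widening order
  }
  /** Returns {leftNeedsCast, rightNeedsCast} for same-length column lists. */
  static boolean[] sidesNeedingCast(List<ColType> left, List<ColType> right) {
    boolean castLeft = false, castRight = false;
    for (int i = 0; i < left.size(); i++) {
      ColType common = commonType(left.get(i), right.get(i));
      castLeft  |= common != left.get(i);
      castRight |= common != right.get(i);
    }
    return new boolean[] { castLeft, castRight };
  }
}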
+    private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType,
+        ASTNode joinCond) throws SemanticException {
+      RelNode joinRel = null;
+
+      // 1. construct the RowResolver for the new Join Node by combining row
+      // resolvers from left, right
+      RowResolver leftRR = this.relToHiveRR.get(leftRel);
+      RowResolver rightRR = this.relToHiveRR.get(rightRel);
+      RowResolver joinRR = null;
+
+      if (hiveJoinType != JoinType.LEFTSEMI) {
+        joinRR = RowResolver.getCombinedRR(leftRR, rightRR);
+      } else {
+        joinRR = new RowResolver();
+        RowResolver.add(joinRR, leftRR, 0);
+      }
+
+      // 2. Construct ExpressionNodeDesc representing Join Condition
+      RexNode optiqJoinCond = null;
+      if (joinCond != null) {
+        JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
+        Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
+            jCtx);
+        if (jCtx.getError() != null)
+          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(),
+              jCtx.getError()));
+
+        ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond);
+
+        List<RelNode> inputRels = new ArrayList<RelNode>();
+        inputRels.add(leftRel);
+        inputRels.add(rightRel);
+        optiqJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels,
+            relToHiveRR, relToHiveColNameOptiqPosMap, false);
+      } else {
+        optiqJoinCond = cluster.getRexBuilder().makeLiteral(true);
+      }
+
+      // 3. Validate that join condition is legal (i.e. no function referring to
+      // both sides of join, only equi join)
+      // TODO: Join filter handling (only supported for OJ by runtime or is it
+      // supported for IJ as well)
+
+      // 4. Construct Join Rel Node
+      boolean leftSemiJoin = false;
+      JoinRelType optiqJoinType;
+      switch (hiveJoinType) {
+      case LEFTOUTER:
+        optiqJoinType = JoinRelType.LEFT;
+        break;
+      case RIGHTOUTER:
+        optiqJoinType = JoinRelType.RIGHT;
+        break;
+      case FULLOUTER:
+        optiqJoinType = JoinRelType.FULL;
+        break;
+      case LEFTSEMI:
+        optiqJoinType = JoinRelType.INNER;
+        leftSemiJoin = true;
+        break;
+      case INNER:
+      default:
+        optiqJoinType = JoinRelType.INNER;
+        break;
+      }
+
+      if (leftSemiJoin) {
+        List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>();
+        List<RexNode> leftJoinKeys = new ArrayList<RexNode>();
+        List<RexNode> rightJoinKeys = new ArrayList<RexNode>();
+
+        RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel,
+            optiqJoinCond, leftJoinKeys, rightJoinKeys, null, null);
+
+        if (!nonEquiConds.isAlwaysTrue()) {
+          throw new SemanticException("Non equality condition not supported in Semi-Join"
+              + nonEquiConds);
+        }
+
+        RelNode[] inputRels = new RelNode[] { leftRel, rightRel };
+        final List<Integer> leftKeys = new ArrayList<Integer>();
+        final List<Integer> rightKeys = new ArrayList<Integer>();
+        optiqJoinCond = HiveOptiqUtil.projectNonColumnEquiConditions(
+            HiveProjectRel.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0,
+            leftKeys, rightKeys);
+
+        joinRel = new SemiJoinRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+            inputRels[0], inputRels[1], optiqJoinCond, ImmutableIntList.copyOf(leftKeys),
+            ImmutableIntList.copyOf(rightKeys));
+      } else {
+        joinRel = HiveJoinRel.getJoin(cluster, leftRel, rightRel, optiqJoinCond, optiqJoinType,
+            leftSemiJoin);
+      }
+      // 5. Add new JoinRel & its RR to the maps
+      relToHiveColNameOptiqPosMap.put(joinRel, this.buildHiveToOptiqColumnMap(joinRR, joinRel));
+      relToHiveRR.put(joinRel, joinRR);
+
+      return joinRel;
+    }
+
+    /**
+     * Generate Join Logical Plan Relnode by walking through the join AST.
+     *
+     * @param qb
+     * @param aliasToRel
+     *          Alias(Table/Relation alias) to RelNode; only read and not
+     *          written in to by this method
+     * @return
+     * @throws SemanticException
+     */
+    private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel)
+        throws SemanticException {
+      RelNode leftRel = null;
+      RelNode rightRel = null;
+      JoinType hiveJoinType = null;
+
+      if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
+        String msg = String.format("UNIQUE JOIN is currently not supported in CBO,"
+            + " turn off cbo to use UNIQUE JOIN.");
+        LOG.debug(msg);
+        throw new OptiqSemanticException(msg);
+      }
+
+      // 1. Determine Join Type
+      // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN
+      switch (joinParseTree.getToken().getType()) {
+      case HiveParser.TOK_LEFTOUTERJOIN:
+        hiveJoinType = JoinType.LEFTOUTER;
+        break;
+      case HiveParser.TOK_RIGHTOUTERJOIN:
+        hiveJoinType = JoinType.RIGHTOUTER;
+        break;
+      case HiveParser.TOK_FULLOUTERJOIN:
+        hiveJoinType = JoinType.FULLOUTER;
+        break;
+      case HiveParser.TOK_LEFTSEMIJOIN:
+        hiveJoinType = JoinType.LEFTSEMI;
+        break;
+      default:
+        hiveJoinType = JoinType.INNER;
+        break;
+      }
+
+      // 2. Get Left Table Alias
+      ASTNode left = (ASTNode) joinParseTree.getChild(0);
+      if ((left.getToken().getType() == HiveParser.TOK_TABREF)
+          || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
+          || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
+        String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0))
+            .toLowerCase();
+        String leftTableAlias = left.getChildCount() == 1 ? tableName : unescapeIdentifier(left
+            .getChild(left.getChildCount() - 1).getText().toLowerCase());
+        // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+        // partitionTableFunctionSource partitioningSpec? expression*)
+        // guaranteed to have an alias here: check done in processJoin
+        leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(left
+            .getChild(1).getText().toLowerCase())
+            : leftTableAlias;
+        leftRel = aliasToRel.get(leftTableAlias);
+      } else if (isJoinToken(left)) {
+        leftRel = genJoinLogicalPlan(left, aliasToRel);
+      } else {
+        assert (false);
+      }
+
+      // 3. Get Right Table Alias
+      ASTNode right = (ASTNode) joinParseTree.getChild(1);
+      if ((right.getToken().getType() == HiveParser.TOK_TABREF)
+          || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
+          || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
+        String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0))
+            .toLowerCase();
+        String rightTableAlias = right.getChildCount() == 1 ? tableName : unescapeIdentifier(right
+            .getChild(right.getChildCount() - 1).getText().toLowerCase());
+        // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+        // partitionTableFunctionSource partitioningSpec? expression*)
+        // guaranteed to have an alias here: check done in processJoin
+        rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(right
+            .getChild(1).getText().toLowerCase())
+            : rightTableAlias;
+        rightRel = aliasToRel.get(rightTableAlias);
+      } else {
+        assert (false);
+      }
+
+      // 4. Get Join Condn
+      ASTNode joinCond = (ASTNode) joinParseTree.getChild(2);
+
+      // 5. Create Join rel
+      return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond);
+    }
+
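genJoinLogicalPlan above resolves each join input to a RelNode by its alias: the alias defaults to the table name unless an explicit alias (the trailing child of TOK_TABREF) is present. A small sketch of that defaulting rule, with a hypothetical AstLike stand-in for ASTNode (the PTF special case is omitted):

// Hypothetical sketch of the alias-defaulting rule applied to each join input.
import java.util.List;
final class JoinAliasRule {
  interface AstLike { String text(); List<AstLike> children(); }
  /** Table name if no alias is given, otherwise the trailing alias child. */
  static String resolveAlias(AstLike tabRef) {
    List<AstLike> kids = tabRef.children();
    return (kids.size() == 1)
        ? kids.get(0).text().toLowerCase()                 // no alias: use table name
        : kids.get(kids.size() - 1).text().toLowerCase();  // explicit alias wins
  }
}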
+    private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
+      RowResolver rr = new RowResolver();
+      HiveTableScanRel tableRel = null;
+
+      try {
+
+        // 1. If the table has a Sample specified, bail from Optiq path.
+        if (qb.getParseInfo().getTabSample(tableAlias) != null
+            || SemanticAnalyzer.this.nameToSplitSample.containsKey(tableAlias)) {
+          String msg = String.format("Table Sample specified for %s."
+              + " Currently we don't support Table Sample clauses in CBO,"
+              + " turn off cbo for queries on tableSamples.", tableAlias);
+          LOG.debug(msg);
+          throw new OptiqSemanticException(msg);
+        }
+
+        // 2. Get Table Metadata
+        Table tab = qb.getMetaData().getSrcForAlias(tableAlias);
+
+        // 3. Get Table Logical Schema (Row Type)
+        // NOTE: Table logical schema = Non Partition Cols + Partition Cols +
+        // Virtual Cols
+
+        // 3.1 Add Column info for non-partition cols (Object Inspector fields)
+        StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer()
+            .getObjectInspector();
+        List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
+        ColumnInfo colInfo;
+        String colName;
+        ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
+        for (int i = 0; i < fields.size(); i++) {
+          colName = fields.get(i).getFieldName();
+          colInfo = new ColumnInfo(
+              fields.get(i).getFieldName(),
+              TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()),
+              tableAlias, false);
+          colInfo.setSkewedCol((isSkewedCol(tableAlias, qb, colName)) ? true : false);
+          rr.put(tableAlias, colName, colInfo);
+          cInfoLst.add(colInfo);
+        }
+        // TODO: Fix this
+        ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
+        ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
+
+        // 3.2 Add column info corresponding to partition columns
+        for (FieldSchema part_col : tab.getPartCols()) {
+          colName = part_col.getName();
+          colInfo = new ColumnInfo(colName,
+              TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true);
+          rr.put(tableAlias, colName, colInfo);
+          cInfoLst.add(colInfo);
+          partitionColumns.add(colInfo);
+        }
+
+        // 3.3 Add column info corresponding to virtual columns
+        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
+        while (vcs.hasNext()) {
+          VirtualColumn vc = vcs.next();
+          colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true,
+              vc.getIsHidden());
+          rr.put(tableAlias, vc.getName(), colInfo);
+          cInfoLst.add(colInfo);
+        }
+
+        // 3.4 Build row type from field <type, name>
+        RelDataType rowType = TypeConverter.getType(cluster, rr, null);
+
+        // 4. Build RelOptAbstractTable
+        String fullyQualifiedTabName = tab.getDbName();
+        if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty())
+          fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName();
+        else
+          fullyQualifiedTabName = tab.getTableName();
+        RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName,
+            tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache,
+            noColsMissingStats);
+
+        // 5. Build Hive Table Scan Rel
+        tableRel = new HiveTableScanRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), optTable,
+            rowType);
+
+        // 6. Add Schema(RR) to RelNode-Schema map
+        ImmutableMap<String, Integer> hiveToOptiqColMap = buildHiveToOptiqColumnMap(rr, tableRel);
+        relToHiveRR.put(tableRel, rr);
+        relToHiveColNameOptiqPosMap.put(tableRel, hiveToOptiqColMap);
+      } catch (Exception e) {
+        if (e instanceof SemanticException) {
+          throw (SemanticException) e;
+        } else {
+          throw (new RuntimeException(e));
+        }
+      }
+
+      return tableRel;
+    }
+
+    private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException {
+      ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel));
+      if (filterCondn instanceof ExprNodeConstantDesc
+          && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
+        // queries like select * from t1 where 'foo';
+        // Optiq's rule PushFilterThroughProject chokes on it. Arguably, we can insert a cast to
+        // boolean in such cases, but since Postgres, Oracle and MS SQL Server fail at compile time
+        // for such queries, it's an arcane corner case, not worth adding that complexity.
+        throw new OptiqSemanticException("Filter expression with non-boolean return type.");
+      }
+      ImmutableMap<String, Integer> hiveColNameOptiqPosMap = this.relToHiveColNameOptiqPosMap
+          .get(srcRel);
+      RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(),
+          hiveColNameOptiqPosMap, 0, true).convert(filterCondn);
+      RelNode filterRel = new HiveFilterRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+          srcRel, convertedFilterExpr);
+      relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
+      relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap);
+
+      return filterRel;
+    }
+
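The first genFilterRelNode above rejects constant, non-boolean WHERE expressions (e.g. WHERE 'foo') instead of coercing them to boolean; the resulting OptiqSemanticException then routes the query down the non-CBO path via the fallback in analyzeInternal. A sketch of that guard, with a hypothetical ExprLike interface:

// Hypothetical sketch of the constant-non-boolean filter guard.
final class FilterTypeGuard {
  interface ExprLike { boolean isConstant(); String typeName(); }
  static void check(ExprLike filter) {
    if (filter.isConstant() && !"boolean".equals(filter.typeName())) {
      // e.g. SELECT * FROM t1 WHERE 'foo' -- PushFilterThroughProject chokes on this
      throw new IllegalStateException("Filter expression with non-boolean return type.");
    }
  }
}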
+ */ + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outerQBRR = inputRR; + ImmutableMap<String, Integer> outerQBPosMap = + relToHiveColNameOptiqPosMap.get(srcRel); + + for (int i = 0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = qb.incrNumSubQueryPredicates(); + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, + originalSubQueryAST, ctx); + + if (!forHavingClause) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } + String havingInputAlias = null; + + if (forHavingClause) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToRel.put(havingInputAlias, srcRel); + } + + subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, + aliasToRel.keySet()); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); + qbSQ.setSubQueryDef(subQuery.getSubQuery()); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1); + getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); + aliasToRel.put(subQuery.getAlias(), subQueryRelNode); + RowResolver sqRR = relToHiveRR.get(subQueryRelNode); + + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if (subQuery.getOperator().getType() != SubQueryType.EXISTS + && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, + "SubQuery can contain only 1 item in Select List.")); + } + + /* + * If this is a Not In SubQuery Predicate then Join in the Null Check + * SubQuery. See QBSubQuery.NotInCheck for details on why and how this + * is constructed. + */ + if (subQuery.getNotInCheck() != null) { + QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); + notInCheck.setSQRR(sqRR); + QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); + qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); + ctx_1 = initPhase1Ctx(); + doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1); + getMetaData(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); + aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); + srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, + // set explicitly to inner until we figure out SemiJoin use + // notInCheck.getJoinType(), + JoinType.INNER, notInCheck.getJoinConditionAST()); + inputRR = relToHiveRR.get(srcRel); + if (forHavingClause) { + aliasToRel.put(havingInputAlias, srcRel); + } + } + + /* + * Gen Join between outer Operator and SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); + srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), + subQuery.getJoinConditionAST()); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + + srcRel = genFilterRelNode(searchCond, srcRel); + + /* + * For Not Exists and Not In, add a projection on top of the Left + * Outer Join. 
+        if (subQuery.getOperator().getType() == SubQueryType.NOT_EXISTS
+            || subQuery.getOperator().getType() == SubQueryType.NOT_IN) {
+          srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
+        }
+      }
+      relToHiveRR.put(srcRel, outerQBRR);
+      relToHiveColNameOptiqPosMap.put(srcRel, outerQBPosMap);
+      return srcRel;
+    }
+
+    return genFilterRelNode(searchCond, srcRel);
+  }
+
+  private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
+    RowResolver iRR = relToHiveRR.get(srcRel);
+    RowResolver oRR = new RowResolver();
+    RowResolver.add(oRR, iRR, 0, numColumns);
+
+    List<RexNode> optiqColLst = new ArrayList<RexNode>();
+    List<String> oFieldNames = new ArrayList<String>();
+    RelDataType iType = srcRel.getRowType();
+
+    // Keep only the first numColumns fields (the left/outer side), matching oRR.
+    for (int i = 0; i < numColumns; i++) {
+      RelDataTypeField fType = iType.getFieldList().get(i);
+      String fName = iType.getFieldNames().get(i);
+      optiqColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i));
+      oFieldNames.add(fName);
+    }
+
+    HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames);
+
+    this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(oRR, selRel));
+    this.relToHiveRR.put(selRel, oRR);
+    return selRel;
+  }
+
+  private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel,
+      boolean forHavingClause) throws SemanticException {
+    RelNode filterRel = null;
+
+    Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
+        .iterator();
+    if (whereClauseIterator.hasNext()) {
+      filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
+          aliasToRel, forHavingClause);
+    }
+
+    return filterRel;
+  }
+
+  /**
+   * Class to store GenericUDAF related information.
+   */
+  private class AggInfo {
+    private final List<ExprNodeDesc> m_aggParams;
+    private final TypeInfo m_returnType;
+    private final String m_udfName;
+    private final boolean m_distinct;
+
+    private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName,
+        boolean isDistinct) {
+      m_aggParams = aggParams;
+      m_returnType = returnType;
+      m_udfName = udfName;
+      m_distinct = isDistinct;
+    }
+  }
+
+  private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst,
+      RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap,
+      Integer childProjLstIndx) throws SemanticException {
+
+    // 1. Get the agg fn return type in Optiq
+    RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType,
+        this.cluster.getTypeFactory());
+
+    // 2. Convert the agg fn args, and the types of the args, to Optiq
+    // TODO: Does HQL allow expressions as aggregate args, or can they only be
+    // projections from the child?
+    Integer inputIndx;
+    List<Integer> argList = new ArrayList<Integer>();
+    RexNode rexNd = null;
+    RelDataTypeFactory dtFactory = this.cluster.getTypeFactory();
+    ImmutableList.Builder<RelDataType> aggArgRelDTBldr = new ImmutableList.Builder<RelDataType>();
+    for (ExprNodeDesc expr : agg.m_aggParams) {
+      rexNd = converter.convert(expr);
+      inputIndx = rexNodeToPosMap.get(rexNd.toString());
+      if (inputIndx == null) {
+        gbChildProjLst.add(rexNd);
+        rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx);
+        inputIndx = childProjLstIndx;
+        childProjLstIndx++;
+      }
+      argList.add(inputIndx);
+
+      // TODO: does the arg need a type cast?
+      aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory));
+    }
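+    // De-dup note (illustrative, hypothetical columns): for
+    //   select sum(c1 + c2), avg(c1 + c2) from t group by c0
+    // the second aggregate's (c1 + c2) converts to the same RexNode string, so
+    // rexNodeToPosMap yields the existing child-project ordinal instead of
+    // projecting the expression twice.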
+    // 3. Get the aggregation fn from Optiq, given the name, return type and
+    // input arg types
+    final Aggregation aggregation = SqlFunctionConverter.getOptiqAggFn(agg.m_udfName,
+        aggArgRelDTBldr.build(), aggFnRetType);
+
+    return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null);
+  }
+
+  private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst,
+      RelNode srcRel) throws SemanticException {
+    RowResolver gbInputRR = this.relToHiveRR.get(srcRel);
+    ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
+    RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
+        posMap, 0, false);
+
+    final List<RexNode> gbChildProjLst = Lists.newArrayList();
+    final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>();
+    final BitSet groupSet = new BitSet();
+    int gbIndx = 0;
+    RexNode rnd;
+    for (ExprNodeDesc key : gbExprs) {
+      rnd = converter.convert(key);
+      gbChildProjLst.add(rnd);
+      groupSet.set(gbIndx);
+      rexNodeToPosMap.put(rnd.toString(), gbIndx);
+      gbIndx++;
+    }
+
+    List<AggregateCall> aggregateCalls = Lists.newArrayList();
+    for (AggInfo agg : aggInfoLst) {
+      aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap,
+          gbChildProjLst.size()));
+    }
+
+    if (gbChildProjLst.isEmpty()) {
+      // This will happen for count(*); in such cases we arbitrarily pick the
+      // first element from srcRel
+      gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0));
+    }
+    RelNode gbInputRel = HiveProjectRel.create(srcRel, gbChildProjLst, null);
+
+    HiveRel aggregateRel = null;
+    try {
+      aggregateRel = new HiveAggregateRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+          gbInputRel, groupSet, aggregateCalls);
+    } catch (InvalidRelException e) {
+      throw new SemanticException(e);
+    }
+
+    return aggregateRel;
+  }
+
+  private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
+      RowResolver gByInputRR, RowResolver gByRR) {
+    if (gByExpr.getType() == HiveParser.DOT
+        && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
+      String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0)
+          .getText());
+      String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText());
+      gByRR.put(tab_alias, col_alias, colInfo);
+    } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) {
+      String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText());
+      String tab_alias = null;
+      /*
+       * If the input to the GBy has a tab alias for the column, then add an
+       * entry based on that tab_alias. E.g. the query
+       *   select b.x, count(*) from t1 b group by x
+       * needs (tab_alias=b, col_alias=x) in the GBy RR. tab_alias=b comes from
+       * looking at the RowResolver that is the ancestor before any
+       * GBy/ReduceSinks added for the GBY operation.
+       */
+      try {
+        ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias);
+        tab_alias = pColInfo == null ? null : pColInfo.getTabAlias();
+      } catch (SemanticException se) {
+        // Ignore: no alternate mapping can be derived for this key.
+      }
+      gByRR.put(tab_alias, col_alias, colInfo);
+    }
+  }
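+  // Plan shape (illustrative, hypothetical names): genGBRelNode turns
+  //   select c0, sum(c1) from t group by c0
+  // into HiveAggregateRel(groupSet={0}, SUM(arg=1)) over a HiveProjectRel
+  // producing (c0, c1); for a bare "select count(*) from t" the child project
+  // is handed an arbitrary first column, presumably because an empty
+  // projection is not usable as the aggregate's input.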
+  private void addToGBExpr(RowResolver groupByOutputRowResolver,
+      RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc,
+      List<ExprNodeDesc> gbExprNDescLst, List<String> outputColumnNames) {
+    // TODO: Should we use grpbyExprNDesc.getTypeInfo()? What if the expr is a UDF?
+    int i = gbExprNDescLst.size();
+    String field = getColumnInternalName(i);
+    outputColumnNames.add(field);
+    gbExprNDescLst.add(grpbyExprNDesc);
+
+    ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false);
+    groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
+
+    addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver,
+        groupByOutputRowResolver);
+  }
+
+  private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR)
+      throws SemanticException {
+    AggInfo aInfo = null;
+
+    // 1. Convert the UDAF params to ExprNodeDesc
+    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
+    for (int i = 1; i <= aggFnLstArgIndx; i++) {
+      ASTNode paraExpr = (ASTNode) aggAst.getChild(i);
+      ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR);
+      aggParameters.add(paraExprNode);
+    }
+
+    // 2. Is this a distinct UDAF?
+    boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI;
+
+    // 3. Determine the type of the UDAF
+    TypeInfo udafRetType = null;
+
+    // 3.1 Obtain the UDAF name
+    String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
+
+    // 3.2 Ranking functions return 'int'/'double'
+    if (FunctionRegistry.isRankingFunction(aggName)) {
+      if (aggName.equalsIgnoreCase("percent_rank")) {
+        udafRetType = TypeInfoFactory.doubleTypeInfo;
+      } else {
+        udafRetType = TypeInfoFactory.intTypeInfo;
+      }
+    } else {
+      // 3.3 Try obtaining UDAF evaluators to determine the return type
+      try {
+        boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
+
+        // 3.3.1 Get the UDAF evaluator
+        Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
+
+        GenericUDAFEvaluator genericUDAFEvaluator = null;
+        if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME)
+            || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) {
+          ArrayList<ObjectInspector> originalParameterTypeInfos =
+              getWritableObjectInspector(aggParameters);
+          genericUDAFEvaluator =
+              FunctionRegistry.getGenericWindowingEvaluator(aggName,
+                  originalParameterTypeInfos, isDistinct, isAllColumns);
+          GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+          udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo();
+        } else {
+          genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
+              aggParameters, aggAst, isDistinct, isAllColumns);
+          assert (genericUDAFEvaluator != null);
+
+          // 3.3.2 Get the UDAF info using the UDAF evaluator
+          GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+          udafRetType = udaf.returnType;
+        }
+      } catch (Exception e) {
+        LOG.debug("CBO: Couldn't obtain UDAF evaluators for " + aggName
+            + ", trying to translate to GenericUDF");
+      }
+
+      // 3.4 Try GenericUDF translation
+      if (udafRetType == null) {
+        TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+        // We allow stateful functions in the SELECT list (but nowhere else)
+        tcCtx.setAllowStatefulFunctions(true);
+        tcCtx.setAllowDistinctFunctions(false);
+        ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx);
+        udafRetType = exp.getTypeInfo();
+      }
+    }
+
+    // 4. Construct the AggInfo
+    aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct);
+
+    return aInfo;
+  }
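+  // Return-type note (illustrative): for windowing calls such as lead(c1, 1),
+  // the windowing evaluator apparently reports a list type (one value per row
+  // of the partition), hence the ListTypeInfo element-type unwrap above; an
+  // ordinary UDAF like avg(c1) uses udaf.returnType directly.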
+  /**
+   * Generate the GB plan.
+   *
+   * @param qb
+   * @param srcRel
+   * @return the GroupBy RelNode, or null if the query block has no GroupBy or
+   *         aggregations. TODO: 1. Grouping Sets (roll up..)
+   * @throws SemanticException
+   */
+  private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
+    RelNode gbRel = null;
+    QBParseInfo qbp = getQBParseInfo(qb);
+
+    // 0. For GSets, Cube and Rollup, bail from the Optiq path.
+    if (!qbp.getDestRollups().isEmpty()
+        || !qbp.getDestGroupingSets().isEmpty()
+        || !qbp.getDestCubes().isEmpty()) {
+      String gbyClause = null;
+      HashMap<String, ASTNode> gbysMap = qbp.getDestToGroupBy();
+      if (gbysMap.size() == 1) {
+        ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue();
+        gbyClause = SemanticAnalyzer.this.ctx.getTokenRewriteStream()
+            .toString(gbyAST.getTokenStartIndex(),
+                gbyAST.getTokenStopIndex());
+        gbyClause = " in '" + gbyClause + "'.";
+      } else {
+        gbyClause = ".";
+      }
+      String msg = String.format("Encountered Grouping Set/Cube/Rollup%s"
+          + " Grouping Set/Cube/Rollup clauses are currently not supported in CBO;"
+          + " turn off CBO for these queries.", gbyClause);
+      LOG.debug(msg);
+      throw new OptiqSemanticException(msg);
+    }
+
+    // 1. Gather the GB expressions (AST) (GB + aggregations)
+    // NOTE: Multi Insert is not supported
+    String destClauseName = qbp.getClauseNames().iterator().next();
+    List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, destClauseName);
+    HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(destClauseName);
+    boolean hasGrpByAstExprs = grpByAstExprs != null && !grpByAstExprs.isEmpty();
+    boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
+
+    if (hasGrpByAstExprs || hasAggregationTrees) {
+      ArrayList<ExprNodeDesc> gbExprNDescLst = new ArrayList<ExprNodeDesc>();
+      ArrayList<String> outputColumnNames = new ArrayList<String>();
+
+      // 2. Input and output RowResolvers
+      RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel);
+      RowResolver groupByOutputRowResolver = new RowResolver();
+      groupByOutputRowResolver.setIsExprResolver(true);
+
+      if (hasGrpByAstExprs) {
+        // 3. Construct the GB keys (ExprNode)
+        for (int i = 0; i < grpByAstExprs.size(); ++i) {
+          ASTNode grpbyExpr = grpByAstExprs.get(i);
+          Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
+              grpbyExpr, new TypeCheckCtx(groupByInputRowResolver));
+          ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr);
+          if (grpbyExprNDesc == null) {
+            throw new OptiqSemanticException("Invalid Column Reference: " + grpbyExpr.dump());
+          }
+
+          addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr,
+              grpbyExprNDesc, gbExprNDescLst, outputColumnNames);
+        }
+      }
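+      // E.g. (illustrative): for "select key, count(1) from src group by key",
+      // step 3 above yields one GB-key ExprNodeDesc for 'key'; step 4 below
+      // builds one AggInfo for count(1).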
+      // 4. Construct the aggregation function info
+      ArrayList<AggInfo> aggregations = new ArrayList<AggInfo>();
+      if (hasAggregationTrees) {
+        assert (aggregationTrees != null);
+        for (ASTNode value : aggregationTrees.values()) {
+          // 4.1 Determine the type of the UDAF
+          // This is the GenericUDAF name
+          String aggName = unescapeIdentifier(value.getChild(0).getText());
+          boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
+          boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
+
+          // 4.2 Convert the UDAF params to ExprNodeDesc
+          ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
+          for (int i = 1; i < value.getChildCount(); i++) {
+            ASTNode paraExpr = (ASTNode) value.getChild(i);
+            ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
+            aggParameters.add(paraExprNode);
+          }
+
+          Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
+          GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
+              aggParameters, value, isDistinct, isAllColumns);
+          assert (genericUDAFEvaluator != null);
+          GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+          AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct);
+          aggregations.add(aInfo);
+          String field = getColumnInternalName(gbExprNDescLst.size() + aggregations.size() - 1);
+          outputColumnNames.add(field);
+          groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType,
+              "", false));
+        }
+      }
+
+      gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel);
+      relToHiveColNameOptiqPosMap.put(gbRel,
+          buildHiveToOptiqColumnMap(groupByOutputRowResolver, gbRel));
+      this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
+    }
+
+    return gbRel;
+  }
+
+  /**
+   * Generate the OB RelNode and the input Select RelNode that should be used
+   * to introduce a top constraining Project. If the input Select RelNode is
+   * not present, then the top constraining Select is not introduced.
+   *
+   * @param qb
+   * @param srcRel
+   * @param outermostOB
+   * @return Pair<RelNode, RelNode>; key: the OB RelNode, value: the input
+   *         Select for the top constraining Select
+   * @throws SemanticException
+   */
+  private Pair<RelNode, RelNode> genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB)
+      throws SemanticException {
+    RelNode sortRel = null;
+    RelNode originalOBChild = null;
+
+    QBParseInfo qbp = getQBParseInfo(qb);
+    String dest = qbp.getClauseNames().iterator().next();
+    ASTNode obAST = qbp.getOrderByForClause(dest);
+
+    if (obAST != null) {
+      // 1. OB expr sanity test
+      // In strict mode, in the presence of order by, limit must be specified.
+      Integer limit = qb.getParseInfo().getDestLimit(dest);
+      if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")
+          && limit == null) {
+        throw new SemanticException(generateErrorMessage(obAST,
+            ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg()));
+      }
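+      // E.g. (illustrative): in strict mode "select * from src order by key"
+      // is rejected here, while "select * from src order by key limit 10" is
+      // accepted.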
+      // 2. Walk through the OB exprs and extract field collations and the
+      // additional virtual columns needed
+      final List<RexNode> newVCLst = new ArrayList<RexNode>();
+      final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
+      int fieldIndex = 0;
+
+      List<Node> obASTExprLst = obAST.getChildren();
+      ASTNode obASTExpr;
+      List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
+      RowResolver inputRR = relToHiveRR.get(srcRel);
+      RowResolver outputRR = new RowResolver();
+
+      RexNode rnd;
+      RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
+          relToHiveColNameOptiqPosMap.get(srcRel), 0, false);
+      int srcRelRecordSz = srcRel.getRowType().getFieldCount();
+
+      for (int i = 0; i < obASTExprLst.size(); i++) {
+        // 2.1 Convert the AST expr to an ExprNodeDesc
+        obASTExpr = (ASTNode) obASTExprLst.get(i);
+        Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
+            obASTExpr, new TypeCheckCtx(inputRR));
+        ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0));
+        if (obExprNDesc == null) {
+          throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
+        }
+
+        // 2.2 Convert the ExprNodeDesc to a RexNode
+        rnd = converter.convert(obExprNDesc);
+
+        // 2.3 Determine the index of the OB expr in the child schema
+        // NOTE: Optiq cannot take compound exprs in OB without them being
+        // present in the child (hence we add a child Project Rel)
+        if (rnd instanceof RexInputRef) {
+          fieldIndex = ((RexInputRef) rnd).getIndex();
+        } else {
+          fieldIndex = srcRelRecordSz + newVCLst.size();
+          newVCLst.add(rnd);
+          vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>((ASTNode) obASTExpr.getChild(0),
+              obExprNDesc.getTypeInfo()));
+        }
+
+        // 2.4 Determine the direction of the order by
+        RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
+        if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
+          order = RelFieldCollation.Direction.ASCENDING;
+        }
+
+        // 2.5 Add to the field collations
+        fieldCollations.add(new RelFieldCollation(fieldIndex, order));
+      }
+
+      // 3. Add a child Project Rel if needed; generate the output RR and the
+      // input Sel Rel for the top constraining Sel
+      RelNode obInputRel = srcRel;
+      if (!newVCLst.isEmpty()) {
+        List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
+            new Function<RelDataTypeField, RexNode>() {
+              @Override
+              public RexNode apply(RelDataTypeField input) {
+                return new RexInputRef(input.getIndex(), input.getType());
+              }
+            });
+        RowResolver obSyntheticProjectRR = new RowResolver();
+        RowResolver.add(obSyntheticProjectRR, inputRR, 0);
+        int vcolPos = inputRR.getRowSchema().getSignature().size();
+        for (Pair<ASTNode, TypeInfo> astTypePair : vcASTTypePairs) {
+          obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(
+              getColumnInternalName(vcolPos), astTypePair.getValue(), null, false));
+          vcolPos++;
+        }
+        obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst),
+            obSyntheticProjectRR, srcRel);
+
+        if (outermostOB) {
+          RowResolver.add(outputRR, inputRR, 0);
+        } else {
+          RowResolver.add(outputRR, obSyntheticProjectRR, 0);
+          originalOBChild = srcRel;
+        }
+      } else {
+        RowResolver.add(outputRR, inputRR, 0);
+      }
+
+      // 4. Construct the SortRel
+      RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION);
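+      // The SortRel built here sorts on the fieldCollations gathered in step 2;
+      // e.g. (illustrative) "select key from src order by key + 1" sorts on the
+      // synthetic (key + 1) column supplied by the child Project from step 3.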
[... 782 lines stripped ...]
