http://git-wip-us.apache.org/repos/asf/hive/blob/9f5bea3b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig deleted file mode 100644 index 699bb11..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig +++ /dev/null @@ -1,13038 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse; - -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.Serializable; -import java.security.AccessControlException; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Deque; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Queue; -import java.util.Set; -import java.util.TreeSet; -import java.util.UUID; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; - -import org.antlr.runtime.ClassicToken; -import org.antlr.runtime.CommonToken; -import org.antlr.runtime.Token; -import org.antlr.runtime.tree.Tree; -import org.antlr.runtime.tree.TreeWizard; -import org.antlr.runtime.tree.TreeWizard.ContextVisitor; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.hive.common.BlobStorageUtils; -import org.apache.hadoop.hive.ql.plan.AlterTableDesc; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ObjectPair; -import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Order; -import 
org.apache.hadoop.hive.metastore.api.SQLForeignKey; -import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; -import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.QueryProperties; -import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.ArchiveUtils; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.FilterOperator; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.RecordReader; -import org.apache.hadoop.hive.ql.exec.RecordWriter; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.io.AcidOutputFormat; -import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; -import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.metadata.DummyPartition; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.HiveUtils; -import org.apache.hadoop.hive.ql.metadata.InvalidTableException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.optimizer.Optimizer; -import org.apache.hadoop.hive.ql.optimizer.Transform; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; -import org.apache.hadoop.hive.ql.optimizer.lineage.Generator; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType; 
-import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec; -import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; -import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; -import org.apache.hadoop.hive.ql.plan.CreateTableDesc; -import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; -import org.apache.hadoop.hive.ql.plan.CreateViewDesc; -import org.apache.hadoop.hive.ql.plan.DDLWork; -import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc; -import org.apache.hadoop.hive.ql.plan.ForwardDesc; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.JoinCondDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; -import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; -import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; -import org.apache.hadoop.hive.ql.plan.LimitDesc; -import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; -import org.apache.hadoop.hive.ql.plan.LoadFileDesc; -import org.apache.hadoop.hive.ql.plan.LoadTableDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PTFDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; -import org.apache.hadoop.hive.ql.plan.ScriptDesc; -import org.apache.hadoop.hive.ql.plan.SelectDesc; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import 
org.apache.hadoop.hive.ql.plan.TableScanDesc; -import org.apache.hadoop.hive.ql.plan.UDTFDesc; -import org.apache.hadoop.hive.ql.plan.UnionDesc; -import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef; -import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; -import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; -import org.apache.hadoop.hive.ql.util.ResourceDownloader; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; -import org.apache.hadoop.hive.serde2.NoOpFetchFormatter; -import org.apache.hadoop.hive.serde2.NullStructSerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hadoop.security.UserGroupInformation; - -import com.google.common.collect.Sets; - -/** - * Implementation of the semantic analyzer. It generates the query plan. - * There are other specific semantic analyzers for some hive operations such as - * DDLSemanticAnalyzer for ddl operations. 
- */ - -public class SemanticAnalyzer extends BaseSemanticAnalyzer { - - public static final String DUMMY_DATABASE = "_dummy_database"; - public static final String DUMMY_TABLE = "_dummy_table"; - public static final String SUBQUERY_TAG_1 = "-subquery1"; - public static final String SUBQUERY_TAG_2 = "-subquery2"; - - // Max characters when auto generating the column name with func name - private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; - - private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__"; - - static final String MATERIALIZATION_MARKER = "$MATERIALIZATION"; - - private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner; - private HashMap<TableScanOperator, PrunedPartitionList> opToPartList; - protected HashMap<String, TableScanOperator> topOps; - protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx; - private List<LoadTableDesc> loadTableWork; - private List<LoadFileDesc> loadFileWork; - private List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts; - private final Map<JoinOperator, QBJoinTree> joinContext; - private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext; - private final HashMap<TableScanOperator, Table> topToTable; - private final Map<FileSinkOperator, Table> fsopToTable; - private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting; - private final HashMap<TableScanOperator, Map<String, String>> topToTableProps; - private QB qb; - private ASTNode ast; - private int destTableId; - private UnionProcContext uCtx; - List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer; - private HashMap<TableScanOperator, SampleDesc> opToSamplePruner; - private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner; - private Map<SelectOperator, Table> viewProjectToTableSchema; - /** - * a map for the split sampling, from alias to an instance of SplitSample - * that describes percentage and number. - */ - private final HashMap<String, SplitSample> nameToSplitSample; - Map<GroupByOperator, Set<String>> groupOpToInputTables; - Map<String, PrunedPartitionList> prunedPartitions; - protected List<FieldSchema> resultSchema; - private CreateViewDesc createVwDesc; - private ArrayList<String> viewsExpanded; - private ASTNode viewSelect; - protected final UnparseTranslator unparseTranslator; - private final GlobalLimitCtx globalLimitCtx; - - // prefix for column names auto generated by hive - private final String autogenColAliasPrfxLbl; - private final boolean autogenColAliasPrfxIncludeFuncName; - - // Keep track of view alias to read entity corresponding to the view - // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T - // keeps track of aliases for V3, V3:V2, V3:V2:V1. - // This is used when T is added as an input for the query, the parents of T is - // derived from the alias V3:V2:V1:T - private final Map<String, ReadEntity> viewAliasToInput; - - //need merge isDirect flag to input even if the newInput does not have a parent - private boolean mergeIsDirect; - - // flag for no scan during analyze ... compute statistics - protected boolean noscan; - - //flag for partial scan during analyze ... compute statistics - protected boolean partialscan; - - protected volatile boolean disableJoinMerge = false; - - /* - * Capture the CTE definitions in a Query. - */ - final Map<String, CTEClause> aliasToCTEs; - - /* - * Used to check recursive CTE invocations. 
Similar to viewsExpanded - */ - ArrayList<String> ctesExpanded; - - /* - * Whether root tasks after materialized CTE linkage have been resolved - */ - boolean rootTasksResolved; - - protected TableMask tableMask; - - CreateTableDesc tableDesc; - - /** Not thread-safe. */ - final ASTSearcher astSearcher = new ASTSearcher(); - - protected AnalyzeRewriteContext analyzeRewrite; - - // A mapping from a tableName to a table object in metastore. - Map<String, Table> tabNameToTabObject; - - // The tokens we should ignore when we are trying to do table masking. - private final Set<Integer> ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY, - HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY, - HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY); - - static class Phase1Ctx { - String dest; - int nextNum; - } - - public SemanticAnalyzer(QueryState queryState) throws SemanticException { - super(queryState); - opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>(); - opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>(); - opToSamplePruner = new HashMap<TableScanOperator, SampleDesc>(); - nameToSplitSample = new HashMap<String, SplitSample>(); - // Must be deterministic order maps - see HIVE-8707 - topOps = new LinkedHashMap<String, TableScanOperator>(); - loadTableWork = new ArrayList<LoadTableDesc>(); - loadFileWork = new ArrayList<LoadFileDesc>(); - columnStatsAutoGatherContexts = new ArrayList<ColumnStatsAutoGatherContext>(); - opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>(); - joinContext = new HashMap<JoinOperator, QBJoinTree>(); - smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>(); - // Must be deterministic order map for consistent q-test output across Java versions - topToTable = new LinkedHashMap<TableScanOperator, Table>(); - fsopToTable = new HashMap<FileSinkOperator, Table>(); - reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>(); - topToTableProps = new HashMap<TableScanOperator, Map<String, String>>(); - destTableId = 1; - uCtx = null; - listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>(); - groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>(); - prunedPartitions = new HashMap<String, PrunedPartitionList>(); - tabNameToTabObject = new HashMap<String, Table>(); - unparseTranslator = new UnparseTranslator(conf); - autogenColAliasPrfxLbl = HiveConf.getVar(conf, - HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); - autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME); - queryProperties = new QueryProperties(); - opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>(); - aliasToCTEs = new HashMap<String, CTEClause>(); - globalLimitCtx = new GlobalLimitCtx(); - viewAliasToInput = new HashMap<String, ReadEntity>(); - mergeIsDirect = true; - noscan = partialscan = false; - tabNameToTabObject = new HashMap<>(); - } - - @Override - protected void reset(boolean clearPartsCache) { - super.reset(true); - if(clearPartsCache) { - prunedPartitions.clear(); - - //When init(true) combine with genResolvedParseTree, it will generate Resolved Parse tree from syntax tree - //ReadEntity created under these conditions should be all relevant to the syntax tree even the ones without parents - //set mergeIsDirect to true here. 
- mergeIsDirect = true; - } else { - mergeIsDirect = false; - } - tabNameToTabObject.clear(); - loadTableWork.clear(); - loadFileWork.clear(); - columnStatsAutoGatherContexts.clear(); - topOps.clear(); - destTableId = 1; - idToTableNameMap.clear(); - qb = null; - ast = null; - uCtx = null; - joinContext.clear(); - smbMapJoinContext.clear(); - opParseCtx.clear(); - groupOpToInputTables.clear(); - disableJoinMerge = false; - aliasToCTEs.clear(); - topToTable.clear(); - opToPartPruner.clear(); - opToPartList.clear(); - opToPartToSkewedPruner.clear(); - opToSamplePruner.clear(); - nameToSplitSample.clear(); - fsopToTable.clear(); - resultSchema = null; - createVwDesc = null; - viewsExpanded = null; - viewSelect = null; - ctesExpanded = null; - globalLimitCtx.disableOpt(); - viewAliasToInput.clear(); - reduceSinkOperatorsAddedByEnforceBucketingSorting.clear(); - topToTableProps.clear(); - listMapJoinOpsNoReducer.clear(); - unparseTranslator.clear(); - queryProperties.clear(); - outputs.clear(); - } - - public void initParseCtx(ParseContext pctx) { - opToPartPruner = pctx.getOpToPartPruner(); - opToPartList = pctx.getOpToPartList(); - opToSamplePruner = pctx.getOpToSamplePruner(); - topOps = pctx.getTopOps(); - loadTableWork = pctx.getLoadTableWork(); - loadFileWork = pctx.getLoadFileWork(); - ctx = pctx.getContext(); - destTableId = pctx.getDestTableId(); - idToTableNameMap = pctx.getIdToTableNameMap(); - uCtx = pctx.getUCtx(); - listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); - prunedPartitions = pctx.getPrunedPartitions(); - tabNameToTabObject = pctx.getTabNameToTabObject(); - fetchTask = pctx.getFetchTask(); - setLineageInfo(pctx.getLineageInfo()); - } - - public ParseContext getParseContext() { - // Make sure the basic query properties are initialized - copyInfoToQueryProperties(queryProperties); - return new ParseContext(queryState, opToPartPruner, opToPartList, topOps, - new HashSet<JoinOperator>(joinContext.keySet()), - new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), - loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, - opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, - analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema, acidFileSinks); - } - - public CompilationOpContext getOpContext() { - return ctx.getOpContext(); - } - - public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias) - throws SemanticException { - doPhase1QBExpr(ast, qbexpr, id, alias, false); - } - @SuppressWarnings("nls") - public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView) - throws SemanticException { - - assert (ast.getToken() != null); - switch (ast.getToken().getType()) { - case HiveParser.TOK_QUERY: { - QB qb = new QB(id, alias, true); - qb.setInsideView(insideView); - Phase1Ctx ctx_1 = initPhase1Ctx(); - doPhase1(ast, qb, ctx_1, null); - - qbexpr.setOpcode(QBExpr.Opcode.NULLOP); - qbexpr.setQB(qb); - } - break; - case HiveParser.TOK_UNIONALL: { - qbexpr.setOpcode(QBExpr.Opcode.UNION); - // query 1 - assert (ast.getChild(0) != null); - QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1); - doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1, - alias + SUBQUERY_TAG_1, insideView); - qbexpr.setQBExpr1(qbexpr1); - - // query 2 - assert (ast.getChild(1) != 
null); - QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2); - doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2, - alias + SUBQUERY_TAG_2, insideView); - qbexpr.setQBExpr2(qbexpr2); - } - break; - } - } - - private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect( - ASTNode selExpr, QB qb, String dest) throws SemanticException { - - // Iterate over the selects search for aggregation Trees. - // Use String as keys to eliminate duplicate trees. - LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>(); - List<ASTNode> wdwFns = new ArrayList<ASTNode>(); - for (int i = 0; i < selExpr.getChildCount(); ++i) { - ASTNode function = (ASTNode) selExpr.getChild(i); - if (function.getType() == HiveParser.TOK_SELEXPR || - function.getType() == HiveParser.TOK_SUBQUERY_EXPR) { - function = (ASTNode)function.getChild(0); - } - doPhase1GetAllAggregations(function, aggregationTrees, wdwFns); - } - - // window based aggregations are handled differently - for (ASTNode wdwFn : wdwFns) { - WindowingSpec spec = qb.getWindowingSpec(dest); - if(spec == null) { - queryProperties.setHasWindowing(true); - spec = new WindowingSpec(); - qb.addDestToWindowingSpec(dest, spec); - } - HashMap<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest); - int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size(); - WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn, - (ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1)); - // If this is a duplicate invocation of a function; don't add to WindowingSpec. - if ( wExprsInDest != null && - wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) { - continue; - } - wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx); - spec.addWindowFunction(wFnSpec); - qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression()); - } - - return aggregationTrees; - } - - private void doPhase1GetColumnAliasesFromSelect( - ASTNode selectExpr, QBParseInfo qbp) { - for (int i = 0; i < selectExpr.getChildCount(); ++i) { - ASTNode selExpr = (ASTNode) selectExpr.getChild(i); - if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR) - && (selExpr.getChildCount() == 2)) { - String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText()); - qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias); - } - } - } - - /** - * DFS-scan the expressionTree to find all aggregation subtrees and put them - * in aggregations. - * - * @param expressionTree - * @param aggregations - * the key to the HashTable is the toStringTree() representation of - * the aggregation subtree. 
- * @throws SemanticException - */ - private void doPhase1GetAllAggregations(ASTNode expressionTree, - HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException { - int exprTokenType = expressionTree.getToken().getType(); - if (exprTokenType == HiveParser.TOK_FUNCTION - || exprTokenType == HiveParser.TOK_FUNCTIONDI - || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) { - assert (expressionTree.getChildCount() != 0); - if (expressionTree.getChild(expressionTree.getChildCount()-1).getType() - == HiveParser.TOK_WINDOWSPEC) { - // If it is a windowing spec, we include it in the list - // Further, we will examine its children AST nodes to check whether - // there are aggregation functions within - wdwFns.add(expressionTree); - doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1), - aggregations, wdwFns); - return; - } - if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { - String functionName = unescapeIdentifier(expressionTree.getChild(0) - .getText()); - // Validate the function name - if (FunctionRegistry.getFunctionInfo(functionName) == null) { - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName)); - } - if(FunctionRegistry.impliesOrder(functionName)) { - throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); - } - if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { - if(containsLeadLagUDF(expressionTree)) { - throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); - } - aggregations.put(expressionTree.toStringTree(), expressionTree); - FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName); - if (!fi.isNative()) { - unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree - .getChild(0)); - } - return; - } - } - } - for (int i = 0; i < expressionTree.getChildCount(); i++) { - doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i), - aggregations, wdwFns); - } - } - - private List<ASTNode> doPhase1GetDistinctFuncExprs( - HashMap<String, ASTNode> aggregationTrees) throws SemanticException { - List<ASTNode> exprs = new ArrayList<ASTNode>(); - for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - assert (value != null); - if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { - exprs.add(value); - } - } - return exprs; - } - - public static String generateErrorMessage(ASTNode ast, String message) { - StringBuilder sb = new StringBuilder(); - if (ast == null) { - sb.append(message).append(". Cannot tell the position of null AST."); - return sb.toString(); - } - sb.append(ast.getLine()); - sb.append(":"); - sb.append(ast.getCharPositionInLine()); - sb.append(" "); - sb.append(message); - sb.append(". Error encountered near token '"); - sb.append(ErrorMsg.getText(ast)); - sb.append("'"); - return sb.toString(); - } - - ASTNode getAST() { - return this.ast; - } - - protected void setAST(ASTNode newAST) { - this.ast = newAST; - } - - /** - * Goes though the tabref tree and finds the alias for the table. Once found, - * it records the table name-> alias association in aliasToTabs. It also makes - * an association from the alias to the table AST in parse info. 
- * - * @return the alias of the table - */ - private String processTable(QB qb, ASTNode tabref) throws SemanticException { - // For each table reference get the table name - // and the alias (if alias is not present, the table name - // is used as an alias) - int aliasIndex = 0; - int propsIndex = -1; - int tsampleIndex = -1; - int ssampleIndex = -1; - for (int index = 1; index < tabref.getChildCount(); index++) { - ASTNode ct = (ASTNode) tabref.getChild(index); - if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) { - tsampleIndex = index; - } else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) { - ssampleIndex = index; - } else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) { - propsIndex = index; - } else { - aliasIndex = index; - } - } - - ASTNode tableTree = (ASTNode) (tabref.getChild(0)); - - String tabIdName = getUnescapedName(tableTree).toLowerCase(); - - String alias; - if (aliasIndex != 0) { - alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText()); - } - else { - alias = getUnescapedUnqualifiedTableName(tableTree); - } - - if (propsIndex >= 0) { - Tree propsAST = tabref.getChild(propsIndex); - Map<String, String> props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0)); - // We get the information from Calcite. - if ("TRUE".equals(props.get("insideView"))) { - qb.getAliasInsideView().add(alias.toLowerCase()); - } - qb.setTabProps(alias, props); - } - - // If the alias is already there then we have a conflict - if (qb.exists(alias)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref - .getChild(aliasIndex))); - } - if (tsampleIndex >= 0) { - ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex); - ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>(); - if (sampleClause.getChildCount() > 2) { - for (int i = 2; i < sampleClause.getChildCount(); i++) { - sampleCols.add((ASTNode) sampleClause.getChild(i)); - } - } - // TODO: For now only support sampling on up to two columns - // Need to change it to list of columns - if (sampleCols.size() > 2) { - throw new SemanticException(generateErrorMessage( - (ASTNode) tabref.getChild(0), - ErrorMsg.SAMPLE_RESTRICTION.getMsg())); - } - TableSample tabSample = new TableSample( - unescapeIdentifier(sampleClause.getChild(0).getText()), - unescapeIdentifier(sampleClause.getChild(1).getText()), - sampleCols); - qb.getParseInfo().setTabSample(alias, tabSample); - if (unparseTranslator.isEnabled()) { - for (ASTNode sampleCol : sampleCols) { - unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol - .getChild(0)); - } - } - } else if (ssampleIndex >= 0) { - ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex); - - Tree type = sampleClause.getChild(0); - Tree numerator = sampleClause.getChild(1); - String value = unescapeIdentifier(numerator.getText()); - - - SplitSample sample; - if (type.getType() == HiveParser.TOK_PERCENT) { - assertCombineInputFormat(numerator, "Percentage"); - Double percent = Double.valueOf(value).doubleValue(); - if (percent < 0 || percent > 100) { - throw new SemanticException(generateErrorMessage((ASTNode) numerator, - "Sampling percentage should be between 0 and 100")); - } - int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM); - sample = new SplitSample(percent, seedNum); - } else if (type.getType() == HiveParser.TOK_ROWCOUNT) { - sample = new SplitSample(Integer.parseInt(value)); - } else { - assert type.getType() == HiveParser.TOK_LENGTH; - assertCombineInputFormat(numerator, "Total Length"); 
- long length = Integer.parseInt(value.substring(0, value.length() - 1)); - char last = value.charAt(value.length() - 1); - if (last == 'k' || last == 'K') { - length <<= 10; - } else if (last == 'm' || last == 'M') { - length <<= 20; - } else if (last == 'g' || last == 'G') { - length <<= 30; - } - int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM); - sample = new SplitSample(length, seedNum); - } - String alias_id = getAliasId(alias, qb); - nameToSplitSample.put(alias_id, sample); - } - // Insert this map into the stats - qb.setTabAlias(alias, tabIdName); - if (qb.isInsideView()) { - qb.getAliasInsideView().add(alias.toLowerCase()); - } - qb.addAlias(alias); - - qb.getParseInfo().setSrcForAlias(alias, tableTree); - - // if alias to CTE contains the alias, we do not do the translation because - // cte is actually a subquery. - if (!this.aliasToCTEs.containsKey(alias)) { - unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase()); - if (aliasIndex != 0) { - unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex)); - } - } - - return alias; - } - - Map<String, SplitSample> getNameToSplitSampleMap() { - return this.nameToSplitSample; - } - - /** - * Convert a string to Text format and write its bytes in the same way TextOutputFormat would do. - * This is needed to properly encode non-ascii characters. - */ - private static void writeAsText(String text, FSDataOutputStream out) throws IOException { - Text to = new Text(text); - out.write(to.getBytes(), 0, to.getLength()); - } - - /** - * Generate a temp table out of a value clause - * See also {@link #preProcessForInsert(ASTNode, QB)} - */ - private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException { - Path dataDir = null; - if(!qb.getEncryptedTargetTablePaths().isEmpty()) { - //currently only Insert into T values(...) is supported thus only 1 values clause - //and only 1 target table are possible. If/when support for - //select ... from values(...) is added an insert statement may have multiple - //encrypted target tables. - dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri()); - } - // Pick a name for the table - SessionState ss = SessionState.get(); - String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix(); - - // Step 1, parse the values clause we were handed - List<? extends Node> fromChildren = originalFrom.getChildren(); - // First child should be the virtual table ref - ASTNode virtualTableRef = (ASTNode)fromChildren.get(0); - assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF : - "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " + - virtualTableRef.getName(); - - List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren(); - // First child of this should be the table name. If it's anonymous, - // then we don't have a table name. - ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0); - if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) { - // TODO, if you want to make select ... from (values(...) as foo(...) 
work, - // you need to parse this list of columns names and build it into the table - throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg()); - } - - // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE - ASTNode valuesTable = (ASTNode)fromChildren.get(1); - assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE : - "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " + - valuesTable.getName(); - // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW - List<? extends Node> valuesTableChildren = valuesTable.getChildren(); - - // Now that we're going to start reading through the rows, open a file to write the rows too - // If we leave this method before creating the temporary table we need to be sure to clean up - // this file. - Path tablePath = null; - FileSystem fs = null; - FSDataOutputStream out = null; - try { - if(dataDir == null) { - tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf); - } - else { - //if target table of insert is encrypted, make sure temporary table data is stored - //similarly encrypted - tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf); - } - fs = tablePath.getFileSystem(conf); - fs.mkdirs(tablePath); - Path dataFile = new Path(tablePath, "data_file"); - out = fs.create(dataFile); - List<FieldSchema> fields = new ArrayList<FieldSchema>(); - - boolean firstRow = true; - for (Node n : valuesTableChildren) { - ASTNode valuesRow = (ASTNode) n; - assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW : - "Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName(); - // Each of the children of this should be a literal - List<? extends Node> valuesRowChildren = valuesRow.getChildren(); - boolean isFirst = true; - int nextColNum = 1; - for (Node n1 : valuesRowChildren) { - ASTNode value = (ASTNode) n1; - if (firstRow) { - fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", "")); - } - if (isFirst) isFirst = false; - else writeAsText("\u0001", out); - writeAsText(unparseExprForValuesClause(value), out); - } - writeAsText("\n", out); - firstRow = false; - } - - // Step 2, create a temp table, using the created file as the data - StorageFormat format = new StorageFormat(conf); - format.processStorageFormat("TextFile"); - Table table = db.newTable(tableName); - table.setSerializationLib(format.getSerde()); - table.setFields(fields); - table.setDataLocation(tablePath); - table.getTTable().setTemporary(true); - table.setStoredAsSubDirectories(false); - table.setInputFormatClass(format.getInputFormat()); - table.setOutputFormatClass(format.getOutputFormat()); - db.createTable(table, false); - } catch (Exception e) { - String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage(); - LOG.error(errMsg); - // Try to delete the file - if (fs != null && tablePath != null) { - try { - fs.delete(tablePath, false); - } catch (IOException swallowIt) {} - } - throw new SemanticException(errMsg, e); - } finally { - IOUtils.closeStream(out); - } - - // Step 3, return a new subtree with a from clause built around that temp table - // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename) - Token t = new ClassicToken(HiveParser.TOK_TABREF); - ASTNode tabRef = new ASTNode(t); - t = new ClassicToken(HiveParser.TOK_TABNAME); - ASTNode tabNameNode = new ASTNode(t); - tabRef.addChild(tabNameNode); - t = new ClassicToken(HiveParser.Identifier, tableName); - 
ASTNode identifier = new ASTNode(t); - tabNameNode.addChild(identifier); - return tabRef; - } - - // Take an expression in the values clause and turn it back into a string. This is far from - // comprehensive. At the moment it only supports: - // * literals (all types) - // * unary negatives - // * true/false - private String unparseExprForValuesClause(ASTNode expr) throws SemanticException { - switch (expr.getToken().getType()) { - case HiveParser.Number: - return expr.getText(); - - case HiveParser.StringLiteral: - return BaseSemanticAnalyzer.unescapeSQLString(expr.getText()); - - case HiveParser.KW_FALSE: - // UDFToBoolean casts any non-empty string to true, so set this to false - return ""; - - case HiveParser.KW_TRUE: - return "TRUE"; - - case HiveParser.MINUS: - return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0)); - - case HiveParser.TOK_NULL: - // Hive's text input will translate this as a null - return "\\N"; - - default: - throw new SemanticException("Expression of type " + expr.getText() + - " not supported in insert/values"); - } - - } - - private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException { - String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? - HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT): - HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); - if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) { - throw new SemanticException(generateErrorMessage((ASTNode) numerator, - message + " sampling is not supported in " + inputFormat)); - } - } - - private String processSubQuery(QB qb, ASTNode subq) throws SemanticException { - - // This is a subquery and must have an alias - if (subq.getChildCount() != 2) { - throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq)); - } - ASTNode subqref = (ASTNode) subq.getChild(0); - String alias = unescapeIdentifier(subq.getChild(1).getText()); - - // Recursively do the first phase of semantic analysis for the subquery - QBExpr qbexpr = new QBExpr(alias); - - doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias); - - // If the alias is already there then we have a conflict - if (qb.exists(alias)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq - .getChild(1))); - } - // Insert this map into the stats - qb.setSubqAlias(alias, qbexpr); - qb.addAlias(alias); - - unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1)); - - return alias; - } - - /* - * Phase1: hold onto any CTE definitions in aliasToCTE. - * CTE definitions are global to the Query. - */ - private void processCTE(QB qb, ASTNode ctes) throws SemanticException { - - int numCTEs = ctes.getChildCount(); - - for(int i=0; i <numCTEs; i++) { - ASTNode cte = (ASTNode) ctes.getChild(i); - ASTNode cteQry = (ASTNode) cte.getChild(0); - String alias = unescapeIdentifier(cte.getChild(1).getText()); - - String qName = qb.getId() == null ? "" : qb.getId() + ":"; - qName += alias.toLowerCase(); - - if ( aliasToCTEs.containsKey(qName)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1))); - } - aliasToCTEs.put(qName, new CTEClause(qName, cteQry)); - } - } - - /* - * We allow CTE definitions in views. So we can end up with a hierarchy of CTE definitions: - * - at the top level of a query statement - * - where a view is referenced. - * - views may refer to other views. - * - * The scoping rules we use are: to search for a CTE from the current QB outwards. 
In order to - * disambiguate between CTES are different levels we qualify(prefix) them with the id of the QB - * they appear in when adding them to the <code>aliasToCTEs</code> map. - * - */ - private CTEClause findCTEFromName(QB qb, String cteName) { - StringBuilder qId = new StringBuilder(); - if (qb.getId() != null) { - qId.append(qb.getId()); - } - - while (qId.length() > 0) { - String nm = qId + ":" + cteName; - CTEClause cte = aliasToCTEs.get(nm); - if (cte != null) { - return cte; - } - int lastIndex = qId.lastIndexOf(":"); - lastIndex = lastIndex < 0 ? 0 : lastIndex; - qId.setLength(lastIndex); - } - return aliasToCTEs.get(cteName); - } - - /* - * If a CTE is referenced in a QueryBlock: - * - add it as a SubQuery for now. - * - SQ.alias is the alias used in QB. (if no alias is specified, - * it used the CTE name. Works just like table references) - * - Adding SQ done by: - * - copying AST of CTE - * - setting ASTOrigin on cloned AST. - * - trigger phase 1 on new QBExpr. - * - update QB data structs: remove this as a table reference, move it to a SQ invocation. - */ - private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias) - throws SemanticException { - cteAlias = cteAlias == null ? cteName : cteAlias; - CTEClause cte = findCTEFromName(qb, cteName); - ASTNode cteQryNode = cte.cteNode; - QBExpr cteQBExpr = new QBExpr(cteAlias); - doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias); - qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr); - } - - private final CTEClause rootClause = new CTEClause(null, null); - - @Override - public List<Task<? extends Serializable>> getAllRootTasks() { - if (!rootTasksResolved) { - rootTasks = toRealRootTasks(rootClause.asExecutionOrder()); - rootTasksResolved = true; - } - return rootTasks; - } - - @Override - public HashSet<ReadEntity> getAllInputs() { - HashSet<ReadEntity> readEntities = new HashSet<ReadEntity>(getInputs()); - for (CTEClause cte : rootClause.asExecutionOrder()) { - if (cte.source != null) { - readEntities.addAll(cte.source.getInputs()); - } - } - return readEntities; - } - - @Override - public HashSet<WriteEntity> getAllOutputs() { - HashSet<WriteEntity> writeEntities = new HashSet<WriteEntity>(getOutputs()); - for (CTEClause cte : rootClause.asExecutionOrder()) { - if (cte.source != null) { - writeEntities.addAll(cte.source.getOutputs()); - } - } - return writeEntities; - } - - class CTEClause { - CTEClause(String alias, ASTNode cteNode) { - this.alias = alias; - this.cteNode = cteNode; - } - String alias; - ASTNode cteNode; - boolean materialize; - int reference; - QBExpr qbExpr; - List<CTEClause> parents = new ArrayList<CTEClause>(); - - // materialized - Table table; - SemanticAnalyzer source; - - List<Task<? extends Serializable>> getTasks() { - return source == null ? null : source.rootTasks; - } - - List<CTEClause> asExecutionOrder() { - List<CTEClause> execution = new ArrayList<CTEClause>(); - asExecutionOrder(new HashSet<CTEClause>(), execution); - return execution; - } - - void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) { - for (CTEClause parent : parents) { - if (visited.add(parent)) { - parent.asExecutionOrder(visited, execution); - } - } - execution.add(this); - } - - @Override - public String toString() { - return alias == null ? "<root>" : alias; - } - } - - private List<Task<? extends Serializable>> toRealRootTasks(List<CTEClause> execution) { - List<Task<? extends Serializable>> cteRoots = new ArrayList<>(); - List<Task<? 
extends Serializable>> cteLeafs = new ArrayList<>(); - List<Task<? extends Serializable>> curTopRoots = null; - List<Task<? extends Serializable>> curBottomLeafs = null; - for (int i = 0; i < execution.size(); i++) { - CTEClause current = execution.get(i); - if (current.parents.isEmpty() && curTopRoots != null) { - cteRoots.addAll(curTopRoots); - cteLeafs.addAll(curBottomLeafs); - curTopRoots = curBottomLeafs = null; - } - List<Task<? extends Serializable>> curTasks = current.getTasks(); - if (curTasks == null) { - continue; - } - if (curTopRoots == null) { - curTopRoots = curTasks; - } - if (curBottomLeafs != null) { - for (Task<?> topLeafTask : curBottomLeafs) { - for (Task<?> currentRootTask : curTasks) { - topLeafTask.addDependentTask(currentRootTask); - } - } - } - curBottomLeafs = Task.findLeafs(curTasks); - } - if (curTopRoots != null) { - cteRoots.addAll(curTopRoots); - cteLeafs.addAll(curBottomLeafs); - } - - if (cteRoots.isEmpty()) { - return rootTasks; - } - for (Task<?> cteLeafTask : cteLeafs) { - for (Task<?> mainRootTask : rootTasks) { - cteLeafTask.addDependentTask(mainRootTask); - } - } - return cteRoots; - } - - Table materializeCTE(String cteName, CTEClause cte) throws HiveException { - - ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE)); - - ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME)); - tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName))); - - ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER)); - - createTable.addChild(tableName); - createTable.addChild(temporary); - createTable.addChild(cte.cteNode); - - SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState); - analyzer.initCtx(ctx); - analyzer.init(false); - - // should share cte contexts - analyzer.aliasToCTEs.putAll(aliasToCTEs); - - HiveOperation operation = queryState.getHiveOperation(); - try { - analyzer.analyzeInternal(createTable); - } finally { - queryState.setCommandType(operation); - } - - Table table = analyzer.tableDesc.toTable(conf); - Path location = table.getDataLocation(); - try { - location.getFileSystem(conf).mkdirs(location); - } catch (IOException e) { - throw new HiveException(e); - } - table.setMaterializedTable(true); - - LOG.info(cteName + " will be materialized into " + location); - cte.table = table; - cte.source = analyzer; - - ctx.addMaterializedTable(cteName, table); - - return table; - } - - - static boolean isJoinToken(ASTNode node) { - if ((node.getToken().getType() == HiveParser.TOK_JOIN) - || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN) - || isOuterJoinToken(node) - || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) - || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) { - return true; - } - - return false; - } - - static private boolean isOuterJoinToken(ASTNode node) { - return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN); - } - - /** - * Given the AST with TOK_JOIN as the root, get all the aliases for the tables - * or subqueries in the join. 
- * - * @param qb - * @param join - * @throws SemanticException - */ - @SuppressWarnings("nls") - private void processJoin(QB qb, ASTNode join) throws SemanticException { - int numChildren = join.getChildCount(); - if ((numChildren != 2) && (numChildren != 3) - && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) { - throw new SemanticException(generateErrorMessage(join, - "Join with multiple children")); - } - - queryProperties.incrementJoinCount(isOuterJoinToken(join)); - for (int num = 0; num < numChildren; num++) { - ASTNode child = (ASTNode) join.getChild(num); - if (child.getToken().getType() == HiveParser.TOK_TABREF) { - processTable(qb, child); - } else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) { - processSubQuery(qb, child); - } else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) { - queryProperties.setHasPTF(true); - processPTF(qb, child); - PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child); - String inputAlias = ptfInvocationSpec == null ? null : - ptfInvocationSpec.getFunction().getAlias();; - if ( inputAlias == null ) { - throw new SemanticException(generateErrorMessage(child, - "PTF invocation in a Join must have an alias")); - } - - } else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || - child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) { - // SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ... - // is not supported. Instead, the lateral view must be in a subquery - // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a - // JOIN src2 ... - throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN - .getMsg(join)); - } else if (isJoinToken(child)) { - processJoin(qb, child); - } - } - } - - /** - * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the - * table or subquery in the lateral view and also make a mapping from the - * alias to all the lateral view AST's. - * - * @param qb - * @param lateralView - * @return the alias for the table/subquery - * @throws SemanticException - */ - - private String processLateralView(QB qb, ASTNode lateralView) - throws SemanticException { - int numChildren = lateralView.getChildCount(); - - assert (numChildren == 2); - ASTNode next = (ASTNode) lateralView.getChild(1); - - String alias = null; - - switch (next.getToken().getType()) { - case HiveParser.TOK_TABREF: - alias = processTable(qb, next); - break; - case HiveParser.TOK_SUBQUERY: - alias = processSubQuery(qb, next); - break; - case HiveParser.TOK_LATERAL_VIEW: - case HiveParser.TOK_LATERAL_VIEW_OUTER: - alias = processLateralView(qb, next); - break; - default: - throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD - .getMsg(lateralView)); - } - alias = alias.toLowerCase(); - qb.getParseInfo().addLateralViewForAlias(alias, lateralView); - qb.addAlias(alias); - return alias; - } - - /** - * Phase 1: (including, but not limited to): - * - * 1. Gets all the aliases for all the tables / subqueries and makes the - * appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the - * destination and names the clause "inclause" + i 3. Creates a map from a - * string representation of an aggregation tree to the actual aggregation AST - * 4. Creates a mapping from the clause name to the select expression AST in - * destToSelExpr 5. 
Creates a mapping from a table alias to the lateral view - * AST's in aliasToLateralViews - * - * @param ast - * @param qb - * @param ctx_1 - * @throws SemanticException - */ - @SuppressWarnings({"fallthrough", "nls"}) - public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) - throws SemanticException { - - boolean phase1Result = true; - QBParseInfo qbp = qb.getParseInfo(); - boolean skipRecursion = false; - - if (ast.getToken() != null) { - skipRecursion = true; - switch (ast.getToken().getType()) { - case HiveParser.TOK_SELECTDI: - qb.countSelDi(); - // fall through - case HiveParser.TOK_SELECT: - qb.countSel(); - qbp.setSelExprForClause(ctx_1.dest, ast); - - int posn = 0; - if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) { - qbp.setHints((ASTNode) ast.getChild(0)); - posn++; - } - - if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) - queryProperties.setUsesScript(true); - - LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast, - qb, ctx_1.dest); - doPhase1GetColumnAliasesFromSelect(ast, qbp); - qbp.setAggregationExprsForClause(ctx_1.dest, aggregations); - qbp.setDistinctFuncExprsForClause(ctx_1.dest, - doPhase1GetDistinctFuncExprs(aggregations)); - break; - - case HiveParser.TOK_WHERE: - qbp.setWhrExprForClause(ctx_1.dest, ast); - if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) - queryProperties.setFilterWithSubQuery(true); - break; - - case HiveParser.TOK_INSERT_INTO: - String currentDatabase = SessionState.get().getCurrentDatabase(); - String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase); - qbp.addInsertIntoTable(tab_name, ast); - - case HiveParser.TOK_DESTINATION: - ctx_1.dest = "insclause-" + ctx_1.nextNum; - ctx_1.nextNum++; - boolean isTmpFileDest = false; - if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) { - ASTNode ch = (ASTNode) ast.getChild(0); - if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 - && ch.getChild(0) instanceof ASTNode) { - ch = (ASTNode) ch.getChild(0); - isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE; - } else { - if (ast.getToken().getType() == HiveParser.TOK_DESTINATION - && ast.getChild(0).getType() == HiveParser.TOK_TAB) { - String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), - SessionState.get().getCurrentDatabase()); - qbp.getInsertOverwriteTables().put(fullTableName, ast); - } - } - } - - // is there a insert in the subquery - if (qbp.getIsSubQ() && !isTmpFileDest) { - throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast)); - } - - if (plannerCtx != null) { - plannerCtx.setInsertToken(ast, isTmpFileDest); - } - - qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); - handleInsertStatementSpecPhase1(ast, qbp, ctx_1); - if (qbp.getClauseNamesForDest().size() > 1) { - queryProperties.setMultiDestQuery(true); - } - break; - - case HiveParser.TOK_FROM: - int child_count = ast.getChildCount(); - if (child_count != 1) { - throw new SemanticException(generateErrorMessage(ast, - "Multiple Children " + child_count)); - } - - // Check if this is a subquery / lateral view - ASTNode frm = (ASTNode) ast.getChild(0); - if (frm.getToken().getType() == HiveParser.TOK_TABREF) { - processTable(qb, frm); - } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) { - // Create a temp table with the passed values in it then rewrite this portion of the - // tree to be 
-          // tree to be from that table.
-          ASTNode newFrom = genValuesTempTable(frm, qb);
-          ast.setChild(0, newFrom);
-          processTable(qb, newFrom);
-        } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
-          processSubQuery(qb, frm);
-        } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
-            frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
-          queryProperties.setHasLateralViews(true);
-          processLateralView(qb, frm);
-        } else if (isJoinToken(frm)) {
-          processJoin(qb, frm);
-          qbp.setJoinExpr(frm);
-        } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
-          queryProperties.setHasPTF(true);
-          processPTF(qb, frm);
-        }
-        break;
-
-      case HiveParser.TOK_CLUSTERBY:
-        // Get the clusterby aliases - these are aliased to the entries in the
-        // select list
-        queryProperties.setHasClusterBy(true);
-        qbp.setClusterByExprForClause(ctx_1.dest, ast);
-        break;
-
-      case HiveParser.TOK_DISTRIBUTEBY:
-        // Get the distribute by aliases - these are aliased to the entries in
-        // the select list
-        queryProperties.setHasDistributeBy(true);
-        qbp.setDistributeByExprForClause(ctx_1.dest, ast);
-        if (qbp.getClusterByForClause(ctx_1.dest) != null) {
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
-        } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
-        }
-        break;
-
-      case HiveParser.TOK_SORTBY:
-        // Get the sort by aliases - these are aliased to the entries in the
-        // select list
-        queryProperties.setHasSortBy(true);
-        qbp.setSortByExprForClause(ctx_1.dest, ast);
-        if (qbp.getClusterByForClause(ctx_1.dest) != null) {
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
-        } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
-        }
-        break;
-
-      case HiveParser.TOK_ORDERBY:
-        // Get the order by aliases - these are aliased to the entries in the
-        // select list
-        queryProperties.setHasOrderBy(true);
-        qbp.setOrderByExprForClause(ctx_1.dest, ast);
-        if (qbp.getClusterByForClause(ctx_1.dest) != null) {
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
-        }
-        break;
-
-      case HiveParser.TOK_GROUPBY:
-      case HiveParser.TOK_ROLLUP_GROUPBY:
-      case HiveParser.TOK_CUBE_GROUPBY:
-      case HiveParser.TOK_GROUPING_SETS:
-        // Get the groupby aliases - these are aliased to the entries in the
-        // select list
-        queryProperties.setHasGroupBy(true);
-        if (qbp.getJoinExpr() != null) {
-          queryProperties.setHasJoinFollowedByGroupBy(true);
-        }
-        if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
-        }
-        qbp.setGroupByExprForClause(ctx_1.dest, ast);
-        skipRecursion = true;
-
-        // Rollup and Cubes are syntactic sugar on top of grouping sets
-        if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
-          qbp.getDestRollups().add(ctx_1.dest);
-        } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
-          qbp.getDestCubes().add(ctx_1.dest);
-        } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
-          qbp.getDestGroupingSets().add(ctx_1.dest);
-        }
-        break;
-
-      case HiveParser.TOK_HAVING:
-        qbp.setHavingExprForClause(ctx_1.dest, ast);
-        qbp.addAggregationExprsForClause(ctx_1.dest,
-            doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
-        break;
-
-      case HiveParser.KW_WINDOW:
-        if (!qb.hasWindowingSpec(ctx_1.dest)) {
-          throw new SemanticException(generateErrorMessage(ast,
-              "Query has no Cluster/Distribute By; but has a Window definition"));
-        }
-        handleQueryWindowClauses(qb, ctx_1, ast);
-        break;
-
-      case HiveParser.TOK_LIMIT:
-        if (ast.getChildCount() == 2) {
-          qbp.setDestLimit(ctx_1.dest,
-              new Integer(ast.getChild(0).getText()),
-              new Integer(ast.getChild(1).getText()));
-        } else {
-          qbp.setDestLimit(ctx_1.dest, new Integer(0),
-              new Integer(ast.getChild(0).getText()));
-        }
-        break;
-
-      case HiveParser.TOK_ANALYZE:
-        // Case of analyze command
-        String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
-
-        qb.setTabAlias(table_name, table_name);
-        qb.addAlias(table_name);
-        qb.getParseInfo().setIsAnalyzeCommand(true);
-        qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
-        qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
-        // Allow analyzing the whole table as well as dynamic partitions
-        HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
-        HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
-        break;
-
-      case HiveParser.TOK_UNIONALL:
-        if (!qbp.getIsSubQ()) {
-          // this shouldn't happen. The parser should have converted the union to be
-          // contained in a subquery. Just in case, we keep the error as a fallback.
-          throw new SemanticException(generateErrorMessage(ast,
-              ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
-        }
-        skipRecursion = false;
-        break;
-
-      case HiveParser.TOK_INSERT:
-        ASTNode destination = (ASTNode) ast.getChild(0);
-        Tree tab = destination.getChild(0);
-
-        // Proceed if AST contains partition & If Not Exists
-        if (destination.getChildCount() == 2 &&
-            tab.getChildCount() == 2 &&
-            destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
-          String tableName = tab.getChild(0).getChild(0).getText();
-
-          Tree partitions = tab.getChild(1);
-          int childCount = partitions.getChildCount();
-          HashMap<String, String> partition = new HashMap<String, String>();
-          for (int i = 0; i < childCount; i++) {
-            String partitionName = partitions.getChild(i).getChild(0).getText();
-            Tree pvalue = partitions.getChild(i).getChild(1);
-            if (pvalue == null) {
-              break;
-            }
-            String partitionVal = stripQuotes(pvalue.getText());
-            partition.put(partitionName, partitionVal);
-          }
-          // if it is a dynamic partition, throw the exception
-          if (childCount != partition.size()) {
-            throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
-                .getMsg(partition.toString()));
-          }
-          Table table = null;
-          try {
-            table = this.getTableObjectByName(tableName);
-          } catch (HiveException ex) {
-            throw new SemanticException(ex);
-          }
-          try {
-            Partition parMetaData = db.getPartition(table, partition, false);
-            // Check if the partition exists; if it does, skip the overwrite
-            if (parMetaData != null) {
-              phase1Result = false;
-              skipRecursion = true;
-              LOG.info("Partition already exists so insert into overwrite " +
-                  "skipped for partition : " + parMetaData.toString());
-              break;
-            }
-          } catch (HiveException e) {
-            LOG.info("Error while getting metadata : ", e);
-          }
-          validatePartSpec(table, partition, (ASTNode) tab, conf, false);
-        }
-        skipRecursion = false;
-        break;
-
-      case HiveParser.TOK_LATERAL_VIEW:
-      case HiveParser.TOK_LATERAL_VIEW_OUTER:
-        // todo: nested LV
-        assert ast.getChildCount() == 1;
-        qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
-        break;
-
-      case HiveParser.TOK_CTE:
-        processCTE(qb, ast);
-        break;
-
-      default:
-        skipRecursion = false;
-        break;
-      }
-    }
-
-    if (!skipRecursion) {
-      // Iterate over the rest of the children
-      int child_count = ast.getChildCount();
-      for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
-        // Recurse
-        phase1Result = phase1Result && doPhase1(
-            (ASTNode) ast.getChild(child_pos), qb, ctx_1, plannerCtx);
-      }
-    }
-    return phase1Result;
-  }
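To see the shape of doPhase1 without the Hive plumbing: it is a pre-order walk over the parse tree that switches on the token type, records what it saw in per-destination parse info, and only descends into the children when the case did not already consume the whole subtree; a false return value aborts the walk (as in the IF NOT EXISTS case above). Below is a minimal standalone sketch of that pattern under stated assumptions — Node, the token constants, and walk() are invented for illustration and are not Hive APIs.

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative only: the real code dispatches on ANTLR token types and
    // records clauses in QBParseInfo rather than a string list.
    public class Phase1Sketch {
      static final int TOK_SELECT = 1, TOK_WHERE = 2;

      static class Node {
        int type;
        List<Node> children = new ArrayList<Node>();
        Node(int type) { this.type = type; }
      }

      // Switch on the token, record what was seen, and only recurse into the
      // children when the case did not already handle the whole subtree.
      static boolean walk(Node n, List<String> clauses) {
        boolean ok = true;
        boolean skipRecursion = false;
        switch (n.type) {
        case TOK_SELECT:
          clauses.add("select");
          skipRecursion = true;   // the SELECT subtree is handled as a unit
          break;
        case TOK_WHERE:
          clauses.add("where");
          skipRecursion = true;
          break;
        default:
          break;                  // unknown token: descend into the children
        }
        if (!skipRecursion) {
          for (Node c : n.children) {
            ok = ok && walk(c, clauses);   // a false result aborts the walk
          }
        }
        return ok;
      }
    }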
-  /**
-   * This is phase 1 of support for specifying a schema in an insert statement:
-   * insert into foo(z,y) select a,b from bar;
-   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
-   * @throws SemanticException
-   */
-  private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1)
-      throws SemanticException {
-    ASTNode tabColName = (ASTNode) ast.getChild(1);
-    if (ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null
-        && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
-      // we have "insert into foo(a,b)..."; the parser enforces that 1+ columns
-      // are listed if TOK_TABCOLNAME is present
-      List<String> targetColNames = new ArrayList<String>();
-      for (Node col : tabColName.getChildren()) {
-        assert ((ASTNode) col).getType() == HiveParser.Identifier :
-            "expected token " + HiveParser.Identifier + " found " + ((ASTNode) col).getType();
-        targetColNames.add(((ASTNode) col).getText());
-      }
-      String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
-          SessionState.get().getCurrentDatabase());
-      qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
-      Set<String> targetColumns = new HashSet<String>();
-      targetColumns.addAll(targetColNames);
-      if (targetColNames.size() != targetColumns.size()) {
-        throw new SemanticException(generateErrorMessage(tabColName,
-            "Duplicate column name detected in " + fullTableName + " table schema specification"));
-      }
-      Table targetTable = null;
-      try {
-        targetTable = db.getTable(fullTableName, false);
-      } catch (HiveException ex) {
-        LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
-        throw new SemanticException(ex);
-      }
-      if (targetTable == null) {
-        throw new SemanticException(generateErrorMessage(ast,
-            "Unable to access metadata for table " + fullTableName));
-      }
-      for (FieldSchema f : targetTable.getCols()) {
-        // parser only allows foo(a,b), not foo(foo.a, foo.b)
-        targetColumns.remove(f.getName());
-      }
-      if (!targetColumns.isEmpty()) {
-        // here we need to see if the remaining columns are dynamic partition columns
-        /* We just checked the user specified schema columns among regular table columns and
-           found some which are not 'regular'. Now check if they are dynamic partition columns.
-           For dynamic partitioning,
-           given "create table multipart(a int, b int) partitioned by (c int, d int);"
-           for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect the parse
-           tree to look like this:
-           (TOK_INSERT_INTO
-             (TOK_TAB
-               (TOK_TABNAME multipart)
-               (TOK_PARTSPEC
-                 (TOK_PARTVAL c '1')
-                 (TOK_PARTVAL d)
-               )
-             )
-             (TOK_TABCOLNAME d a)
-           ) */
-        List<String> dynamicPartitionColumns = new ArrayList<String>();
-        if (ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
-          ASTNode tokTab = (ASTNode) ast.getChild(0);
-          ASTNode tokPartSpec = (ASTNode) tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
-          if (tokPartSpec != null) {
-            for (Node n : tokPartSpec.getChildren()) {
-              ASTNode tokPartVal = null;
-              if (n instanceof ASTNode) {
-                tokPartVal = (ASTNode) n;
-              }
-              if (tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL
-                  && tokPartVal.getChildCount() == 1) {
-                assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
-                    "Expected column name; found tokType=" + tokPartVal.getType();
-                dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
-              }
-            }
-          }
-        }
-        for (String colName : dynamicPartitionColumns) {
-          targetColumns.remove(colName);
-        }
-        if (!targetColumns.isEmpty()) {
-          // Found some columns in the user specified schema which are neither
-          // regular nor dynamic partition columns
-          throw new SemanticException(generateErrorMessage(tabColName,
-              "'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
-              "' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
-              " not found among regular columns of " +
-              fullTableName + " nor dynamic partition columns."));
-        }
-      }
-    }
-  }
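The validation above is set subtraction: start from the user-specified column list, reject duplicates, remove everything that is a regular column, then everything that is a dynamic partition column, and error out on any leftovers. A compact standalone sketch of that check — InsertSchemaCheck and its string-based schemas are made up here; the real code works against metastore FieldSchema objects:

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Illustrative only: schemas are plain string sets, not metastore objects.
    public class InsertSchemaCheck {
      static void check(List<String> targetCols, Set<String> regularCols, Set<String> dynPartCols) {
        Set<String> remaining = new HashSet<String>(targetCols);
        if (remaining.size() != targetCols.size()) {
          throw new IllegalArgumentException("Duplicate column name in " + targetCols);
        }
        remaining.removeAll(regularCols);   // first strip regular table columns
        remaining.removeAll(dynPartCols);   // then strip dynamic partition columns
        if (!remaining.isEmpty()) {
          throw new IllegalArgumentException(remaining +
              " not found among regular columns nor dynamic partition columns");
        }
      }

      public static void main(String[] args) {
        // The multipart example above: regular columns (a, b), dynamic
        // partition column d, and the user-specified insert schema (d, a).
        check(Arrays.asList("d", "a"),
            new HashSet<String>(Arrays.asList("a", "b")),
            new HashSet<String>(Arrays.asList("d")));
      }
    }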
-  public void getMaterializationMetadata(QB qb) throws SemanticException {
-    try {
-      gatherCTEReferences(qb, rootClause);
-      int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
-      for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
-        if (threshold >= 0 && cte.reference >= threshold) {
-          cte.materialize = true;
-        }
-      }
-    } catch (HiveException e) {
-      // Has to use full name to make sure it does not conflict with
-      // org.apache.commons.lang.StringUtils
-      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-      if (e instanceof SemanticException) {
-        throw (SemanticException) e;
-      }
-      throw new SemanticException(e.getMessage(), e);
-    }
-  }
-
-  private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
-    if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
-      gatherCTEReferences(qbexpr.getQB(), parent);
-    } else {
-      gatherCTEReferences(qbexpr.getQBExpr1(), parent);
-      gatherCTEReferences(qbexpr.getQBExpr2(), parent);
-    }
-  }
-
-  // TODO: check view references, too
-  private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
-    for (String alias : qb.getTabAliases()) {
-      String tabName = qb.getTabNameForAlias(alias);
-      String cteName = tabName.toLowerCase();
-
-      CTEClause cte = findCTEFromName(qb, cteName);
-      if (cte != null) {
-        if (ctesExpanded.contains(cteName)) {
-          throw new SemanticException("Recursive cte " + cteName +
-              " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
-              " -> " + cteName + ").");
-        }
-        cte.reference++;
-        current.parents.add(cte);
-        if (cte.qbExpr != null) {
-          continue;
-        }
-        cte.qbExpr = new QBExpr(cteName);
-        doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
-
-        ctesExpanded.add(cteName);
-        gatherCTEReferences(cte.qbExpr, cte);
-        ctesExpanded.remove(ctesExpanded.size() - 1);
-      }
-    }
-    for (String alias : qb.getSubqAliases()) {
-      gatherCTEReferences(qb.getSubqForAlias(alias), current);
-    }
-  }
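The recursion guard here is the classic expansion-stack discipline: push the CTE name before walking its definition, fail if a name is already on the stack, and pop afterwards so sibling references to the same CTE remain legal. A minimal sketch of just that discipline, under stated assumptions — CteCycleCheck and its map-of-references model are invented; only the push/check/pop pattern mirrors the ctesExpanded list above:

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.List;
    import java.util.Map;

    // Illustrative only: a CTE is reduced to the list of CTE names its
    // body references.
    public class CteCycleCheck {
      private final Deque<String> expanding = new ArrayDeque<String>();

      void expand(String name, Map<String, List<String>> cteRefs) {
        if (expanding.contains(name)) {
          throw new IllegalStateException("Recursive cte " + name +
              " detected (cycle: " + String.join(" -> ", expanding) +
              " -> " + name + ").");
        }
        List<String> refs = cteRefs.get(name);
        if (refs == null) {
          return;                  // a plain table, not a CTE
        }
        expanding.addLast(name);   // push before walking the definition body
        for (String ref : refs) {
          expand(ref, cteRefs);
        }
        expanding.removeLast();    // pop so sibling references stay legal
      }
    }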
-  public void getMetaData(QB qb) throws SemanticException {
-    getMetaData(qb, false);
-  }
-
-  public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
-    try {
-      if (enableMaterialization) {
-        getMaterializationMetadata(qb);
-      }
-      getMetaData(qb, null);
-    } catch (HiveException e) {
-      // Has to use full name to make sure it does not conflict with
-      // org.apache.commons.lang.StringUtils
-      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-      if (e instanceof SemanticException) {
-        throw (SemanticException) e;
-      }
-      throw new SemanticException(e.getMessage(), e);
-    }
-  }
-
-  private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
-      throws HiveException {
-    if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
-      getMetaData(qbexpr.getQB(), parentInput);
-    } else {
-      getMetaData(qbexpr.getQBExpr1(), parentInput);
-      getMetaData(qbexpr.getQBExpr2(), parentInput);
-    }
-  }
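The QBExpr overload shows the recursion scheme used throughout this file: a query expression is either a single query block (the NULLOP opcode) or a set operation over two sub-expressions, and metadata resolution simply recurses structurally. A standalone sketch of that tree shape — QueryExpr, QueryBlock, and visit() are stand-ins invented for this example, not Hive types:

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative only: mirrors the QBExpr distinction between a leaf
    // (single query block) and a two-operand set operation.
    public class QueryExprWalk {
      static class QueryBlock {
        String name;
        QueryBlock(String name) { this.name = name; }
      }
      static class QueryExpr {
        QueryBlock block;        // non-null for a leaf
        QueryExpr left, right;   // non-null for a set operation
      }

      // Leaves get the real work; internal nodes just recurse into
      // both operands, like getMetaData(QBExpr, ReadEntity) above.
      static void visit(QueryExpr e, List<String> visited) {
        if (e.block != null) {
          visited.add(e.block.name);   // resolve metadata for one block
        } else {
          visit(e.left, visited);
          visit(e.right, visited);
        }
      }
    }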
-  @SuppressWarnings("nls")
-  private void getMetaData(QB qb, ReadEntity parentInput)
-      throws HiveException {
-    LOG.info("Get metadata for source tables");
-
-    // Go over the tables and populate the related structures.
-    // We have to materialize the table alias list since we might
-    // modify it in the middle for view rewrite.
-    List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
-
-    // Keep track of view alias to view name and read entity
-    // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
-    // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
-    // This is needed for tracking the dependencies for inputs, along with their parents.
-    Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
-        new HashMap<String, ObjectPair<String, ReadEntity>>();
-
-    /*
-     * used to capture view to SQ conversions. This is used to check for
-     * recursive CTE invocations.
-     */
-    Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
-
-    for (String alias : tabAliases) {
-      String tabName = qb.getTabNameForAlias(alias);
-      String cteName = tabName.toLowerCase();
-
-      Table tab = db.getTable(tabName, false);
-      if (tab == null ||
-          tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
-        Table materializedTab = ctx.getMaterializedTable(cteName);
-        if (materializedTab == null) {
-          // we first look for this alias from CTE, and then from catalog.
-          CTEClause cte = findCTEFromName(qb, cteName);
-          if (cte != null) {
-            if (!cte.materialize) {
-              addCTEAsSubQuery(qb, cteName, alias);
-              sqAliasToCTEName.put(alias, cteName);
-              continue;
-            }
-            tab = materializeCTE(cteName, cte);
-          }
-        } else {
-          tab = materializedTab;
-        }
-      }
-
-      if (tab == null) {
-        ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
-        if (null != src) {
-          throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
-        } else {
-          throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
-        }
-      }
-
-      // Disallow INSERT INTO on bucketized tables
-      boolean isAcid = AcidUtils.isAcidTable(tab);
-      boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName());
-      if (isTableWrittenTo &&
-          tab.getNumBuckets() > 0 && !isAcid) {
-        throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
-            getMsg("Table: " + tabName));
-      }
-      // Disallow update and delete on non-acid tables
-      if ((updating() || deleting()) && !isAcid && isTableWrittenTo) {
-        // isTableWrittenTo: delete from acidTbl where a in (select id from nonAcidTable)
-        // so only assert this if we are actually writing to this table
-        // Whether we are using an acid compliant transaction manager has already been caught in
-        // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
-        // here, it means the table itself doesn't support it.
-        throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, tabName);
-      }
-
-      if (tab.isView()) {
-        if (qb.getParseInfo().isAnalyzeCommand()) {
-          throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
-        }
-        String fullViewName = tab.getDbName() + "." + tab.getTableName();
-        // Prevent view cycles
-        if (viewsExpanded.contains(fullViewName)) {
-          throw new SemanticException("Recursive view " + fullViewName +
-              " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
-              " -> " + fullViewName + ").");
-        }
-        replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
-        // This is the last time we'll see the Table objects for views, so add it to the inputs
-        // now. isInsideView will tell if this view is embedded in another view.
-        ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
-        viewInput = PlanUtils.addInput(inputs, viewInput);
-        aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
-        String aliasId = getAliasId(alias, qb);
-        if (aliasId != null) {
-          aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
-              .replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
-        }
-        viewAliasToInput.put(aliasId, viewInput);
-        continue;
-      }
-
-      if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
-        throw new SemanticException(generateErrorMessage(
-            qb.getParseInfo().getSrcForAlias(alias),
-            ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
-      }
-
-      qb.getMetaData().setSrcForAlias(alias, tab);
-
-      if (qb.getParseInfo().isAnalyzeCommand()) {
-        // allow partial partition specification for noscan since noscan is fast.
-        TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
-        if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
-          try {
-            ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
-          } catch (HiveException e) {
-            throw new SemanticException(generateErrorMessage(
-                qb.getParseInfo().getSrcForAlias(alias),
-                "Cannot get partitions for " + ts.partSpec), e);
-          }
-        }
-        // validate partial scan command
-        QBParseInfo qbpi = qb.getParseInfo();
-        if (qbpi.isPartialScanAnalyzeCommand()) {
-          Class<? extends InputFormat> inputFormatClass = null;
-          switch (ts.specType) {
-          case TABLE_ONLY:
-          case DYNAMIC_PARTITION:
-            inputFormatClass = ts.tableHandle.getInputFormatClass();
-            break;
-          case STATIC_PARTITION:
-            inputFormatClass = ts.partHandle.getInputFormatClass();
-            break;
-          default:
-            assert false;
-          }
-          // throw a HiveException for formats other than rcfile or orcfile.
-          if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
-              .equals(OrcInputFormat.class))) {
-            throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
-          }
-        }
-
-        tab.setTableSpec(ts);
-        qb.getParseInfo().addTableSpec(alias, ts);
-      }
-
-      ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
-      PlanUtils.addInput(inputs,
-          new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect);
-    }
-
-    LOG.info("Get metadata for subqueries");
-    // Go over the subqueries and getMetaData for these
-    for (String alias : qb.getSubqAliases()) {
-      boolean wasView = aliasToViewInfo.containsKey(alias);
-      boolean wasCTE = sqAliasToCTEName.containsKey(alias);
-      ReadEntity newParentInput = null;
-      if (wasView) {
-        viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
-        newParentInput = aliasToViewInfo.get(alias).getSecond();
-      } else if (wasCTE) {
-        ctesExpanded.add(sqAliasToCTEName.get(alias));
-      }
-      QBExpr qbexpr = qb.getSubqForAlias(alias);
-      getMetaData(qbexpr, newParentInput);
-      if (wasView) {
-        viewsExpanded.remove(viewsExpanded.size() - 1);
-      } else if (wasCTE) {
-        ctesExpanded.remove(ctesExpanded.size() - 1);
-      }
-    }
-
-    RowFormatParams rowFormatParams = new RowFormatParams();
-    StorageFormat storageFormat = new StorageFormat(conf);
-
-    LOG.info("Get metadata for destination tables");
-    // Go over all the destination structures and populate the related
-    // metadata
-    QBParseInfo qbp = qb.getParseInfo();
-
-    for (String name : qbp.getClauseNamesForDest()) {
-      ASTNode ast = qbp.getDestForClause(name);
-      switch (ast.getToken().getType()) {
-      case HiveParser.TOK_TAB: {
-        TableSpec ts = new TableSpec(db, conf, ast);
-        if (ts.tableHandle.isView()) {
-          throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
-        }
-
-        Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
-        if (!ts.tableHandle.isNonNative() &&
-            !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
-          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
-              .getMsg(ast, "The class is " + outputFormatClass.toString()));
-        }
-
-        // TableSpec ts is obtained from the query (user specified),
-        // which means the user didn't specify partitions in their query,
-        // but whether the table itself is partitioned is not known.
-        if (ts.specType != SpecType.STATIC_PARTITION) {
-          // This is a table or dynamic partition
-          qb.getMetaData().setDestForAlias(name, ts.tableHandle);
-          // has dynamic as well as static partitions
-          if (ts.partSpec != null && ts.partSpec.size() > 0) {
-            qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
-          }
-        } else {
-          // This is a partition
-          qb.getMetaData().setDestForAlias(name, ts.partHandle);
-        }
-        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-          // Add the table spec for the destination table.
-          qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
-        }
-        break;
-      }
-
-      case HiveParser.TOK_DIR: {
-        // This is a dfs file
-        String fname = stripQuotes(ast.getChild(0).getText());
-        if ((!qb.getParseInfo().getIsSubQ())
-            && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
-
-          if (qb.isCTAS()) {
-            qb.setIsQuery(false);
-            ctx.setResDir(null);
-            ctx.setResFile(null);
-
-            // allocate
<TRUNCATED>
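The destination loop that the truncation cuts off dispatches two ways: TOK_TAB resolves a TableSpec (table, static partition, or dynamic partition) as the write target, while TOK_DIR resolves a filesystem path, with TOK_TMP_FILE marking a plain SELECT whose results are fetched through a scratch directory. A small standalone sketch of that two-way dispatch — the names and helpers below are invented for illustration and are not Hive APIs:

    import java.util.Map;

    // Illustrative only: stand-ins for what the real code derives from a
    // TableSpec (TOK_TAB) or a quoted path (TOK_DIR).
    public class DestDispatch {
      static String describeTable(String table, Map<String, String> staticPartSpec) {
        // TOK_TAB: write to a table, possibly into a static partition.
        return "write to table " + table +
            (staticPartSpec.isEmpty() ? "" : " partition " + staticPartSpec);
      }

      static String describeDir(String path, boolean isTmpFile) {
        // TOK_DIR: either a user-visible directory, or (TOK_TMP_FILE) a
        // scratch directory used to fetch a simple SELECT's results.
        return (isTmpFile ? "fetch via temp file " : "write to directory ") + path;
      }
    }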
