http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java deleted file mode 100644 index a931489..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java +++ /dev/null @@ -1,2932 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.IdentityHashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.cloudera.impala.analysis.Path.PathType; -import com.cloudera.impala.authorization.AuthorizationConfig; -import com.cloudera.impala.authorization.Privilege; -import com.cloudera.impala.authorization.PrivilegeRequest; -import com.cloudera.impala.authorization.PrivilegeRequestBuilder; -import com.cloudera.impala.authorization.User; -import com.cloudera.impala.catalog.CatalogException; -import com.cloudera.impala.catalog.Column; -import com.cloudera.impala.catalog.DataSourceTable; -import com.cloudera.impala.catalog.DatabaseNotFoundException; -import com.cloudera.impala.catalog.Db; -import com.cloudera.impala.catalog.HBaseTable; -import com.cloudera.impala.catalog.HdfsTable; -import com.cloudera.impala.catalog.ImpaladCatalog; -import com.cloudera.impala.catalog.KuduTable; -import com.cloudera.impala.catalog.Table; -import com.cloudera.impala.catalog.TableLoadingException; -import com.cloudera.impala.catalog.Type; -import com.cloudera.impala.catalog.View; -import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.common.Id; -import com.cloudera.impala.common.IdGenerator; -import com.cloudera.impala.common.ImpalaException; -import com.cloudera.impala.common.InternalException; -import com.cloudera.impala.common.Pair; -import com.cloudera.impala.common.PrintUtils; -import com.cloudera.impala.planner.PlanNode; -import com.cloudera.impala.service.FeSupport; -import com.cloudera.impala.thrift.TAccessEvent; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TLineageGraph; -import com.cloudera.impala.thrift.TNetworkAddress; -import com.cloudera.impala.thrift.TQueryCtx; -import com.cloudera.impala.util.DisjointSet; -import com.cloudera.impala.util.EventSequence; -import com.cloudera.impala.util.ListMap; -import com.cloudera.impala.util.TSessionStateUtil; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicates; -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; - -/** - * Repository of analysis state for single select block. - * - * Conjuncts: - * Conjuncts are registered during analysis (registerConjuncts()) and assigned during the - * planning process (getUnassigned[Oj]Conjuncts()/isConjunctAssigned()/ - * markConjunctsAssigned()). - * All conjuncts are assigned a unique id when initially registered, and all registered - * conjuncts are referenced by their id (ie, there are no containers other than the one - * holding the referenced conjuncts), to make substitute() simple. - * - * Slot equivalence classes: - * Equivalence of individual slots is computed based on registered equality predicates; - * those predicates are either present directly in the query or are implied by the - * syntactic elements used in query (example: a GROUP BY clause has implied equality - * predicates between the grouping exprs and the grouping slots of the aggregation - * output tuple). - * Implied equality predicates are registered with createAuxEquivPredicate(); they are - * never assigned during plan generation. - * Also tracks each catalog object access, so authorization checks can be performed once - * analysis is complete. - * TODO: We often use the terms stmt/block/analyzer interchangeably, although they may - * have slightly different meanings (sometimes depending on the context). Use the terms - * more accurately and consistently here and elsewhere. - */ -public class Analyzer { - // Common analysis error messages - public final static String DB_DOES_NOT_EXIST_ERROR_MSG = "Database does not exist: "; - public final static String DB_ALREADY_EXISTS_ERROR_MSG = "Database already exists: "; - public final static String TBL_DOES_NOT_EXIST_ERROR_MSG = "Table does not exist: "; - public final static String TBL_ALREADY_EXISTS_ERROR_MSG = "Table already exists: "; - public final static String FN_DOES_NOT_EXIST_ERROR_MSG = "Function does not exist: "; - public final static String FN_ALREADY_EXISTS_ERROR_MSG = "Function already exists: "; - public final static String DATA_SRC_DOES_NOT_EXIST_ERROR_MSG = - "Data source does not exist: "; - public final static String DATA_SRC_ALREADY_EXISTS_ERROR_MSG = - "Data source already exists: "; - - private final static Logger LOG = LoggerFactory.getLogger(Analyzer.class); - - private final User user_; - - // Indicates whether this query block contains a straight join hint. - private boolean isStraightJoin_ = false; - - // Whether to use Hive's auto-generated column labels. - private boolean useHiveColLabels_ = false; - - // True if the corresponding select block has a limit and/or offset clause. - private boolean hasLimitOffsetClause_ = false; - - // Current depth of nested analyze() calls. Used for enforcing a - // maximum expr-tree depth. Needs to be manually maintained by the user - // of this Analyzer with incrementCallDepth() and decrementCallDepth(). - private int callDepth_ = 0; - - // Flag indicating if this analyzer instance belongs to a subquery. - private boolean isSubquery_ = false; - - // Flag indicating whether this analyzer belongs to a WITH clause view. - private boolean isWithClause_ = false; - - // If set, when privilege requests are registered they will use this error - // error message. - private String authErrorMsg_; - - // If false, privilege requests will not be registered in the analyzer. - private boolean enablePrivChecks_ = true; - - // By default, all registered semi-joined tuples are invisible, i.e., their slots - // cannot be referenced. If set, this semi-joined tuple is made visible. Such a tuple - // should only be made visible for analyzing the On-clause of its semi-join. - // In particular, if there are multiple semi-joins in the same query block, then the - // On-clause of any such semi-join is not allowed to reference other semi-joined tuples - // except its own. Therefore, only a single semi-joined tuple can be visible at a time. - private TupleId visibleSemiJoinedTupleId_ = null; - - public void setIsSubquery() { - isSubquery_ = true; - globalState_.containsSubquery = true; - } - public boolean isSubquery() { return isSubquery_; } - public boolean setHasPlanHints() { return globalState_.hasPlanHints = true; } - public boolean hasPlanHints() { return globalState_.hasPlanHints; } - public void setIsWithClause() { isWithClause_ = true; } - public boolean isWithClause() { return isWithClause_; } - - // state shared between all objects of an Analyzer tree - // TODO: Many maps here contain properties about tuples, e.g., whether - // a tuple is outer/semi joined, etc. Remove the maps in favor of making - // them properties of the tuple descriptor itself. - private static class GlobalState { - // TODO: Consider adding an "exec-env"-like global singleton that contains the - // catalog and authzConfig. - public final ImpaladCatalog catalog; - public final TQueryCtx queryCtx; - public final AuthorizationConfig authzConfig; - public final DescriptorTable descTbl = new DescriptorTable(); - public final IdGenerator<ExprId> conjunctIdGenerator = ExprId.createGenerator(); - public final ColumnLineageGraph lineageGraph; - - // True if we are analyzing an explain request. Should be set before starting - // analysis. - public boolean isExplain; - - // Indicates whether the query has plan hints. - public boolean hasPlanHints = false; - - // True if at least one of the analyzers belongs to a subquery. - public boolean containsSubquery = false; - - // all registered conjuncts (map from expr id to conjunct) - public final Map<ExprId, Expr> conjuncts = Maps.newHashMap(); - - // all registered conjuncts bound by a single tuple id; used in getBoundPredicates() - public final ArrayList<ExprId> singleTidConjuncts = Lists.newArrayList(); - - // eqJoinConjuncts[tid] contains all conjuncts of the form - // "<lhs> = <rhs>" in which either lhs or rhs is fully bound by tid - // and the other side is not bound by tid (ie, predicates that express equi-join - // conditions between two tablerefs). - // A predicate such as "t1.a = t2.b" has two entries, one for 't1' and - // another one for 't2'. - public final Map<TupleId, List<ExprId>> eqJoinConjuncts = Maps.newHashMap(); - - // set of conjuncts that have been assigned to some PlanNode - public Set<ExprId> assignedConjuncts = - Collections.newSetFromMap(new IdentityHashMap<ExprId, Boolean>()); - - // map from outer-joined tuple id, i.e., one that is nullable, - // to the last Join clause (represented by its rhs table ref) that outer-joined it - public final Map<TupleId, TableRef> outerJoinedTupleIds = Maps.newHashMap(); - - // Map of registered conjunct to the last full outer join (represented by its - // rhs table ref) that outer joined it. - public final Map<ExprId, TableRef> fullOuterJoinedConjuncts = Maps.newHashMap(); - - // Map of full-outer-joined tuple id to the last full outer join that outer-joined it - public final Map<TupleId, TableRef> fullOuterJoinedTupleIds = Maps.newHashMap(); - - // Map from semi-joined tuple id, i.e., one that is invisible outside the join's - // On-clause, to its Join clause (represented by its rhs table ref). An anti-join is - // a kind of semi-join, so anti-joined tuples are also registered here. - public final Map<TupleId, TableRef> semiJoinedTupleIds = Maps.newHashMap(); - - // Map from right-hand side table-ref id of an outer join to the list of - // conjuncts in its On clause. There is always an entry for an outer join, but the - // corresponding value could be an empty list. There is no entry for non-outer joins. - public final Map<TupleId, List<ExprId>> conjunctsByOjClause = Maps.newHashMap(); - - // map from registered conjunct to its containing outer join On clause (represented - // by its right-hand side table ref); this is limited to conjuncts that can only be - // correctly evaluated by the originating outer join, including constant conjuncts - public final Map<ExprId, TableRef> ojClauseByConjunct = Maps.newHashMap(); - - // map from registered conjunct to its containing semi join On clause (represented - // by its right-hand side table ref) - public final Map<ExprId, TableRef> sjClauseByConjunct = Maps.newHashMap(); - - // map from registered conjunct to its containing inner join On clause (represented - // by its right-hand side table ref) - public final Map<ExprId, TableRef> ijClauseByConjunct = Maps.newHashMap(); - - // map from slot id to the analyzer/block in which it was registered - public final Map<SlotId, Analyzer> blockBySlot = Maps.newHashMap(); - - // Tracks all privilege requests on catalog objects. - private final Set<PrivilegeRequest> privilegeReqs = Sets.newLinkedHashSet(); - - // List of PrivilegeRequest to custom authorization failure error message. - // Tracks all privilege requests on catalog objects that need a custom - // error message returned to avoid exposing existence of catalog objects. - private final List<Pair<PrivilegeRequest, String>> maskedPrivilegeReqs = - Lists.newArrayList(); - - // accesses to catalog objects - // TODO: This can be inferred from privilegeReqs. They should be coalesced. - public Set<TAccessEvent> accessEvents = Sets.newHashSet(); - - // Tracks all warnings (e.g. non-fatal errors) that were generated during analysis. - // These are passed to the backend and eventually propagated to the shell. Maps from - // warning message to the number of times that warning was logged (in order to avoid - // duplicating the same warning over and over). - public final LinkedHashMap<String, Integer> warnings = - new LinkedHashMap<String, Integer>(); - - public final IdGenerator<EquivalenceClassId> equivClassIdGenerator = - EquivalenceClassId.createGenerator(); - - // map from equivalence class id to the list of its member slots - private final Map<EquivalenceClassId, ArrayList<SlotId>> equivClassMembers = - Maps.newHashMap(); - - // map from slot id to its equivalence class id; - // only visible at the root Analyzer - private final Map<SlotId, EquivalenceClassId> equivClassBySlotId = Maps.newHashMap(); - - // map for each slot to the canonical slot of its equivalence class - private final ExprSubstitutionMap equivClassSmap = new ExprSubstitutionMap(); - - // represents the direct and transitive value transfers between slots - private ValueTransferGraph valueTransferGraph; - - private final List<Pair<SlotId, SlotId>> registeredValueTransfers = - Lists.newArrayList(); - - // Bidirectional map between Integer index and TNetworkAddress. - // Decreases the size of the scan range locations. - private final ListMap<TNetworkAddress> hostIndex = new ListMap<TNetworkAddress>(); - - // Timeline of important events in the planning process, used for debugging / - // profiling - private final EventSequence timeline = new EventSequence("Planner Timeline"); - - public GlobalState(ImpaladCatalog catalog, TQueryCtx queryCtx, - AuthorizationConfig authzConfig) { - this.catalog = catalog; - this.queryCtx = queryCtx; - this.authzConfig = authzConfig; - this.lineageGraph = new ColumnLineageGraph(); - } - }; - - private final GlobalState globalState_; - - public boolean containsSubquery() { return globalState_.containsSubquery; } - - /** - * Helper function to reset the global state information about the existence of - * subqueries. - */ - public void resetSubquery() { globalState_.containsSubquery = false; } - - // An analyzer stores analysis state for a single select block. A select block can be - // a top level select statement, or an inline view select block. - // ancestors contains the Analyzers of the enclosing select blocks of 'this' - // (ancestors[0] contains the immediate parent, etc.). - private final ArrayList<Analyzer> ancestors_; - - // map from lowercase table alias to a view definition in this analyzer's scope - private final Map<String, View> localViews_ = Maps.newHashMap(); - - // Map from lowercase table alias to descriptor. Tables without an explicit alias - // are assigned two implicit aliases: the unqualified and fully-qualified table name. - // Such tables have two entries pointing to the same descriptor. If an alias is - // ambiguous, then this map retains the first entry with that alias to simplify error - // checking (duplicate vs. ambiguous alias). - private final Map<String, TupleDescriptor> aliasMap_ = Maps.newHashMap(); - - // Map from tuple id to its corresponding table ref. - private final Map<TupleId, TableRef> tableRefMap_ = Maps.newHashMap(); - - // Set of lowercase ambiguous implicit table aliases. - private final Set<String> ambiguousAliases_ = Sets.newHashSet(); - - // Map from lowercase fully-qualified path to its slot descriptor. Only contains paths - // that have a scalar type as destination (see registerSlotRef()). - private final Map<String, SlotDescriptor> slotPathMap_ = Maps.newHashMap(); - - // Tracks the all tables/views found during analysis that were missing metadata. - private Set<TableName> missingTbls_ = new HashSet<TableName>(); - - // Indicates whether this analyzer/block is guaranteed to have an empty result set - // due to a limit 0 or constant conjunct evaluating to false. - private boolean hasEmptyResultSet_ = false; - - // Indicates whether the select-project-join (spj) portion of this query block - // is guaranteed to return an empty result set. Set due to a constant non-Having - // conjunct evaluating to false. - private boolean hasEmptySpjResultSet_ = false; - - public Analyzer(ImpaladCatalog catalog, TQueryCtx queryCtx, - AuthorizationConfig authzConfig) { - ancestors_ = Lists.newArrayList(); - globalState_ = new GlobalState(catalog, queryCtx, authzConfig); - user_ = new User(TSessionStateUtil.getEffectiveUser(queryCtx.session)); - } - - /** - * Analyzer constructor for nested select block. GlobalState is inherited from the - * parentAnalyzer. - */ - public Analyzer(Analyzer parentAnalyzer) { - this(parentAnalyzer, parentAnalyzer.globalState_); - } - - /** - * Analyzer constructor for nested select block with the specified global state. - */ - private Analyzer(Analyzer parentAnalyzer, GlobalState globalState) { - ancestors_ = Lists.newArrayList(parentAnalyzer); - ancestors_.addAll(parentAnalyzer.ancestors_); - globalState_ = globalState; - missingTbls_ = parentAnalyzer.missingTbls_; - user_ = parentAnalyzer.getUser(); - useHiveColLabels_ = parentAnalyzer.useHiveColLabels_; - authErrorMsg_ = parentAnalyzer.authErrorMsg_; - enablePrivChecks_ = parentAnalyzer.enablePrivChecks_; - isWithClause_ = parentAnalyzer.isWithClause_; - } - - /** - * Returns a new analyzer with the specified parent analyzer but with a new - * global state. - */ - public static Analyzer createWithNewGlobalState(Analyzer parentAnalyzer) { - GlobalState globalState = new GlobalState(parentAnalyzer.globalState_.catalog, - parentAnalyzer.getQueryCtx(), parentAnalyzer.getAuthzConfig()); - return new Analyzer(parentAnalyzer, globalState); - } - - /** - * Makes the given semi-joined tuple visible such that its slots can be referenced. - * If tid is null, makes the currently visible semi-joined tuple invisible again. - */ - public void setVisibleSemiJoinedTuple(TupleId tid) { - Preconditions.checkState(tid == null - || globalState_.semiJoinedTupleIds.containsKey(tid)); - Preconditions.checkState(tid == null || visibleSemiJoinedTupleId_ == null); - visibleSemiJoinedTupleId_ = tid; - } - - public Set<TableName> getMissingTbls() { return missingTbls_; } - public boolean hasMissingTbls() { return !missingTbls_.isEmpty(); } - public boolean hasAncestors() { return !ancestors_.isEmpty(); } - public Analyzer getParentAnalyzer() { - return hasAncestors() ? ancestors_.get(0) : null; - } - - /** - * Returns the analyzer that has an entry for the given tuple descriptor in its - * tableRefMap, or null if no such analyzer could be found. Searches the hierarchy - * of analyzers bottom-up. - */ - public Analyzer findAnalyzer(TupleId tid) { - if (tableRefMap_.containsKey(tid)) return this; - if (hasAncestors()) return getParentAnalyzer().findAnalyzer(tid); - return null; - } - - /** - * Returns a list of each warning logged, indicating if it was logged more than once. - */ - public List<String> getWarnings() { - List<String> result = new ArrayList<String>(); - for (Map.Entry<String, Integer> e : globalState_.warnings.entrySet()) { - String error = e.getKey(); - int count = e.getValue(); - Preconditions.checkState(count > 0); - if (count == 1) { - result.add(error); - } else { - result.add(error + " (" + count + " warnings like this)"); - } - } - return result; - } - - /** - * Registers a local view definition with this analyzer. Throws an exception if a view - * definition with the same alias has already been registered or if the number of - * explicit column labels is greater than the number of columns in the view statement. - */ - public void registerLocalView(View view) throws AnalysisException { - Preconditions.checkState(view.isLocalView()); - if (view.hasColLabels()) { - List<String> viewLabels = view.getColLabels(); - List<String> queryStmtLabels = view.getQueryStmt().getColLabels(); - if (viewLabels.size() > queryStmtLabels.size()) { - throw new AnalysisException("WITH-clause view '" + view.getName() + - "' returns " + queryStmtLabels.size() + " columns, but " + - viewLabels.size() + " labels were specified. The number of column " + - "labels must be smaller or equal to the number of returned columns."); - } - } - if (localViews_.put(view.getName().toLowerCase(), view) != null) { - throw new AnalysisException( - String.format("Duplicate table alias: '%s'", view.getName())); - } - } - - /** - * Creates an returns an empty TupleDescriptor for the given table ref and registers - * it against all its legal aliases. For tables refs with an explicit alias, only the - * explicit alias is legal. For tables refs with no explicit alias, the fully-qualified - * and unqualified table names are legal aliases. Column references against unqualified - * implicit aliases can be ambiguous, therefore, we register such ambiguous aliases - * here. Requires that all views have been substituted. - * Throws if an existing explicit alias or implicit fully-qualified alias - * has already been registered for another table ref. - */ - public TupleDescriptor registerTableRef(TableRef ref) throws AnalysisException { - String uniqueAlias = ref.getUniqueAlias(); - if (aliasMap_.containsKey(uniqueAlias)) { - throw new AnalysisException("Duplicate table alias: '" + uniqueAlias + "'"); - } - - // If ref has no explicit alias, then the unqualified and the fully-qualified table - // names are legal implicit aliases. Column references against unqualified implicit - // aliases can be ambiguous, therefore, we register such ambiguous aliases here. - String unqualifiedAlias = null; - String[] aliases = ref.getAliases(); - if (aliases.length > 1) { - unqualifiedAlias = aliases[1]; - TupleDescriptor tupleDesc = aliasMap_.get(unqualifiedAlias); - if (tupleDesc != null) { - if (tupleDesc.hasExplicitAlias()) { - throw new AnalysisException( - "Duplicate table alias: '" + unqualifiedAlias + "'"); - } else { - ambiguousAliases_.add(unqualifiedAlias); - } - } - } - - // Delegate creation of the tuple descriptor to the concrete table ref. - TupleDescriptor result = ref.createTupleDescriptor(this); - result.setAliases(aliases, ref.hasExplicitAlias()); - // Register all legal aliases. - for (String alias: aliases) { - aliasMap_.put(alias, result); - } - tableRefMap_.put(result.getId(), ref); - return result; - } - - /** - * Resolves the given TableRef into a concrete BaseTableRef, ViewRef or - * CollectionTableRef. Returns the new resolved table ref or the given table - * ref if it is already resolved. - * Registers privilege requests and throws an AnalysisException if the tableRef's - * path could not be resolved. The privilege requests are added to ensure that - * an AuthorizationException is preferred over an AnalysisException so as not to - * accidentally reveal the non-existence of tables/databases. - */ - public TableRef resolveTableRef(TableRef tableRef) throws AnalysisException { - // Return the table if it is already resolved. - if (tableRef.isResolved()) return tableRef; - // Try to find a matching local view. - if (tableRef.getPath().size() == 1) { - // Searches the hierarchy of analyzers bottom-up for a registered local view with - // a matching alias. - String viewAlias = tableRef.getPath().get(0).toLowerCase(); - Analyzer analyzer = this; - do { - View localView = analyzer.localViews_.get(viewAlias); - if (localView != null) return new InlineViewRef(localView, tableRef); - analyzer = (analyzer.ancestors_.isEmpty() ? null : analyzer.ancestors_.get(0)); - } while (analyzer != null); - } - - // Resolve the table ref's path and determine what resolved table ref - // to replace it with. - List<String> rawPath = tableRef.getPath(); - Path resolvedPath = null; - try { - resolvedPath = resolvePath(tableRef.getPath(), PathType.TABLE_REF); - } catch (AnalysisException e) { - if (!hasMissingTbls()) { - // Register privilege requests to prefer reporting an authorization error over - // an analysis error. We should not accidentally reveal the non-existence of a - // table/database if the user is not authorized. - if (rawPath.size() > 1) { - registerPrivReq(new PrivilegeRequestBuilder() - .onTable(rawPath.get(0), rawPath.get(1)) - .allOf(Privilege.SELECT).toRequest()); - } - registerPrivReq(new PrivilegeRequestBuilder() - .onTable(getDefaultDb(), rawPath.get(0)) - .allOf(Privilege.SELECT).toRequest()); - } - throw e; - } catch (TableLoadingException e) { - throw new AnalysisException(String.format( - "Failed to load metadata for table: '%s'", Joiner.on(".").join(rawPath)), e); - } - - Preconditions.checkNotNull(resolvedPath); - if (resolvedPath.destTable() != null) { - Table table = resolvedPath.destTable(); - Preconditions.checkNotNull(table); - if (table instanceof View) return new InlineViewRef((View) table, tableRef); - // The table must be a base table. - Preconditions.checkState(table instanceof HdfsTable || - table instanceof KuduTable || - table instanceof HBaseTable || - table instanceof DataSourceTable); - return new BaseTableRef(tableRef, resolvedPath); - } else { - return new CollectionTableRef(tableRef, resolvedPath); - } - } - - /** - * Register conjuncts that are outer joined by a full outer join. For a given - * predicate, we record the last full outer join that outer-joined any of its - * tuple ids. We need this additional information because full-outer joins obey - * different rules with respect to predicate pushdown compared to left and right - * outer joins. - */ - public void registerFullOuterJoinedConjunct(Expr e) { - Preconditions.checkState( - !globalState_.fullOuterJoinedConjuncts.containsKey(e.getId())); - List<TupleId> tids = Lists.newArrayList(); - e.getIds(tids, null); - for (TupleId tid: tids) { - if (!globalState_.fullOuterJoinedTupleIds.containsKey(tid)) continue; - TableRef currentOuterJoin = globalState_.fullOuterJoinedTupleIds.get(tid); - globalState_.fullOuterJoinedConjuncts.put(e.getId(), currentOuterJoin); - break; - } - LOG.trace("registerFullOuterJoinedConjunct: " + - globalState_.fullOuterJoinedConjuncts.toString()); - } - - /** - * Register tids as being outer-joined by a full outer join clause represented by - * rhsRef. - */ - public void registerFullOuterJoinedTids(List<TupleId> tids, TableRef rhsRef) { - for (TupleId tid: tids) { - globalState_.fullOuterJoinedTupleIds.put(tid, rhsRef); - } - LOG.trace("registerFullOuterJoinedTids: " + - globalState_.fullOuterJoinedTupleIds.toString()); - } - - /** - * Register tids as being outer-joined by Join clause represented by rhsRef. - */ - public void registerOuterJoinedTids(List<TupleId> tids, TableRef rhsRef) { - for (TupleId tid: tids) { - globalState_.outerJoinedTupleIds.put(tid, rhsRef); - } - LOG.trace("registerOuterJoinedTids: " + globalState_.outerJoinedTupleIds.toString()); - } - - /** - * Register the given tuple id as being the invisible side of a semi-join. - */ - public void registerSemiJoinedTid(TupleId tid, TableRef rhsRef) { - globalState_.semiJoinedTupleIds.put(tid, rhsRef); - } - - /** - * Returns the descriptor of the given explicit or implicit table alias or null if no - * such alias has been registered. - * Throws an AnalysisException if the given table alias is ambiguous. - */ - public TupleDescriptor getDescriptor(String tableAlias) throws AnalysisException { - String lookupAlias = tableAlias.toLowerCase(); - if (ambiguousAliases_.contains(lookupAlias)) { - throw new AnalysisException(String.format( - "Unqualified table alias is ambiguous: '%s'", tableAlias)); - } - return aliasMap_.get(lookupAlias); - } - - public TupleDescriptor getTupleDesc(TupleId id) { - return globalState_.descTbl.getTupleDesc(id); - } - - public SlotDescriptor getSlotDesc(SlotId id) { - return globalState_.descTbl.getSlotDesc(id); - } - - public TableRef getTableRef(TupleId tid) { return tableRefMap_.get(tid); } - - /** - * Given a "table alias"."column alias", return the SlotDescriptor - */ - public SlotDescriptor getSlotDescriptor(String qualifiedColumnName) { - return slotPathMap_.get(qualifiedColumnName); - } - - /** - * Return true if this analyzer has no ancestors. (i.e. false for the analyzer created - * for inline views/ union operands, etc.) - */ - public boolean isRootAnalyzer() { return ancestors_.isEmpty(); } - - /** - * Returns true if the query block corresponding to this analyzer is guaranteed - * to return an empty result set, e.g., due to a limit 0 or a constant predicate - * that evaluates to false. - */ - public boolean hasEmptyResultSet() { return hasEmptyResultSet_; } - public void setHasEmptyResultSet() { hasEmptyResultSet_ = true; } - - /** - * Returns true if the select-project-join portion of this query block returns - * an empty result set. - */ - public boolean hasEmptySpjResultSet() { return hasEmptySpjResultSet_; } - - /** - * Resolves the given raw path according to the given path type, as follows: - * SLOT_REF and STAR: Resolves the path in the context of all registered tuple - * descriptors, considering qualified as well as unqualified matches. - * TABLE_REF: Resolves the path in the context of all registered tuple descriptors - * only considering qualified matches, as well as catalog tables/views. - * - * Path resolution: - * Regardless of the path type, a raw path can have multiple successful resolutions. - * A resolution is said to be 'successful' if all raw path elements can be mapped - * to a corresponding alias/table/column/field. - * - * Path legality: - * A successful resolution may be illegal with respect to the path type, e.g., - * a SlotRef cannot reference intermediate collection types, etc. - * - * Path ambiguity: - * A raw path is ambiguous if it has multiple legal resolutions. Otherwise, - * the ambiguity is resolved in favor of the legal resolution. - * - * Returns the single legal path resolution if it exists. - * Throws if there was no legal resolution or if the path is ambiguous. - */ - public Path resolvePath(List<String> rawPath, PathType pathType) - throws AnalysisException, TableLoadingException { - // We only allow correlated references in predicates of a subquery. - boolean resolveInAncestors = false; - if (pathType == PathType.TABLE_REF || pathType == PathType.ANY) { - resolveInAncestors = true; - } else if (pathType == PathType.SLOT_REF) { - resolveInAncestors = isSubquery_; - } - // Convert all path elements to lower case. - ArrayList<String> lcRawPath = Lists.newArrayListWithCapacity(rawPath.size()); - for (String s: rawPath) lcRawPath.add(s.toLowerCase()); - return resolvePath(lcRawPath, pathType, resolveInAncestors); - } - - private Path resolvePath(List<String> rawPath, PathType pathType, - boolean resolveInAncestors) throws AnalysisException, TableLoadingException { - // List of all candidate paths with different roots. Paths in this list are initially - // unresolved and may be illegal with respect to the pathType. - List<Path> candidates = getTupleDescPaths(rawPath); - - LinkedList<String> errors = Lists.newLinkedList(); - if (pathType == PathType.SLOT_REF || pathType == PathType.STAR) { - // Paths rooted at all of the unique registered tuple descriptors. - for (TableRef tblRef: tableRefMap_.values()) { - candidates.add(new Path(tblRef.getDesc(), rawPath)); - } - } else { - // Always prefer table ref paths rooted at a registered tuples descriptor. - Preconditions.checkState(pathType == PathType.TABLE_REF || - pathType == PathType.ANY); - Path result = resolvePaths(rawPath, candidates, pathType, errors); - if (result != null) return result; - candidates.clear(); - - // Add paths rooted at a table with an unqualified and fully-qualified table name. - int end = Math.min(2, rawPath.size()); - for (int tblNameIdx = 0; tblNameIdx < end; ++tblNameIdx) { - String dbName = (tblNameIdx == 0) ? getDefaultDb() : rawPath.get(0); - String tblName = rawPath.get(tblNameIdx); - Table tbl = null; - try { - tbl = getTable(dbName, tblName); - } catch (AnalysisException e) { - if (hasMissingTbls()) throw e; - // Ignore other exceptions to allow path resolution to continue. - } - if (tbl != null) { - candidates.add(new Path(tbl, rawPath.subList(tblNameIdx + 1, rawPath.size()))); - } - } - } - - Path result = resolvePaths(rawPath, candidates, pathType, errors); - if (result == null && resolveInAncestors && hasAncestors()) { - result = getParentAnalyzer().resolvePath(rawPath, pathType, true); - } - if (result == null) { - Preconditions.checkState(!errors.isEmpty()); - throw new AnalysisException(errors.getFirst()); - } - return result; - } - - /** - * Returns a list of unresolved Paths that are rooted at a registered tuple - * descriptor matching a prefix of the given raw path. - */ - public List<Path> getTupleDescPaths(List<String> rawPath) - throws AnalysisException { - ArrayList<Path> result = Lists.newArrayList(); - - // Path rooted at a tuple desc with an explicit or implicit unqualified alias. - TupleDescriptor rootDesc = getDescriptor(rawPath.get(0)); - if (rootDesc != null) { - result.add(new Path(rootDesc, rawPath.subList(1, rawPath.size()))); - } - - // Path rooted at a tuple desc with an implicit qualified alias. - if (rawPath.size() > 1) { - rootDesc = getDescriptor(rawPath.get(0) + "." + rawPath.get(1)); - if (rootDesc != null) { - result.add(new Path(rootDesc, rawPath.subList(2, rawPath.size()))); - } - } - return result; - } - - /** - * Resolves the given paths and checks them for legality and ambiguity. Returns the - * single legal path resolution if it exists, null otherwise. - * Populates 'errors' with a a prioritized list of error messages starting with the - * most relevant one. The list contains at least one error message if null is returned. - */ - private Path resolvePaths(List<String> rawPath, List<Path> paths, PathType pathType, - LinkedList<String> errors) { - // For generating error messages. - String pathTypeStr = null; - String pathStr = Joiner.on(".").join(rawPath); - if (pathType == PathType.SLOT_REF) { - pathTypeStr = "Column/field reference"; - } else if (pathType == PathType.TABLE_REF) { - pathTypeStr = "Table reference"; - } else if (pathType == PathType.ANY) { - pathTypeStr = "Path"; - } else { - Preconditions.checkState(pathType == PathType.STAR); - pathTypeStr = "Star expression"; - pathStr += ".*"; - } - - List<Path> legalPaths = Lists.newArrayList(); - for (Path p: paths) { - if (!p.resolve()) continue; - - // Check legality of the resolved path. - if (p.isRootedAtTuple() && !isVisible(p.getRootDesc().getId())) { - errors.addLast(String.format( - "Illegal %s '%s' of semi-/anti-joined table '%s'", - pathTypeStr.toLowerCase(), pathStr, p.getRootDesc().getAlias())); - continue; - } - switch (pathType) { - // Illegal cases: - // 1. Destination type is not a collection. - case TABLE_REF: { - if (!p.destType().isCollectionType()) { - errors.addFirst(String.format( - "Illegal table reference to non-collection type: '%s'\n" + - "Path resolved to type: %s", pathStr, p.destType().toSql())); - continue; - } - break; - } - case SLOT_REF: { - // Illegal cases: - // 1. Path contains an intermediate collection reference. - // 2. Destination of the path is a catalog table or a registered alias. - if (p.hasNonDestCollection()) { - errors.addFirst(String.format( - "Illegal column/field reference '%s' with intermediate " + - "collection '%s' of type '%s'", - pathStr, p.getFirstCollectionName(), - p.getFirstCollectionType().toSql())); - continue; - } - // Error should be "Could not resolve...". No need to add it here explicitly. - if (p.getMatchedTypes().isEmpty()) continue; - break; - } - // Illegal cases: - // 1. Path contains an intermediate collection reference. - // 2. Destination type of the path is not a struct. - case STAR: { - if (p.hasNonDestCollection()) { - errors.addFirst(String.format( - "Illegal star expression '%s' with intermediate " + - "collection '%s' of type '%s'", - pathStr, p.getFirstCollectionName(), - p.getFirstCollectionType().toSql())); - continue; - } - if (!p.destType().isStructType()) { - errors.addFirst(String.format( - "Cannot expand star in '%s' because path '%s' resolved to type '%s'." + - "\nStar expansion is only valid for paths to a struct type.", - pathStr, Joiner.on(".").join(rawPath), p.destType().toSql())); - continue; - } - break; - } - case ANY: { - // Any path is valid. - break; - } - } - legalPaths.add(p); - } - - if (legalPaths.size() > 1) { - errors.addFirst(String.format("%s is ambiguous: '%s'", - pathTypeStr, pathStr)); - return null; - } - if (legalPaths.isEmpty()) { - if (errors.isEmpty()) { - errors.addFirst(String.format("Could not resolve %s: '%s'", - pathTypeStr.toLowerCase(), pathStr)); - } - return null; - } - return legalPaths.get(0); - } - - /** - * Returns an existing or new SlotDescriptor for the given path. Always returns - * a new empty SlotDescriptor for paths with a collection-typed destination. - */ - public SlotDescriptor registerSlotRef(Path slotPath) throws AnalysisException { - Preconditions.checkState(slotPath.isRootedAtTuple()); - // Always register a new slot descriptor for collection types. The BE currently - // relies on this behavior for setting unnested collection slots to NULL. - if (slotPath.destType().isCollectionType()) { - SlotDescriptor result = addSlotDescriptor(slotPath.getRootDesc()); - result.setPath(slotPath); - registerColumnPrivReq(result); - return result; - } - // SlotRefs with a scalar type are registered against the slot's - // fully-qualified lowercase path. - String key = slotPath.toString(); - SlotDescriptor existingSlotDesc = slotPathMap_.get(key); - if (existingSlotDesc != null) return existingSlotDesc; - SlotDescriptor result = addSlotDescriptor(slotPath.getRootDesc()); - result.setPath(slotPath); - slotPathMap_.put(key, result); - registerColumnPrivReq(result); - return result; - } - - /** - * Registers a column-level privilege request if 'slotDesc' directly or indirectly - * refers to a table column. It handles both scalar and complex-typed columns. - */ - private void registerColumnPrivReq(SlotDescriptor slotDesc) { - Preconditions.checkNotNull(slotDesc.getPath()); - TupleDescriptor tupleDesc = slotDesc.getParent(); - if (tupleDesc.isMaterialized() && tupleDesc.getTable() != null) { - Column column = tupleDesc.getTable().getColumn( - slotDesc.getPath().getRawPath().get(0)); - if (column != null) { - registerPrivReq(new PrivilegeRequestBuilder(). - allOf(Privilege.SELECT).onColumn(tupleDesc.getTableName().getDb(), - tupleDesc.getTableName().getTbl(), column.getName()).toRequest()); - } - } - } - - /** - * Creates a new slot descriptor and related state in globalState. - */ - public SlotDescriptor addSlotDescriptor(TupleDescriptor tupleDesc) { - SlotDescriptor result = globalState_.descTbl.addSlotDescriptor(tupleDesc); - globalState_.blockBySlot.put(result.getId(), this); - return result; - } - - /** - * Adds a new slot descriptor in tupleDesc that is identical to srcSlotDesc - * except for the path and slot id. - */ - public SlotDescriptor copySlotDescriptor(SlotDescriptor srcSlotDesc, - TupleDescriptor tupleDesc) { - SlotDescriptor result = globalState_.descTbl.addSlotDescriptor(tupleDesc); - globalState_.blockBySlot.put(result.getId(), this); - result.setSourceExprs(srcSlotDesc.getSourceExprs()); - result.setLabel(srcSlotDesc.getLabel()); - result.setStats(srcSlotDesc.getStats()); - result.setType(srcSlotDesc.getType()); - result.setItemTupleDesc(srcSlotDesc.getItemTupleDesc()); - return result; - } - - /** - * Register all conjuncts in a list of predicates as Having-clause conjuncts. - */ - public void registerConjuncts(List<Expr> l) throws AnalysisException { - for (Expr e: l) { - registerConjuncts(e, true); - } - } - - /** - * Register all conjuncts in 'conjuncts' that make up the On-clause of the given - * right-hand side of a join. Assigns each conjunct a unique id. If rhsRef is - * the right-hand side of an outer join, then the conjuncts conjuncts are - * registered such that they can only be evaluated by the node implementing that - * join. - */ - public void registerOnClauseConjuncts(List<Expr> conjuncts, TableRef rhsRef) - throws AnalysisException { - Preconditions.checkNotNull(rhsRef); - Preconditions.checkNotNull(conjuncts); - List<ExprId> ojConjuncts = null; - if (rhsRef.getJoinOp().isOuterJoin()) { - ojConjuncts = globalState_.conjunctsByOjClause.get(rhsRef.getId()); - if (ojConjuncts == null) { - ojConjuncts = Lists.newArrayList(); - globalState_.conjunctsByOjClause.put(rhsRef.getId(), ojConjuncts); - } - } - for (Expr conjunct: conjuncts) { - conjunct.setIsOnClauseConjunct(true); - registerConjunct(conjunct); - if (rhsRef.getJoinOp().isOuterJoin()) { - globalState_.ojClauseByConjunct.put(conjunct.getId(), rhsRef); - ojConjuncts.add(conjunct.getId()); - } - if (rhsRef.getJoinOp().isSemiJoin()) { - globalState_.sjClauseByConjunct.put(conjunct.getId(), rhsRef); - } - if (rhsRef.getJoinOp().isInnerJoin()) { - globalState_.ijClauseByConjunct.put(conjunct.getId(), rhsRef); - } - markConstantConjunct(conjunct, false); - } - } - - /** - * Register all conjuncts that make up 'e'. If fromHavingClause is false, this conjunct - * is assumed to originate from a WHERE or ON clause. - */ - public void registerConjuncts(Expr e, boolean fromHavingClause) - throws AnalysisException { - for (Expr conjunct: e.getConjuncts()) { - registerConjunct(conjunct); - markConstantConjunct(conjunct, fromHavingClause); - } - } - - /** - * If the given conjunct is a constant non-oj conjunct, marks it as assigned, and - * evaluates the conjunct. If the conjunct evaluates to false, marks this query - * block as having an empty result set or as having an empty select-project-join - * portion, if fromHavingClause is true or false, respectively. - * No-op if the conjunct is not constant or is outer joined. - * Throws an AnalysisException if there is an error evaluating `conjunct` - */ - private void markConstantConjunct(Expr conjunct, boolean fromHavingClause) - throws AnalysisException { - if (!conjunct.isConstant() || isOjConjunct(conjunct)) return; - markConjunctAssigned(conjunct); - if ((!fromHavingClause && !hasEmptySpjResultSet_) - || (fromHavingClause && !hasEmptyResultSet_)) { - try { - if (!FeSupport.EvalPredicate(conjunct, globalState_.queryCtx)) { - if (fromHavingClause) { - hasEmptyResultSet_ = true; - } else { - hasEmptySpjResultSet_ = true; - } - } - } catch (InternalException ex) { - throw new AnalysisException("Error evaluating \"" + conjunct.toSql() + "\"", ex); - } - } - } - - /** - * Assigns a new id to the given conjunct and registers it with all tuple and slot ids - * it references and with the global conjunct list. - */ - private void registerConjunct(Expr e) { - // always generate a new expr id; this might be a cloned conjunct that already - // has the id of its origin set - e.setId(globalState_.conjunctIdGenerator.getNextId()); - globalState_.conjuncts.put(e.getId(), e); - - ArrayList<TupleId> tupleIds = Lists.newArrayList(); - ArrayList<SlotId> slotIds = Lists.newArrayList(); - e.getIds(tupleIds, slotIds); - registerFullOuterJoinedConjunct(e); - - // register single tid conjuncts - if (tupleIds.size() == 1) globalState_.singleTidConjuncts.add(e.getId()); - - LOG.trace("register tuple/slotConjunct: " + Integer.toString(e.getId().asInt()) - + " " + e.toSql() + " " + e.debugString()); - - if (!(e instanceof BinaryPredicate)) return; - BinaryPredicate binaryPred = (BinaryPredicate) e; - - // check whether this is an equi-join predicate, ie, something of the - // form <expr1> = <expr2> where at least one of the exprs is bound by - // exactly one tuple id - if (binaryPred.getOp() != BinaryPredicate.Operator.EQ && - binaryPred.getOp() != BinaryPredicate.Operator.NULL_MATCHING_EQ && - binaryPred.getOp() != BinaryPredicate.Operator.NOT_DISTINCT) { - return; - } - // the binary predicate must refer to at least two tuples to be an eqJoinConjunct - if (tupleIds.size() < 2) return; - - // examine children and update eqJoinConjuncts - for (int i = 0; i < 2; ++i) { - tupleIds = Lists.newArrayList(); - binaryPred.getChild(i).getIds(tupleIds, null); - if (tupleIds.size() == 1) { - if (!globalState_.eqJoinConjuncts.containsKey(tupleIds.get(0))) { - List<ExprId> conjunctIds = Lists.newArrayList(); - conjunctIds.add(e.getId()); - globalState_.eqJoinConjuncts.put(tupleIds.get(0), conjunctIds); - } else { - globalState_.eqJoinConjuncts.get(tupleIds.get(0)).add(e.getId()); - } - binaryPred.setIsEqJoinConjunct(true); - LOG.trace("register eqJoinConjunct: " + Integer.toString(e.getId().asInt())); - } - } - } - - /** - * Create and register an auxiliary predicate to express an equivalence between two - * exprs (BinaryPredicate with EQ); this predicate does not need to be assigned, but - * it's used for equivalence class computation. - * Does nothing if the lhs or rhs expr are NULL. Registering an equivalence with NULL - * would be incorrect, because <expr> = NULL is false (even NULL = NULL). - */ - public void createAuxEquivPredicate(Expr lhs, Expr rhs) { - // Check the expr type as well as the class because NullLiteral could have been - // implicitly cast to a type different than NULL. - if (lhs instanceof NullLiteral || rhs instanceof NullLiteral || - lhs.getType().isNull() || rhs.getType().isNull()) { - return; - } - // create an eq predicate between lhs and rhs - BinaryPredicate p = new BinaryPredicate(BinaryPredicate.Operator.EQ, lhs, rhs); - p.setIsAuxExpr(); - LOG.trace("register equiv predicate: " + p.toSql() + " " + p.debugString()); - registerConjunct(p); - } - - /** - * Creates an inferred equality predicate between the given slots. - */ - public BinaryPredicate createInferredEqPred(SlotId lhsSlotId, SlotId rhsSlotId) { - BinaryPredicate pred = new BinaryPredicate(BinaryPredicate.Operator.EQ, - new SlotRef(globalState_.descTbl.getSlotDesc(lhsSlotId)), - new SlotRef(globalState_.descTbl.getSlotDesc(rhsSlotId))); - pred.setIsInferred(); - // create casts if needed - pred.analyzeNoThrow(this); - return pred; - } - - /** - * Return all unassigned non-constant registered conjuncts that are fully bound by - * given list of tuple ids. If 'inclOjConjuncts' is false, conjuncts tied to an - * Outer Join clause are excluded. - */ - public List<Expr> getUnassignedConjuncts( - List<TupleId> tupleIds, boolean inclOjConjuncts) { - LOG.trace("getUnassignedConjuncts for " + Id.printIds(tupleIds)); - List<Expr> result = Lists.newArrayList(); - for (Expr e: globalState_.conjuncts.values()) { - if (e.isBoundByTupleIds(tupleIds) - && !e.isAuxExpr() - && !globalState_.assignedConjuncts.contains(e.getId()) - && ((inclOjConjuncts && !e.isConstant()) - || !globalState_.ojClauseByConjunct.containsKey(e.getId()))) { - result.add(e); - LOG.trace("getUnassignedConjunct: " + e.toSql()); - } - } - return result; - } - - public boolean isOjConjunct(Expr e) { - return globalState_.ojClauseByConjunct.containsKey(e.getId()); - } - - public boolean isIjConjunct(Expr e) { - return globalState_.ijClauseByConjunct.containsKey(e.getId()); - } - - public TableRef getFullOuterJoinRef(Expr e) { - return globalState_.fullOuterJoinedConjuncts.get(e.getId()); - } - - public boolean isFullOuterJoined(Expr e) { - return globalState_.fullOuterJoinedConjuncts.containsKey(e.getId()); - } - - /** - * Return all unassigned registered conjuncts for node's table ref ids. - * Wrapper around getUnassignedConjuncts(List<TupleId> tupleIds). - */ - public List<Expr> getUnassignedConjuncts(PlanNode node) { - return getUnassignedConjuncts(node.getTblRefIds()); - } - - /** - * Return all unassigned registered conjuncts that are fully bound by the given - * (logical) tuple ids, can be evaluated by 'tupleIds' and are not tied to an - * Outer Join clause. - */ - public List<Expr> getUnassignedConjuncts(List<TupleId> tupleIds) { - LOG.trace("getUnassignedConjuncts for node with " + Id.printIds(tupleIds)); - List<Expr> result = Lists.newArrayList(); - for (Expr e: getUnassignedConjuncts(tupleIds, true)) { - if (canEvalPredicate(tupleIds, e)) { - result.add(e); - LOG.trace("getUnassignedConjunct: " + e.toSql()); - } - } - return result; - } - - /** - * Returns true if e must be evaluated by a join node. Note that it may still be - * safe to evaluate e elsewhere as well, but in any case the join must evaluate e. - */ - public boolean evalByJoin(Expr e) { - List<TupleId> tids = Lists.newArrayList(); - e.getIds(tids, null); - if (tids.isEmpty()) return false; - if (tids.size() > 1 || isOjConjunct(e) || isFullOuterJoined(e) - || (isOuterJoined(tids.get(0)) - && (!e.isOnClauseConjunct() || isIjConjunct(e))) - || (isAntiJoinedConjunct(e) && !isSemiJoined(tids.get(0)))) { - return true; - } - return false; - } - - /** - * Return all unassigned conjuncts of the outer join referenced by right-hand side - * table ref. - */ - public List<Expr> getUnassignedOjConjuncts(TableRef ref) { - Preconditions.checkState(ref.getJoinOp().isOuterJoin()); - List<Expr> result = Lists.newArrayList(); - List<ExprId> candidates = globalState_.conjunctsByOjClause.get(ref.getId()); - if (candidates == null) return result; - for (ExprId conjunctId: candidates) { - if (!globalState_.assignedConjuncts.contains(conjunctId)) { - Expr e = globalState_.conjuncts.get(conjunctId); - Preconditions.checkNotNull(e); - result.add(e); - LOG.trace("getUnassignedOjConjunct: " + e.toSql()); - } - } - return result; - } - - /** - * Return rhs ref of last Join clause that outer-joined id. - */ - public TableRef getLastOjClause(TupleId id) { - return globalState_.outerJoinedTupleIds.get(id); - } - - /** - * Return slot descriptor corresponding to column referenced in the context of - * tupleDesc, or null if no such reference exists. - */ - public SlotDescriptor getColumnSlot(TupleDescriptor tupleDesc, Column col) { - for (SlotDescriptor slotDesc: tupleDesc.getSlots()) { - if (slotDesc.getColumn() == col) return slotDesc; - } - return null; - } - - public DescriptorTable getDescTbl() { return globalState_.descTbl; } - public ImpaladCatalog getCatalog() { return globalState_.catalog; } - public Set<String> getAliases() { return aliasMap_.keySet(); } - - /** - * Returns list of candidate equi-join conjuncts to be evaluated by the join node - * that is specified by the table ref ids of its left and right children. - * If the join to be performed is an outer join, then only equi-join conjuncts - * from its On-clause are returned. If an equi-join conjunct is full outer joined, - * then it is only added to the result if this join is the one to full-outer join it. - */ - public List<Expr> getEqJoinConjuncts(List<TupleId> lhsTblRefIds, - List<TupleId> rhsTblRefIds) { - // Contains all equi-join conjuncts that have one child fully bound by one of the - // rhs table ref ids (the other child is not bound by that rhs table ref id). - List<ExprId> conjunctIds = Lists.newArrayList(); - for (TupleId rhsId: rhsTblRefIds) { - List<ExprId> cids = globalState_.eqJoinConjuncts.get(rhsId); - if (cids == null) continue; - for (ExprId eid: cids) { - if (!conjunctIds.contains(eid)) conjunctIds.add(eid); - } - } - - // Since we currently prevent join re-reordering across outer joins, we can never - // have a bushy outer join with multiple rhs table ref ids. A busy outer join can - // only be constructed with an inline view (which has a single table ref id). - List<ExprId> ojClauseConjuncts = null; - if (rhsTblRefIds.size() == 1) { - ojClauseConjuncts = globalState_.conjunctsByOjClause.get(rhsTblRefIds.get(0)); - } - - // List of table ref ids that the join node will 'materialize'. - List<TupleId> nodeTblRefIds = Lists.newArrayList(lhsTblRefIds); - nodeTblRefIds.addAll(rhsTblRefIds); - List<Expr> result = Lists.newArrayList(); - for (ExprId conjunctId: conjunctIds) { - Expr e = globalState_.conjuncts.get(conjunctId); - Preconditions.checkState(e != null); - if (!canEvalFullOuterJoinedConjunct(e, nodeTblRefIds) || - !canEvalAntiJoinedConjunct(e, nodeTblRefIds)) { - continue; - } - if (ojClauseConjuncts != null && !ojClauseConjuncts.contains(conjunctId)) continue; - result.add(e); - } - return result; - } - - /** - * Checks if a conjunct can be evaluated at a node materializing a list of tuple ids - * 'tids'. - */ - public boolean canEvalFullOuterJoinedConjunct(Expr e, List<TupleId> tids) { - TableRef fullOuterJoin = getFullOuterJoinRef(e); - if (fullOuterJoin == null) return true; - return tids.containsAll(fullOuterJoin.getAllTableRefIds()); - } - - /** - * Returns true if predicate 'e' can be correctly evaluated by a tree materializing - * 'tupleIds', otherwise false: - * - the predicate needs to be bound by tupleIds - * - an On clause predicate against the non-nullable side of an Outer Join clause - * can only be correctly evaluated by the join node that materializes the - * Outer Join clause - * - otherwise, a predicate can only be correctly evaluated if for all outer-joined - * referenced tids the last join to outer-join this tid has been materialized - */ - public boolean canEvalPredicate(List<TupleId> tupleIds, Expr e) { - LOG.trace("canEval: " + e.toSql() + " " + e.debugString() + " " - + Id.printIds(tupleIds)); - if (!e.isBoundByTupleIds(tupleIds)) return false; - ArrayList<TupleId> tids = Lists.newArrayList(); - e.getIds(tids, null); - if (tids.isEmpty()) return true; - - if (e.isOnClauseConjunct()) { - if (tids.size() > 1) { - // If the conjunct is from the ON-clause of an anti join, check if we can - // assign it to this node. - if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds); - // bail if this is from an OJ On clause; the join node will pick - // it up later via getUnassignedOjConjuncts() - if (globalState_.ojClauseByConjunct.containsKey(e.getId())) return false; - // If this is not from an OJ On clause (e.g. where clause or On clause of an - // inner join) and is full-outer joined, we need to make sure it is not - // assigned below the full outer join node that outer-joined it. - return canEvalFullOuterJoinedConjunct(e, tupleIds); - } - - TupleId tid = tids.get(0); - if (globalState_.ojClauseByConjunct.containsKey(e.getId())) { - // OJ On-clause predicate: okay if it's from - // the same On clause that makes tid nullable - // (otherwise e needn't be true when that tuple is set) - if (!globalState_.outerJoinedTupleIds.containsKey(tid)) return false; - if (globalState_.ojClauseByConjunct.get(e.getId()) - != globalState_.outerJoinedTupleIds.get(tid)) { - return false; - } - // Single tuple id conjuncts specified in the FOJ On-clause are not allowed to be - // assigned below that full outer join in the operator tree. - TableRef tblRef = globalState_.ojClauseByConjunct.get(e.getId()); - if (tblRef.getJoinOp().isFullOuterJoin()) return false; - } else { - // Non-OJ On-clause conjunct. - if (isOuterJoined(tid)) { - // If the conjunct references an outer-joined tuple, then evaluate the - // conjunct at the join that the On-clause belongs to. - TableRef onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId()); - Preconditions.checkNotNull(onClauseTableRef); - return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds()); - } - // If this single tid conjunct is from the On-clause of an anti-join, check if we - // can assign it to this node. - if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds); - } - // Single tid predicate that is not from an OJ On-clause and is outer-joined by a - // full outer join cannot be assigned below that full outer join in the - // operator tree. - return canEvalFullOuterJoinedConjunct(e, tupleIds); - } - if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds); - - for (TupleId tid: tids) { - LOG.trace("canEval: checking tid " + tid.toString()); - TableRef rhsRef = getLastOjClause(tid); - // this is not outer-joined; ignore - if (rhsRef == null) continue; - // check whether the last join to outer-join 'tid' is materialized by tupleIds - boolean contains = tupleIds.containsAll(rhsRef.getAllTableRefIds()); - LOG.trace("canEval: contains=" + (contains ? "true " : "false ") - + Id.printIds(tupleIds) + " " + Id.printIds(rhsRef.getAllTableRefIds())); - if (!tupleIds.containsAll(rhsRef.getAllTableRefIds())) return false; - } - return true; - } - - /** - * Checks if a conjunct from the On-clause of an anti join can be evaluated in a node - * that materializes a given list of tuple ids. - */ - public boolean canEvalAntiJoinedConjunct(Expr e, List<TupleId> nodeTupleIds) { - TableRef antiJoinRef = getAntiJoinRef(e); - if (antiJoinRef == null) return true; - List<TupleId> tids = Lists.newArrayList(); - e.getIds(tids, null); - if (tids.size() > 1) { - return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds()) - && antiJoinRef.getAllTableRefIds().containsAll(nodeTupleIds); - } - // A single tid conjunct that is anti-joined can be safely assigned to a - // node below the anti join that specified it. - return globalState_.semiJoinedTupleIds.containsKey(tids.get(0)); - } - - /** - * Returns a list of predicates that are fully bound by destTid. Predicates are derived - * by replacing the slots of a source predicate with slots of the destTid, if for each - * source slot there is an equivalent slot in destTid. - * In particular, the returned list contains predicates that must be evaluated - * at a join node (bound to outer-joined tuple) but can also be safely evaluated by a - * plan node materializing destTid. Such predicates are not marked as assigned. - * All other inferred predicates are marked as assigned if 'markAssigned' - * is true. This function returns bound predicates regardless of whether the source - * predicated have been assigned. It is up to the caller to decide if a bound predicate - * should actually be used. - * Destination slots in destTid can be ignored by passing them in ignoreSlots. - * TODO: exclude UDFs from predicate propagation? their overloaded variants could - * have very different semantics - */ - public ArrayList<Expr> getBoundPredicates(TupleId destTid, Set<SlotId> ignoreSlots, - boolean markAssigned) { - ArrayList<Expr> result = Lists.newArrayList(); - for (ExprId srcConjunctId: globalState_.singleTidConjuncts) { - Expr srcConjunct = globalState_.conjuncts.get(srcConjunctId); - if (srcConjunct instanceof SlotRef) continue; - Preconditions.checkNotNull(srcConjunct); - List<TupleId> srcTids = Lists.newArrayList(); - List<SlotId> srcSids = Lists.newArrayList(); - srcConjunct.getIds(srcTids, srcSids); - Preconditions.checkState(srcTids.size() == 1); - - // Generate slot-mappings to bind srcConjunct to destTid. - TupleId srcTid = srcTids.get(0); - List<List<SlotId>> allDestSids = - getEquivDestSlotIds(srcTid, srcSids, destTid, ignoreSlots); - if (allDestSids.isEmpty()) continue; - - // Indicates whether the source slots have equivalent slots that belong - // to an outer-joined tuple. - boolean hasOuterJoinedTuple = false; - for (SlotId srcSid: srcSids) { - if (hasOuterJoinedTuple(globalState_.equivClassBySlotId.get(srcSid))) { - hasOuterJoinedTuple = true; - break; - } - } - - // It is incorrect to propagate predicates into a plan subtree that is on the - // nullable side of an outer join if the predicate evaluates to true when all - // its referenced tuples are NULL. The check below is conservative because the - // outer-joined tuple making 'hasOuterJoinedTuple' true could be in a parent block - // of 'srcConjunct', in which case it is safe to propagate 'srcConjunct' within - // child blocks of the outer-joined parent block. - // TODO: Make the check precise by considering the blocks (analyzers) where the - // outer-joined tuples in the dest slot's equivalence classes appear - // relative to 'srcConjunct'. - if (hasOuterJoinedTuple && isTrueWithNullSlots(srcConjunct)) continue; - - // if srcConjunct comes out of an OJ's On clause, we need to make sure it's the - // same as the one that makes destTid nullable - // (otherwise srcConjunct needn't be true when destTid is set) - if (globalState_.ojClauseByConjunct.containsKey(srcConjunct.getId())) { - if (!globalState_.outerJoinedTupleIds.containsKey(destTid)) continue; - if (globalState_.ojClauseByConjunct.get(srcConjunct.getId()) - != globalState_.outerJoinedTupleIds.get(destTid)) { - continue; - } - // Do not propagate conjuncts from the on-clause of full-outer or anti-joins. - TableRef tblRef = globalState_.ojClauseByConjunct.get(srcConjunct.getId()); - if (tblRef.getJoinOp().isFullOuterJoin()) continue; - } - - // Conjuncts specified in the ON-clause of an anti-join must be evaluated at that - // join node. - if (isAntiJoinedConjunct(srcConjunct)) continue; - - // Generate predicates for all src-to-dest slot mappings. - for (List<SlotId> destSids: allDestSids) { - Preconditions.checkState(destSids.size() == srcSids.size()); - Expr p; - if (srcSids.containsAll(destSids)) { - p = srcConjunct; - } else { - ExprSubstitutionMap smap = new ExprSubstitutionMap(); - for (int i = 0; i < srcSids.size(); ++i) { - smap.put( - new SlotRef(globalState_.descTbl.getSlotDesc(srcSids.get(i))), - new SlotRef(globalState_.descTbl.getSlotDesc(destSids.get(i)))); - } - try { - p = srcConjunct.trySubstitute(smap, this, false); - } catch (ImpalaException exc) { - // not an executable predicate; ignore - continue; - } - // Unset the id because this bound predicate itself is not registered, and - // to prevent callers from inadvertently marking the srcConjunct as assigned. - p.setId(null); - if (p instanceof BinaryPredicate) ((BinaryPredicate) p).setIsInferred(); - LOG.trace("new pred: " + p.toSql() + " " + p.debugString()); - } - - if (markAssigned) { - // predicate assignment doesn't hold if: - // - the application against slotId doesn't transfer the value back to its - // originating slot - // - the original predicate is on an OJ'd table but doesn't originate from - // that table's OJ clause's ON clause (if it comes from anywhere but that - // ON clause, it needs to be evaluated directly by the join node that - // materializes the OJ'd table) - boolean reverseValueTransfer = true; - for (int i = 0; i < srcSids.size(); ++i) { - if (!hasValueTransfer(destSids.get(i), srcSids.get(i))) { - reverseValueTransfer = false; - break; - } - } - - // Check if either srcConjunct or the generated predicate needs to be evaluated - // at a join node (IMPALA-2018). - boolean evalByJoin = - (evalByJoin(srcConjunct) - && (globalState_.ojClauseByConjunct.get(srcConjunct.getId()) - != globalState_.outerJoinedTupleIds.get(srcTid))) - || (evalByJoin(p) - && (globalState_.ojClauseByConjunct.get(p.getId()) - != globalState_.outerJoinedTupleIds.get(destTid))); - - // mark all bound predicates including duplicate ones - if (reverseValueTransfer && !evalByJoin) markConjunctAssigned(srcConjunct); - } - - // check if we already created this predicate - if (!result.contains(p)) result.add(p); - } - } - return result; - } - - public ArrayList<Expr> getBoundPredicates(TupleId destTid) { - return getBoundPredicates(destTid, new HashSet<SlotId>(), true); - } - - /** - * Modifies the analysis state associated with the rhs table ref of an outer join - * to accomodate a join inversion that changes the rhs table ref of the join from - * oldRhsTbl to newRhsTbl. - * TODO: Revisit this function and how outer joins are inverted. This function - * should not be necessary because the semantics of an inverted outer join do - * not change. This function will naturally become obsolete when we can transform - * outer joins with otherPredicates into inner joins. - */ - public void invertOuterJoinState(TableRef oldRhsTbl, TableRef newRhsTbl) { - Preconditions.checkState(oldRhsTbl.getJoinOp().isOuterJoin()); - // Invert analysis state for an outer join. - List<ExprId> conjunctIds = - globalState_.conjunctsByOjClause.remove(oldRhsTbl.getId()); - if (conjunctIds != null) { - globalState_.conjunctsByOjClause.put(newRhsTbl.getId(), conjunctIds); - for (ExprId eid: conjunctIds) { - globalState_.ojClauseByConjunct.put(eid, newRhsTbl); - } - } else { - // An outer join is allowed not to have an On-clause if the rhs table ref is - // correlated or relative. - Preconditions.checkState(oldRhsTbl.isCorrelated() || oldRhsTbl.isRelative()); - } - for (Map.Entry<TupleId, TableRef> e: globalState_.outerJoinedTupleIds.entrySet()) { - if (e.getValue() == oldRhsTbl) e.setValue(newRhsTbl); - } - } - - /** - * For each equivalence class, adds/removes predicates from conjuncts such that it - * contains a minimum set of <lhsSlot> = <rhsSlot> predicates that establish the known - * equivalences between slots in lhsTids and rhsTids which must be disjoint. - * Preserves original conjuncts when possible. Assumes that predicates for establishing - * equivalences among slots in only lhsTids and only rhsTids have already been - * established. This function adds the remaining predicates to "connect" the disjoint - * equivalent slot sets of lhsTids and rhsTids. - * The intent of this function is to enable construction of a minimum spanning tree - * to cover the known slot equivalences. This function should be called for join - * nodes during plan generation to (1) remove redundant join predicates, and (2) - * establish equivalences among slots materialized at that join node. - * TODO: Consider optimizing for the cheapest minimum set of predicates. - * TODO: Consider caching the DisjointSet during plan generation instead of - * re-creating it here on every invocation. - */ - public <T extends Expr> void createEquivConjuncts(List<TupleId> lhsTids, - List<TupleId> rhsTids, List<T> conjuncts) { - Preconditions.checkState(Collections.disjoint(lhsTids, rhsTids)); - - // Equivalence classes only containing slots belonging to lhsTids. - Map<EquivalenceClassId, List<SlotId>> lhsEquivClasses = - getEquivClasses(lhsTids); - - // Equivalence classes only containing slots belonging to rhsTids. - Map<EquivalenceClassId, List<SlotId>> rhsEquivClasses = - getEquivClasses(rhsTids); - - // Maps from a slot id to its set of equivalent slots. Used to track equivalences - // that have been established by predicates assigned/generated to plan nodes - // materializing lhsTids as well as the given conjuncts. - DisjointSet<SlotId> partialEquivSlots = new DisjointSet<SlotId>(); - // Add the partial equivalences to the partialEquivSlots map. The equivalent-slot - // sets of slots from lhsTids are disjoint from those of slots from rhsTids. - // We need to 'connect' the disjoint slot sets by constructing a new predicate - // for each equivalence class (unless there is already one in 'conjuncts'). - for (List<SlotId> partialEquivClass: lhsEquivClasses.values()) { - partialEquivSlots.bulkUnion(partialEquivClass); - } - for (List<SlotId> partialEquivClass: rhsEquivClasses.values()) { - partialEquivSlots.bulkUnion(partialEquivClass); - } - - // Set of outer-joined slots referenced by conjuncts. - Set<SlotId> outerJoinedSlots = Sets.newHashSet(); - - // Update partialEquivSlots based on equality predicates in 'conjuncts'. Removes - // redundant conjuncts, unless they reference outer-joined slots (see below). - Iterator<T> conjunctIter = conjuncts.iterator(); - while (conjunctIter.hasNext()) { - Expr conjunct = conjunctIter.next(); - Pair<SlotId, SlotId> eqSlots = BinaryPredicate.getEqSlots(conjunct); - if (eqSlots == null) continue; - EquivalenceClassId firstEqClassId = getEquivClassId(eqSlots.first); - EquivalenceClassId secondEqClassId = getEquivClassId(eqSlots.second); - // slots may not be in the same eq class due to outer joins - if (!firstEqClassId.equals(secondEqClassId)) continue; - - // Retain an otherwise redundant predicate if it references a slot of an - // outer-joined tuple that is not already referenced by another join predicate - // to maintain that the rows must satisfy outer-joined-slot IS NOT NULL - // (otherwise NULL tuples from outer joins could survive). - // TODO: Consider better fixes for outer-joined slots: (1) Create IS NOT NULL - // predicates and place them at the lowest possible plan node. (2) Convert outer - // joins into inner joins (or full outer joins into left/right outer joins). - boolean filtersOuterJoinNulls = false; - if (isOuterJoined(eqSlots.first) - && lhsTids.contains(getTupleId(eqSlots.first)) - && !outerJoinedSlots.contains(eqSlots.first)) { - outerJoinedSlots.add(eqSlots.first); - filtersOuterJoinNulls = true; - } - if (isOuterJoined(eqSlots.second) - && lhsTids.contains(getTupleId(eqSlots.second)) - && !outerJoinedSlots.contains(eqSlots.second)) { - outerJoinedSlots.add(eqSlots.second); - filtersOuterJoinNulls = true; - } - // retain conjunct if it connects two formerly unconnected equiv classes or - // it is required for outer-join semantics - if (!partialEquivSlots.union(eqSlots.first, eqSlots.second) - && !filtersOuterJoinNulls) { - conjunctIter.remove(); - } - } - - // For each equivalence class, construct a new predicate to 'connect' the disjoint - // slot sets. - for (Map.Entry<EquivalenceClassId, List<SlotId>> rhsEquivClass: - rhsEquivClasses.entrySet()) { - List<SlotId> lhsSlots = lhsEquivClasses.get(rhsEquivClass.getKey()); - if (lhsSlots == null) continue; - List<SlotId> rhsSlots = rhsEquivClass.getValue(); - Preconditions.checkState(!lhsSlots.isEmpty() && !rhsSlots.isEmpty()); - - if (!partialEquivSlots.union(lhsSlots.get(0), rhsSlots.get(0))) continue; - // Do not create a new predicate from slots that are full outer joined because that - // predicate may be incorrectly assigned to a node below the associated full outer - // join. - if (isFullOuterJoined(lhsSlots.get(0)) || isFullOuterJoined(rhsSlots.get(0))) { - continue; - } - T newEqPred = (T) createInferredEqPred(lhsSlots.get(0), rhsSlots.get(0)); - if (!hasMutualValueTransfer(lhsSlots.get(0), rhsSlots.get(0))) continue; - conjuncts.add(newEqPred); - } - } - - /** - * For each equivalence class, adds/removes predicates from conjuncts such that - * it contains a minimum set of <slot> = <slot> predicates that establish - * the known equivalences between slots belonging to tid. Preserves original - * conjuncts when possible. - * The intent of this function is to enable construction of a minimum spanning tree - * to cover the known slot equivalences. This function should be called to add - * conjuncts to plan nodes that materialize a new tuple, e.g., scans and aggregations. - * Does not enforce equivalence between slots in ignoreSlots. Equivalences (if any) - * among slots in ignoreSlots are assumed to have already been enforced. - * TODO: Consider optimizing for the cheapest minimum set of predicates. - */ - public <T extends Expr> void createEquivConjuncts(TupleId tid, List<T> conjuncts, - Set<SlotId> ignoreSlots) { - // Maps from a slot id to its set of equivalent slots. Used to track equivalences - // that have been established by 'conjuncts' and the 'ignoredsSlots'. - DisjointSet<SlotId> partialEquivSlots = new DisjointSet<SlotId>(); - - // Treat ignored slots as already connected. Add the ignored slots at this point - // such that redundant conjuncts are removed. - partialEquivSlots.bulkUnion(ignoreSlots); - partialEquivSlots.checkConsistency(); - - // Update partialEquivSlots based on equality predicates in 'conjuncts'. Removes - // redundant conjuncts, unless they reference outer-joined slots (see below). - Iterator<T> conjunctIter = conjuncts.iterator(); - while (conjunctIter.hasNext()) { - Expr conjunct = conjunctIter.next(); - Pair<SlotId, SlotId> eqSlots = BinaryPredicate.getEqSlots(conjunct); - if (eqSlots == null) continue; - EquivalenceClassId firstEqClassId = getEquivClassId(eqSlots.first); - EquivalenceClassId secondEqClassId = getEquivClassId(eqSlots.second); - // slots may not be in the same eq class due to outer joins - if (!firstEqClassId.equals(secondEqClassId)) continue; - // update equivalences and remove redundant conjuncts - if (!partialEquivSlots.union(eqSlots.first, eqSlots.second)) conjunctIter.remove(); - } - // Suppose conjuncts had these predicates belonging to equivalence classes e1 and e2: - // e1: s1 = s2, s3 = s4, s3 = s5 - // e2: s10 = s11 - // The conjunctsEquivSlots should contain the following entries at this point: - // s1 -> {s1, s2} - // s2 -> {s1, s2} - // s3 -> {s3, s4, s5} - // s4 -> {s3, s4, s5} - // s5 -> {s3, s4, s5} - // s10 -> {s10, s11} - // s11 -> {s10, s11} - // Assuming e1 = {s1, s2, s3, s4, s5} we need to generate one additional equality - // predicate to "connect" {s1, s2} and {s3, s4, s5}. - - // These are the equivalences that need to be established by constructing conjuncts - // to form a minimum spanning tree. - Map<EquivalenceClassId, List<SlotId>> targetEquivClasses = - getEquivClasses(Lists.newArrayList(tid)); - for (Map.Entry<EquivalenceClassId, List<SlotId>> targetEquivClass: - targetEquivClasses.entrySet()) { - // Loop over all pairs of equivalent slots and merge their disjoint slots sets, - // creating missing equality predicates as necessary. - List<SlotId> slotIds = targetEquivClass.getValue(); - boolean done = false; - for (int i = 1; i < slotIds.size(); ++i) { - SlotId rhs = slotIds.get(i); - for (int j = 0; j < i; ++j) { - SlotId lhs = slotIds.get(j); - if (!partialEquivSlots.union(lhs, rhs)) continue; - if (!hasMutualValueTransfer(lhs, rhs)) continue; - conjuncts.add((T) createInferredEqPred(lhs, rhs)); - // Check for early termination. - if (partialEquivSlots.get(lhs).size() == slotIds.size()) { - done = true; - break; - } - } - if (done) break; - } - } - } - - public <T extends Expr> void createEquivConjuncts(TupleId tid, List<T> conjuncts) { - createEquivConjuncts(tid, conjuncts, new HashSet<SlotId>()); - } - - /** - * Returns a map of partial equivalence classes that only contains slot ids belonging - * to the given tuple ids. Only contains equivalence classes with more than one member. - */ - private Map<EquivalenceClassId, List<SlotId>> getEquivClasses(List<TupleId> tids) { - Map<EquivalenceClassId, List<SlotId>> result = Maps.newHashMap(); - for (TupleId tid: tids) { - for (SlotDescriptor slotDesc: getTupleDesc(tid).getSlots()) { - EquivalenceClassId eqClassId = getEquivClassId(slotDesc.getId()); - // Ignore equivalence classes that are empty or only have a single member. - if (globalState_.equivClassMembers.get(eqClassId).size() <= 1) continue; - List<SlotId> slotIds = result.get(eqClassId); - if (slotIds == null) { - slotIds = Lists.newArrayList(); - result.put(eqClassId, slotIds); - } - slotIds.add(slotDesc.getId()); - } - } - return result; - } - - /** - * Returns a list of slot mappings from srcTid to destTid for the purpose of predicate - * propagation. Each mapping assigns every slot in srcSids to an equivalent slot in - * destTid. Does not generate all possible mappings, but limits the results to - * useful and/or non-redundant mappings, i.e., those mappings that would improve - * the performance of query execution. - */ - private List<List<SlotId>> getEquivDestSlotIds(TupleId srcTid, List<SlotId> srcSids, - TupleId destTid, Set<SlotId> ignoreSlots) { - List<List<SlotId>> allDestSids = Lists.newArrayList(); - TupleDescriptor destTupleDesc = getTupleDesc(destTid); - if (srcSids.size() == 1) { - // Generate all mappings to propagate predicates of the form <slot> <op> <constant> - // to as many destination slots as possible. - // TODO: If srcTid == destTid we could limit the mapping to partition - // columns because mappings to non-partition columns do not provide - // a performance benefit. - SlotId srcSid = srcSids.get(0); - for (SlotDescriptor destSlot: destTupleDesc.getSlots()) { - if (ignoreSlots.contains(destSlot.getId())) continue; - if (hasValueTransfer(srcSid, destSlot.getId())) { - allDestSids.add(Lists.newArrayList(destSlot.getId())); - } - } - } else if (srcTid.equals(destTid)) { - // Multiple source slot ids and srcTid == destTid. Inter-tuple transfers are - // already expressed by the original conjuncts. Any mapping would be redundant. - // Still add srcSids to the result because we rely on getBoundPredicates() to - // include predicates that can safely be evaluated below an outer join, but must - // also be evaluated by the join itself (evalByJoin() == true). - allDestSids.add(srcSids); - } else { - // Multiple source slot ids and srcTid != destTid. Pick the first mapping - // where each srcSid is mapped to a different destSid to avoid generating - // redundant and/or trivial predicates. - // TODO: This approach is not guaranteed to find the best slot mapping - // (e.g., against partition columns) or all non-redundant mappings. - // The limitations are show in predicate-propagation.test. - List<SlotId> destSids = Lists.newArrayList(); - for (SlotId srcSid: srcSids) { - for (SlotDescriptor destSlot: destTupleDesc.getSlots()) { - if (ignoreSlots.contains(destSlot.getId())) continue; - if (hasValueTransfer(srcSid, destSlot.getId()) - && !destSids.contains(destSlot.getId())) { - destSids.add(destSlot.getId()); - break; - } - } - } - if (destSids.size() == srcSids.size()) allDestSids.add(destSids); - } - return allDestSids; - } - - /** - * Returns true if the equivalence class identified by 'eqClassId' contains - * a slot belonging to an outer-joined tuple. - */ - private boolean hasOuterJoinedTuple(EquivalenceClassId eqClassId) { - ArrayList<SlotId> eqClass = globalState_.equivClassMembers.get(eqClassId); - for (SlotId s: eqClass) { - if (isOuterJoined(getTupleId(s))) return true; - } - return false; - } - - /** - * Returns true if 'p' evaluates to true when all its referenced slots are NULL, - * false otherwise. - * TODO: Can we avoid dealing with the exceptions thrown by analysis and eval? - */ - public boolean isTrueWithNullSlots(Expr p) { - // Construct predicate with all SlotRefs substituted by NullLiterals. - List<SlotRef> slotRefs = Lists.newArrayList(); - p.collect(Predicates.instanceOf(SlotRef.class), slotRefs); - - // Map for substituting SlotRefs with NullLiterals. - ExprSubstitutionMap nullSmap = new ExprSubstitutionMap(); - for (SlotRef slotRef: slotRefs) { - // Preserve the original SlotRef type to ensure all substituted - // subexpressions in the predicate have the same return type and - // function signature as in the original predicate. - nullSmap.put(slotRef.clone(), NullLiteral.create(slotRef.getType())); - } - Expr nullTuplePred = p.substitute(nullSmap, this, false); - try { - return FeSupport.EvalPredicate(nullTuplePred, getQueryCtx()); - } catch (InternalException e) { - Preconditions.checkState(false, "Failed to evaluate generated predicate: " - + nullTuplePred.toSql() + "." + e.getMessage()); - } - return true; - } - - public TupleId getTupleId(SlotId slotId) { - return globalState_.descTbl.getSlotDesc(slotId).getParent().getId(); - } - - public void registerValueTransfer(SlotId id1, SlotId id2) { - globalState_.registeredValueTransfers.add(new Pair(id1, id2)); - } - - public boolean isOuterJoined(TupleId tid) { - return globalState_.outerJoinedTupleIds.containsKey(tid); - } - - public boolean isOuterJoined(SlotId sid) { - return isOuterJoined(getTupleId(sid)); - } - - public boolean isSemiJoined(TupleId tid) { - return globalState_.semiJoinedTupleIds.containsKey(tid); - } - - public boolean isAntiJoinedConjunct(Expr e) { - return getAntiJoinRef(e) != null; - } - - public TableRef getAntiJoinRef(Expr e) { - TableRef tblRef = globalState_.sjClauseByConjunct.get(e.getId()); - if (tblRef == null) return null; - return (tblRef.getJoinOp().isAntiJoin()) ? tblRef : null; - } - - public boolean isFullOuterJoined(TupleId tid) { - return globalState_.fullOuterJoinedTupleIds.containsKey(tid); - } - - public boolean isFullOuterJoined(SlotId sid) { - return isFullOuterJoined(getTupleId(sid)); - } - - public boolean isVisible(TupleId tid) { - return tid == visibleSemiJoinedTupleId_ || !isSemiJoined(tid); - } - - public boolean containsOuterJoinedTid(List<TupleId> tids) { - for (TupleId tid: tids) { - if (isOuterJoined(tid)) return true; - } - return false; - } - - /** - * Populate globalState.valueTransfer based on the registered equi-join predicates - * of the form <slotref> = <slotref>. - */ - public void computeEquivClasses() { - globalState_.valueTransferGraph = new ValueTransferGraph(); - globalState_.valueTransferGraph.computeValueTransfers(); - - // we start out by assigning each slot to its own equiv class - int numSlots = globalState_.descTbl.getMaxSlotId().asInt() + 1; - for (int i = 0; i < numSlots; ++i) { - EquivalenceClassId id = globalState_.equivClassIdGenerator.getNextId(); - globalState_.equivClassMembers.put(id, Lists.newArrayList(new SlotId(i))); - } - - // merge two classes if there is a value transfer between all members of the - // combined class; do this until there's nothing left to merge - boolean merged; - do { - merged = false; - for (Map.Entry<EquivalenceClassId, ArrayList<SlotId>> e1: - globalState_.equivClassMembers.entrySet()) { - for (Map.Entry<EquivalenceClassId, ArrayList<SlotId>> e2: - globalState_.equivClassMembers.entrySet()) { - if (e1.getKey() == e2.getKey()) continue; - List<SlotId> class1Members = e1.getValue(); - if (class1Members.isEmpty()) continue; - List<SlotId> class2Members = e2.getValue(); - if (class2Members.isEmpty()) continue; - - // check whether we can transfer values between all members - boolean canMerge = true; - for (SlotId class1Slot: class1Members) { - for (SlotId class2Slot: class2Members) { - if (!hasValueTransfer(class1Slot, class2Slot) - && !hasValueTransfer(class2Slot, class1Slot)) { - canMerge = false; - break; - } - } - if (!canMerge) break; - } - if (!canMerge) continue; - - // merge classes 1 and 2 by transfering 2 into 1 - class1Members.addAll(class2Members); - class2Members.clear(); - merged = true; - } - } - } while (merged); - - // populate equivC
<TRUNCATED>
