http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
new file mode 100644
index 0000000..a931489
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -0,0 +1,2932 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package com.cloudera.impala.analysis;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.cloudera.impala.analysis.Path.PathType;
+import com.cloudera.impala.authorization.AuthorizationConfig;
+import com.cloudera.impala.authorization.Privilege;
+import com.cloudera.impala.authorization.PrivilegeRequest;
+import com.cloudera.impala.authorization.PrivilegeRequestBuilder;
+import com.cloudera.impala.authorization.User;
+import com.cloudera.impala.catalog.CatalogException;
+import com.cloudera.impala.catalog.Column;
+import com.cloudera.impala.catalog.DataSourceTable;
+import com.cloudera.impala.catalog.DatabaseNotFoundException;
+import com.cloudera.impala.catalog.Db;
+import com.cloudera.impala.catalog.HBaseTable;
+import com.cloudera.impala.catalog.HdfsTable;
+import com.cloudera.impala.catalog.ImpaladCatalog;
+import com.cloudera.impala.catalog.KuduTable;
+import com.cloudera.impala.catalog.Table;
+import com.cloudera.impala.catalog.TableLoadingException;
+import com.cloudera.impala.catalog.Type;
+import com.cloudera.impala.catalog.View;
+import com.cloudera.impala.common.AnalysisException;
+import com.cloudera.impala.common.Id;
+import com.cloudera.impala.common.IdGenerator;
+import com.cloudera.impala.common.ImpalaException;
+import com.cloudera.impala.common.InternalException;
+import com.cloudera.impala.common.Pair;
+import com.cloudera.impala.common.PrintUtils;
+import com.cloudera.impala.planner.PlanNode;
+import com.cloudera.impala.service.FeSupport;
+import com.cloudera.impala.thrift.TAccessEvent;
+import com.cloudera.impala.thrift.TCatalogObjectType;
+import com.cloudera.impala.thrift.TLineageGraph;
+import com.cloudera.impala.thrift.TNetworkAddress;
+import com.cloudera.impala.thrift.TQueryCtx;
+import com.cloudera.impala.util.DisjointSet;
+import com.cloudera.impala.util.EventSequence;
+import com.cloudera.impala.util.ListMap;
+import com.cloudera.impala.util.TSessionStateUtil;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicates;
+import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * Repository of analysis state for single select block.
+ *
+ * Conjuncts:
+ * Conjuncts are registered during analysis (registerConjuncts()) and assigned during the
+ * planning process (getUnassigned[Oj]Conjuncts()/isConjunctAssigned()/
+ * markConjunctsAssigned()).
+ * All conjuncts are assigned a unique id when initially registered, and all registered
+ * conjuncts are referenced by their id (ie, there are no containers other than the one
+ * holding the referenced conjuncts), to make substitute() simple.
+ *
+ * Slot equivalence classes:
+ * Equivalence of individual slots is computed based on registered equality predicates;
+ * those predicates are either present directly in the query or are implied by the
+ * syntactic elements used in query (example: a GROUP BY clause has implied equality
+ * predicates between the grouping exprs and the grouping slots of the aggregation
+ * output tuple).
+ * Implied equality predicates are registered with createAuxEquivPredicate(); they are
+ * never assigned during plan generation.
+ * Also tracks each catalog object access, so authorization checks can be performed once
+ * analysis is complete.
+ * TODO: We often use the terms stmt/block/analyzer interchangeably, although they may
+ * have slightly different meanings (sometimes depending on the context). Use the terms
+ * more accurately and consistently here and elsewhere.
+ */
+public class Analyzer {
+  // Common analysis error messages
+  public final static String DB_DOES_NOT_EXIST_ERROR_MSG = "Database does not exist: ";
+  public final static String DB_ALREADY_EXISTS_ERROR_MSG = "Database already exists: ";
+  public final static String TBL_DOES_NOT_EXIST_ERROR_MSG = "Table does not exist: ";
+  public final static String TBL_ALREADY_EXISTS_ERROR_MSG = "Table already exists: ";
+  public final static String FN_DOES_NOT_EXIST_ERROR_MSG = "Function does not exist: ";
+  public final static String FN_ALREADY_EXISTS_ERROR_MSG = "Function already exists: ";
+  public final static String DATA_SRC_DOES_NOT_EXIST_ERROR_MSG =
+      "Data source does not exist: ";
+  public final static String DATA_SRC_ALREADY_EXISTS_ERROR_MSG =
+      "Data source already exists: ";
+
+  private final static Logger LOG = LoggerFactory.getLogger(Analyzer.class);
+
+  private final User user_;
+
+  // Indicates whether this query block contains a straight join hint.
+  private boolean isStraightJoin_ = false;
+
+  // Whether to use Hive's auto-generated column labels.
+  private boolean useHiveColLabels_ = false;
+
+  // True if the corresponding select block has a limit and/or offset clause.
+  private boolean hasLimitOffsetClause_ = false;
+
+  // Current depth of nested analyze() calls. Used for enforcing a
+  // maximum expr-tree depth. Needs to be manually maintained by the user
+  // of this Analyzer with incrementCallDepth() and decrementCallDepth().
+  private int callDepth_ = 0;
+
+  // Flag indicating if this analyzer instance belongs to a subquery.
+  private boolean isSubquery_ = false;
+
+  // Flag indicating whether this analyzer belongs to a WITH clause view.
+ private boolean isWithClause_ = false; + + // If set, when privilege requests are registered they will use this error + // error message. + private String authErrorMsg_; + + // If false, privilege requests will not be registered in the analyzer. + private boolean enablePrivChecks_ = true; + + // By default, all registered semi-joined tuples are invisible, i.e., their slots + // cannot be referenced. If set, this semi-joined tuple is made visible. Such a tuple + // should only be made visible for analyzing the On-clause of its semi-join. + // In particular, if there are multiple semi-joins in the same query block, then the + // On-clause of any such semi-join is not allowed to reference other semi-joined tuples + // except its own. Therefore, only a single semi-joined tuple can be visible at a time. + private TupleId visibleSemiJoinedTupleId_ = null; + + public void setIsSubquery() { + isSubquery_ = true; + globalState_.containsSubquery = true; + } + public boolean isSubquery() { return isSubquery_; } + public boolean setHasPlanHints() { return globalState_.hasPlanHints = true; } + public boolean hasPlanHints() { return globalState_.hasPlanHints; } + public void setIsWithClause() { isWithClause_ = true; } + public boolean isWithClause() { return isWithClause_; } + + // state shared between all objects of an Analyzer tree + // TODO: Many maps here contain properties about tuples, e.g., whether + // a tuple is outer/semi joined, etc. Remove the maps in favor of making + // them properties of the tuple descriptor itself. + private static class GlobalState { + // TODO: Consider adding an "exec-env"-like global singleton that contains the + // catalog and authzConfig. + public final ImpaladCatalog catalog; + public final TQueryCtx queryCtx; + public final AuthorizationConfig authzConfig; + public final DescriptorTable descTbl = new DescriptorTable(); + public final IdGenerator<ExprId> conjunctIdGenerator = ExprId.createGenerator(); + public final ColumnLineageGraph lineageGraph; + + // True if we are analyzing an explain request. Should be set before starting + // analysis. + public boolean isExplain; + + // Indicates whether the query has plan hints. + public boolean hasPlanHints = false; + + // True if at least one of the analyzers belongs to a subquery. + public boolean containsSubquery = false; + + // all registered conjuncts (map from expr id to conjunct) + public final Map<ExprId, Expr> conjuncts = Maps.newHashMap(); + + // all registered conjuncts bound by a single tuple id; used in getBoundPredicates() + public final ArrayList<ExprId> singleTidConjuncts = Lists.newArrayList(); + + // eqJoinConjuncts[tid] contains all conjuncts of the form + // "<lhs> = <rhs>" in which either lhs or rhs is fully bound by tid + // and the other side is not bound by tid (ie, predicates that express equi-join + // conditions between two tablerefs). + // A predicate such as "t1.a = t2.b" has two entries, one for 't1' and + // another one for 't2'. 
+ public final Map<TupleId, List<ExprId>> eqJoinConjuncts = Maps.newHashMap(); + + // set of conjuncts that have been assigned to some PlanNode + public Set<ExprId> assignedConjuncts = + Collections.newSetFromMap(new IdentityHashMap<ExprId, Boolean>()); + + // map from outer-joined tuple id, i.e., one that is nullable, + // to the last Join clause (represented by its rhs table ref) that outer-joined it + public final Map<TupleId, TableRef> outerJoinedTupleIds = Maps.newHashMap(); + + // Map of registered conjunct to the last full outer join (represented by its + // rhs table ref) that outer joined it. + public final Map<ExprId, TableRef> fullOuterJoinedConjuncts = Maps.newHashMap(); + + // Map of full-outer-joined tuple id to the last full outer join that outer-joined it + public final Map<TupleId, TableRef> fullOuterJoinedTupleIds = Maps.newHashMap(); + + // Map from semi-joined tuple id, i.e., one that is invisible outside the join's + // On-clause, to its Join clause (represented by its rhs table ref). An anti-join is + // a kind of semi-join, so anti-joined tuples are also registered here. + public final Map<TupleId, TableRef> semiJoinedTupleIds = Maps.newHashMap(); + + // Map from right-hand side table-ref id of an outer join to the list of + // conjuncts in its On clause. There is always an entry for an outer join, but the + // corresponding value could be an empty list. There is no entry for non-outer joins. + public final Map<TupleId, List<ExprId>> conjunctsByOjClause = Maps.newHashMap(); + + // map from registered conjunct to its containing outer join On clause (represented + // by its right-hand side table ref); this is limited to conjuncts that can only be + // correctly evaluated by the originating outer join, including constant conjuncts + public final Map<ExprId, TableRef> ojClauseByConjunct = Maps.newHashMap(); + + // map from registered conjunct to its containing semi join On clause (represented + // by its right-hand side table ref) + public final Map<ExprId, TableRef> sjClauseByConjunct = Maps.newHashMap(); + + // map from registered conjunct to its containing inner join On clause (represented + // by its right-hand side table ref) + public final Map<ExprId, TableRef> ijClauseByConjunct = Maps.newHashMap(); + + // map from slot id to the analyzer/block in which it was registered + public final Map<SlotId, Analyzer> blockBySlot = Maps.newHashMap(); + + // Tracks all privilege requests on catalog objects. + private final Set<PrivilegeRequest> privilegeReqs = Sets.newLinkedHashSet(); + + // List of PrivilegeRequest to custom authorization failure error message. + // Tracks all privilege requests on catalog objects that need a custom + // error message returned to avoid exposing existence of catalog objects. + private final List<Pair<PrivilegeRequest, String>> maskedPrivilegeReqs = + Lists.newArrayList(); + + // accesses to catalog objects + // TODO: This can be inferred from privilegeReqs. They should be coalesced. + public Set<TAccessEvent> accessEvents = Sets.newHashSet(); + + // Tracks all warnings (e.g. non-fatal errors) that were generated during analysis. + // These are passed to the backend and eventually propagated to the shell. Maps from + // warning message to the number of times that warning was logged (in order to avoid + // duplicating the same warning over and over). 
+ public final LinkedHashMap<String, Integer> warnings = + new LinkedHashMap<String, Integer>(); + + public final IdGenerator<EquivalenceClassId> equivClassIdGenerator = + EquivalenceClassId.createGenerator(); + + // map from equivalence class id to the list of its member slots + private final Map<EquivalenceClassId, ArrayList<SlotId>> equivClassMembers = + Maps.newHashMap(); + + // map from slot id to its equivalence class id; + // only visible at the root Analyzer + private final Map<SlotId, EquivalenceClassId> equivClassBySlotId = Maps.newHashMap(); + + // map for each slot to the canonical slot of its equivalence class + private final ExprSubstitutionMap equivClassSmap = new ExprSubstitutionMap(); + + // represents the direct and transitive value transfers between slots + private ValueTransferGraph valueTransferGraph; + + private final List<Pair<SlotId, SlotId>> registeredValueTransfers = + Lists.newArrayList(); + + // Bidirectional map between Integer index and TNetworkAddress. + // Decreases the size of the scan range locations. + private final ListMap<TNetworkAddress> hostIndex = new ListMap<TNetworkAddress>(); + + // Timeline of important events in the planning process, used for debugging / + // profiling + private final EventSequence timeline = new EventSequence("Planner Timeline"); + + public GlobalState(ImpaladCatalog catalog, TQueryCtx queryCtx, + AuthorizationConfig authzConfig) { + this.catalog = catalog; + this.queryCtx = queryCtx; + this.authzConfig = authzConfig; + this.lineageGraph = new ColumnLineageGraph(); + } + }; + + private final GlobalState globalState_; + + public boolean containsSubquery() { return globalState_.containsSubquery; } + + /** + * Helper function to reset the global state information about the existence of + * subqueries. + */ + public void resetSubquery() { globalState_.containsSubquery = false; } + + // An analyzer stores analysis state for a single select block. A select block can be + // a top level select statement, or an inline view select block. + // ancestors contains the Analyzers of the enclosing select blocks of 'this' + // (ancestors[0] contains the immediate parent, etc.). + private final ArrayList<Analyzer> ancestors_; + + // map from lowercase table alias to a view definition in this analyzer's scope + private final Map<String, View> localViews_ = Maps.newHashMap(); + + // Map from lowercase table alias to descriptor. Tables without an explicit alias + // are assigned two implicit aliases: the unqualified and fully-qualified table name. + // Such tables have two entries pointing to the same descriptor. If an alias is + // ambiguous, then this map retains the first entry with that alias to simplify error + // checking (duplicate vs. ambiguous alias). + private final Map<String, TupleDescriptor> aliasMap_ = Maps.newHashMap(); + + // Map from tuple id to its corresponding table ref. + private final Map<TupleId, TableRef> tableRefMap_ = Maps.newHashMap(); + + // Set of lowercase ambiguous implicit table aliases. + private final Set<String> ambiguousAliases_ = Sets.newHashSet(); + + // Map from lowercase fully-qualified path to its slot descriptor. Only contains paths + // that have a scalar type as destination (see registerSlotRef()). + private final Map<String, SlotDescriptor> slotPathMap_ = Maps.newHashMap(); + + // Tracks the all tables/views found during analysis that were missing metadata. 
+ private Set<TableName> missingTbls_ = new HashSet<TableName>(); + + // Indicates whether this analyzer/block is guaranteed to have an empty result set + // due to a limit 0 or constant conjunct evaluating to false. + private boolean hasEmptyResultSet_ = false; + + // Indicates whether the select-project-join (spj) portion of this query block + // is guaranteed to return an empty result set. Set due to a constant non-Having + // conjunct evaluating to false. + private boolean hasEmptySpjResultSet_ = false; + + public Analyzer(ImpaladCatalog catalog, TQueryCtx queryCtx, + AuthorizationConfig authzConfig) { + ancestors_ = Lists.newArrayList(); + globalState_ = new GlobalState(catalog, queryCtx, authzConfig); + user_ = new User(TSessionStateUtil.getEffectiveUser(queryCtx.session)); + } + + /** + * Analyzer constructor for nested select block. GlobalState is inherited from the + * parentAnalyzer. + */ + public Analyzer(Analyzer parentAnalyzer) { + this(parentAnalyzer, parentAnalyzer.globalState_); + } + + /** + * Analyzer constructor for nested select block with the specified global state. + */ + private Analyzer(Analyzer parentAnalyzer, GlobalState globalState) { + ancestors_ = Lists.newArrayList(parentAnalyzer); + ancestors_.addAll(parentAnalyzer.ancestors_); + globalState_ = globalState; + missingTbls_ = parentAnalyzer.missingTbls_; + user_ = parentAnalyzer.getUser(); + useHiveColLabels_ = parentAnalyzer.useHiveColLabels_; + authErrorMsg_ = parentAnalyzer.authErrorMsg_; + enablePrivChecks_ = parentAnalyzer.enablePrivChecks_; + isWithClause_ = parentAnalyzer.isWithClause_; + } + + /** + * Returns a new analyzer with the specified parent analyzer but with a new + * global state. + */ + public static Analyzer createWithNewGlobalState(Analyzer parentAnalyzer) { + GlobalState globalState = new GlobalState(parentAnalyzer.globalState_.catalog, + parentAnalyzer.getQueryCtx(), parentAnalyzer.getAuthzConfig()); + return new Analyzer(parentAnalyzer, globalState); + } + + /** + * Makes the given semi-joined tuple visible such that its slots can be referenced. + * If tid is null, makes the currently visible semi-joined tuple invisible again. + */ + public void setVisibleSemiJoinedTuple(TupleId tid) { + Preconditions.checkState(tid == null + || globalState_.semiJoinedTupleIds.containsKey(tid)); + Preconditions.checkState(tid == null || visibleSemiJoinedTupleId_ == null); + visibleSemiJoinedTupleId_ = tid; + } + + public Set<TableName> getMissingTbls() { return missingTbls_; } + public boolean hasMissingTbls() { return !missingTbls_.isEmpty(); } + public boolean hasAncestors() { return !ancestors_.isEmpty(); } + public Analyzer getParentAnalyzer() { + return hasAncestors() ? ancestors_.get(0) : null; + } + + /** + * Returns the analyzer that has an entry for the given tuple descriptor in its + * tableRefMap, or null if no such analyzer could be found. Searches the hierarchy + * of analyzers bottom-up. + */ + public Analyzer findAnalyzer(TupleId tid) { + if (tableRefMap_.containsKey(tid)) return this; + if (hasAncestors()) return getParentAnalyzer().findAnalyzer(tid); + return null; + } + + /** + * Returns a list of each warning logged, indicating if it was logged more than once. 
+ */ + public List<String> getWarnings() { + List<String> result = new ArrayList<String>(); + for (Map.Entry<String, Integer> e : globalState_.warnings.entrySet()) { + String error = e.getKey(); + int count = e.getValue(); + Preconditions.checkState(count > 0); + if (count == 1) { + result.add(error); + } else { + result.add(error + " (" + count + " warnings like this)"); + } + } + return result; + } + + /** + * Registers a local view definition with this analyzer. Throws an exception if a view + * definition with the same alias has already been registered or if the number of + * explicit column labels is greater than the number of columns in the view statement. + */ + public void registerLocalView(View view) throws AnalysisException { + Preconditions.checkState(view.isLocalView()); + if (view.hasColLabels()) { + List<String> viewLabels = view.getColLabels(); + List<String> queryStmtLabels = view.getQueryStmt().getColLabels(); + if (viewLabels.size() > queryStmtLabels.size()) { + throw new AnalysisException("WITH-clause view '" + view.getName() + + "' returns " + queryStmtLabels.size() + " columns, but " + + viewLabels.size() + " labels were specified. The number of column " + + "labels must be smaller or equal to the number of returned columns."); + } + } + if (localViews_.put(view.getName().toLowerCase(), view) != null) { + throw new AnalysisException( + String.format("Duplicate table alias: '%s'", view.getName())); + } + } + + /** + * Creates an returns an empty TupleDescriptor for the given table ref and registers + * it against all its legal aliases. For tables refs with an explicit alias, only the + * explicit alias is legal. For tables refs with no explicit alias, the fully-qualified + * and unqualified table names are legal aliases. Column references against unqualified + * implicit aliases can be ambiguous, therefore, we register such ambiguous aliases + * here. Requires that all views have been substituted. + * Throws if an existing explicit alias or implicit fully-qualified alias + * has already been registered for another table ref. + */ + public TupleDescriptor registerTableRef(TableRef ref) throws AnalysisException { + String uniqueAlias = ref.getUniqueAlias(); + if (aliasMap_.containsKey(uniqueAlias)) { + throw new AnalysisException("Duplicate table alias: '" + uniqueAlias + "'"); + } + + // If ref has no explicit alias, then the unqualified and the fully-qualified table + // names are legal implicit aliases. Column references against unqualified implicit + // aliases can be ambiguous, therefore, we register such ambiguous aliases here. + String unqualifiedAlias = null; + String[] aliases = ref.getAliases(); + if (aliases.length > 1) { + unqualifiedAlias = aliases[1]; + TupleDescriptor tupleDesc = aliasMap_.get(unqualifiedAlias); + if (tupleDesc != null) { + if (tupleDesc.hasExplicitAlias()) { + throw new AnalysisException( + "Duplicate table alias: '" + unqualifiedAlias + "'"); + } else { + ambiguousAliases_.add(unqualifiedAlias); + } + } + } + + // Delegate creation of the tuple descriptor to the concrete table ref. + TupleDescriptor result = ref.createTupleDescriptor(this); + result.setAliases(aliases, ref.hasExplicitAlias()); + // Register all legal aliases. + for (String alias: aliases) { + aliasMap_.put(alias, result); + } + tableRefMap_.put(result.getId(), ref); + return result; + } + + /** + * Resolves the given TableRef into a concrete BaseTableRef, ViewRef or + * CollectionTableRef. 
Returns the new resolved table ref or the given table + * ref if it is already resolved. + * Registers privilege requests and throws an AnalysisException if the tableRef's + * path could not be resolved. The privilege requests are added to ensure that + * an AuthorizationException is preferred over an AnalysisException so as not to + * accidentally reveal the non-existence of tables/databases. + */ + public TableRef resolveTableRef(TableRef tableRef) throws AnalysisException { + // Return the table if it is already resolved. + if (tableRef.isResolved()) return tableRef; + // Try to find a matching local view. + if (tableRef.getPath().size() == 1) { + // Searches the hierarchy of analyzers bottom-up for a registered local view with + // a matching alias. + String viewAlias = tableRef.getPath().get(0).toLowerCase(); + Analyzer analyzer = this; + do { + View localView = analyzer.localViews_.get(viewAlias); + if (localView != null) return new InlineViewRef(localView, tableRef); + analyzer = (analyzer.ancestors_.isEmpty() ? null : analyzer.ancestors_.get(0)); + } while (analyzer != null); + } + + // Resolve the table ref's path and determine what resolved table ref + // to replace it with. + List<String> rawPath = tableRef.getPath(); + Path resolvedPath = null; + try { + resolvedPath = resolvePath(tableRef.getPath(), PathType.TABLE_REF); + } catch (AnalysisException e) { + if (!hasMissingTbls()) { + // Register privilege requests to prefer reporting an authorization error over + // an analysis error. We should not accidentally reveal the non-existence of a + // table/database if the user is not authorized. + if (rawPath.size() > 1) { + registerPrivReq(new PrivilegeRequestBuilder() + .onTable(rawPath.get(0), rawPath.get(1)) + .allOf(Privilege.SELECT).toRequest()); + } + registerPrivReq(new PrivilegeRequestBuilder() + .onTable(getDefaultDb(), rawPath.get(0)) + .allOf(Privilege.SELECT).toRequest()); + } + throw e; + } catch (TableLoadingException e) { + throw new AnalysisException(String.format( + "Failed to load metadata for table: '%s'", Joiner.on(".").join(rawPath)), e); + } + + Preconditions.checkNotNull(resolvedPath); + if (resolvedPath.destTable() != null) { + Table table = resolvedPath.destTable(); + Preconditions.checkNotNull(table); + if (table instanceof View) return new InlineViewRef((View) table, tableRef); + // The table must be a base table. + Preconditions.checkState(table instanceof HdfsTable || + table instanceof KuduTable || + table instanceof HBaseTable || + table instanceof DataSourceTable); + return new BaseTableRef(tableRef, resolvedPath); + } else { + return new CollectionTableRef(tableRef, resolvedPath); + } + } + + /** + * Register conjuncts that are outer joined by a full outer join. For a given + * predicate, we record the last full outer join that outer-joined any of its + * tuple ids. We need this additional information because full-outer joins obey + * different rules with respect to predicate pushdown compared to left and right + * outer joins. 
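A minimal standalone sketch of the local-view lookup described in resolveTableRef() above: single-part table references are first matched against WITH-clause views registered in this query block or any enclosing block, walking parent analyzers bottom-up before falling back to catalog resolution. Class and variable names here are hypothetical and only plain JDK types are used.

import java.util.HashMap;
import java.util.Map;

public class LocalViewLookupSketch {
  private final LocalViewLookupSketch parent;            // enclosing query block, or null
  private final Map<String, String> localViews = new HashMap<>();

  LocalViewLookupSketch(LocalViewLookupSketch parent) { this.parent = parent; }

  // Walk this block and its ancestors bottom-up; return the first matching view,
  // or null to signal that the reference should be resolved against the catalog.
  String resolve(String alias) {
    for (LocalViewLookupSketch block = this; block != null; block = block.parent) {
      String view = block.localViews.get(alias.toLowerCase());
      if (view != null) return view;
    }
    return null;
  }

  public static void main(String[] args) {
    LocalViewLookupSketch outer = new LocalViewLookupSketch(null);
    outer.localViews.put("v", "WITH-clause view defined in the outer block");
    LocalViewLookupSketch inner = new LocalViewLookupSketch(outer);
    System.out.println(inner.resolve("V"));  // found via the parent block
    System.out.println(inner.resolve("t"));  // null: fall back to the catalog
  }
}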
+ */ + public void registerFullOuterJoinedConjunct(Expr e) { + Preconditions.checkState( + !globalState_.fullOuterJoinedConjuncts.containsKey(e.getId())); + List<TupleId> tids = Lists.newArrayList(); + e.getIds(tids, null); + for (TupleId tid: tids) { + if (!globalState_.fullOuterJoinedTupleIds.containsKey(tid)) continue; + TableRef currentOuterJoin = globalState_.fullOuterJoinedTupleIds.get(tid); + globalState_.fullOuterJoinedConjuncts.put(e.getId(), currentOuterJoin); + break; + } + LOG.trace("registerFullOuterJoinedConjunct: " + + globalState_.fullOuterJoinedConjuncts.toString()); + } + + /** + * Register tids as being outer-joined by a full outer join clause represented by + * rhsRef. + */ + public void registerFullOuterJoinedTids(List<TupleId> tids, TableRef rhsRef) { + for (TupleId tid: tids) { + globalState_.fullOuterJoinedTupleIds.put(tid, rhsRef); + } + LOG.trace("registerFullOuterJoinedTids: " + + globalState_.fullOuterJoinedTupleIds.toString()); + } + + /** + * Register tids as being outer-joined by Join clause represented by rhsRef. + */ + public void registerOuterJoinedTids(List<TupleId> tids, TableRef rhsRef) { + for (TupleId tid: tids) { + globalState_.outerJoinedTupleIds.put(tid, rhsRef); + } + LOG.trace("registerOuterJoinedTids: " + globalState_.outerJoinedTupleIds.toString()); + } + + /** + * Register the given tuple id as being the invisible side of a semi-join. + */ + public void registerSemiJoinedTid(TupleId tid, TableRef rhsRef) { + globalState_.semiJoinedTupleIds.put(tid, rhsRef); + } + + /** + * Returns the descriptor of the given explicit or implicit table alias or null if no + * such alias has been registered. + * Throws an AnalysisException if the given table alias is ambiguous. + */ + public TupleDescriptor getDescriptor(String tableAlias) throws AnalysisException { + String lookupAlias = tableAlias.toLowerCase(); + if (ambiguousAliases_.contains(lookupAlias)) { + throw new AnalysisException(String.format( + "Unqualified table alias is ambiguous: '%s'", tableAlias)); + } + return aliasMap_.get(lookupAlias); + } + + public TupleDescriptor getTupleDesc(TupleId id) { + return globalState_.descTbl.getTupleDesc(id); + } + + public SlotDescriptor getSlotDesc(SlotId id) { + return globalState_.descTbl.getSlotDesc(id); + } + + public TableRef getTableRef(TupleId tid) { return tableRefMap_.get(tid); } + + /** + * Given a "table alias"."column alias", return the SlotDescriptor + */ + public SlotDescriptor getSlotDescriptor(String qualifiedColumnName) { + return slotPathMap_.get(qualifiedColumnName); + } + + /** + * Return true if this analyzer has no ancestors. (i.e. false for the analyzer created + * for inline views/ union operands, etc.) + */ + public boolean isRootAnalyzer() { return ancestors_.isEmpty(); } + + /** + * Returns true if the query block corresponding to this analyzer is guaranteed + * to return an empty result set, e.g., due to a limit 0 or a constant predicate + * that evaluates to false. + */ + public boolean hasEmptyResultSet() { return hasEmptyResultSet_; } + public void setHasEmptyResultSet() { hasEmptyResultSet_ = true; } + + /** + * Returns true if the select-project-join portion of this query block returns + * an empty result set. + */ + public boolean hasEmptySpjResultSet() { return hasEmptySpjResultSet_; } + + /** + * Resolves the given raw path according to the given path type, as follows: + * SLOT_REF and STAR: Resolves the path in the context of all registered tuple + * descriptors, considering qualified as well as unqualified matches. 
+ * TABLE_REF: Resolves the path in the context of all registered tuple descriptors + * only considering qualified matches, as well as catalog tables/views. + * + * Path resolution: + * Regardless of the path type, a raw path can have multiple successful resolutions. + * A resolution is said to be 'successful' if all raw path elements can be mapped + * to a corresponding alias/table/column/field. + * + * Path legality: + * A successful resolution may be illegal with respect to the path type, e.g., + * a SlotRef cannot reference intermediate collection types, etc. + * + * Path ambiguity: + * A raw path is ambiguous if it has multiple legal resolutions. Otherwise, + * the ambiguity is resolved in favor of the legal resolution. + * + * Returns the single legal path resolution if it exists. + * Throws if there was no legal resolution or if the path is ambiguous. + */ + public Path resolvePath(List<String> rawPath, PathType pathType) + throws AnalysisException, TableLoadingException { + // We only allow correlated references in predicates of a subquery. + boolean resolveInAncestors = false; + if (pathType == PathType.TABLE_REF || pathType == PathType.ANY) { + resolveInAncestors = true; + } else if (pathType == PathType.SLOT_REF) { + resolveInAncestors = isSubquery_; + } + // Convert all path elements to lower case. + ArrayList<String> lcRawPath = Lists.newArrayListWithCapacity(rawPath.size()); + for (String s: rawPath) lcRawPath.add(s.toLowerCase()); + return resolvePath(lcRawPath, pathType, resolveInAncestors); + } + + private Path resolvePath(List<String> rawPath, PathType pathType, + boolean resolveInAncestors) throws AnalysisException, TableLoadingException { + // List of all candidate paths with different roots. Paths in this list are initially + // unresolved and may be illegal with respect to the pathType. + List<Path> candidates = getTupleDescPaths(rawPath); + + LinkedList<String> errors = Lists.newLinkedList(); + if (pathType == PathType.SLOT_REF || pathType == PathType.STAR) { + // Paths rooted at all of the unique registered tuple descriptors. + for (TableRef tblRef: tableRefMap_.values()) { + candidates.add(new Path(tblRef.getDesc(), rawPath)); + } + } else { + // Always prefer table ref paths rooted at a registered tuples descriptor. + Preconditions.checkState(pathType == PathType.TABLE_REF || + pathType == PathType.ANY); + Path result = resolvePaths(rawPath, candidates, pathType, errors); + if (result != null) return result; + candidates.clear(); + + // Add paths rooted at a table with an unqualified and fully-qualified table name. + int end = Math.min(2, rawPath.size()); + for (int tblNameIdx = 0; tblNameIdx < end; ++tblNameIdx) { + String dbName = (tblNameIdx == 0) ? getDefaultDb() : rawPath.get(0); + String tblName = rawPath.get(tblNameIdx); + Table tbl = null; + try { + tbl = getTable(dbName, tblName); + } catch (AnalysisException e) { + if (hasMissingTbls()) throw e; + // Ignore other exceptions to allow path resolution to continue. 
+ } + if (tbl != null) { + candidates.add(new Path(tbl, rawPath.subList(tblNameIdx + 1, rawPath.size()))); + } + } + } + + Path result = resolvePaths(rawPath, candidates, pathType, errors); + if (result == null && resolveInAncestors && hasAncestors()) { + result = getParentAnalyzer().resolvePath(rawPath, pathType, true); + } + if (result == null) { + Preconditions.checkState(!errors.isEmpty()); + throw new AnalysisException(errors.getFirst()); + } + return result; + } + + /** + * Returns a list of unresolved Paths that are rooted at a registered tuple + * descriptor matching a prefix of the given raw path. + */ + public List<Path> getTupleDescPaths(List<String> rawPath) + throws AnalysisException { + ArrayList<Path> result = Lists.newArrayList(); + + // Path rooted at a tuple desc with an explicit or implicit unqualified alias. + TupleDescriptor rootDesc = getDescriptor(rawPath.get(0)); + if (rootDesc != null) { + result.add(new Path(rootDesc, rawPath.subList(1, rawPath.size()))); + } + + // Path rooted at a tuple desc with an implicit qualified alias. + if (rawPath.size() > 1) { + rootDesc = getDescriptor(rawPath.get(0) + "." + rawPath.get(1)); + if (rootDesc != null) { + result.add(new Path(rootDesc, rawPath.subList(2, rawPath.size()))); + } + } + return result; + } + + /** + * Resolves the given paths and checks them for legality and ambiguity. Returns the + * single legal path resolution if it exists, null otherwise. + * Populates 'errors' with a a prioritized list of error messages starting with the + * most relevant one. The list contains at least one error message if null is returned. + */ + private Path resolvePaths(List<String> rawPath, List<Path> paths, PathType pathType, + LinkedList<String> errors) { + // For generating error messages. + String pathTypeStr = null; + String pathStr = Joiner.on(".").join(rawPath); + if (pathType == PathType.SLOT_REF) { + pathTypeStr = "Column/field reference"; + } else if (pathType == PathType.TABLE_REF) { + pathTypeStr = "Table reference"; + } else if (pathType == PathType.ANY) { + pathTypeStr = "Path"; + } else { + Preconditions.checkState(pathType == PathType.STAR); + pathTypeStr = "Star expression"; + pathStr += ".*"; + } + + List<Path> legalPaths = Lists.newArrayList(); + for (Path p: paths) { + if (!p.resolve()) continue; + + // Check legality of the resolved path. + if (p.isRootedAtTuple() && !isVisible(p.getRootDesc().getId())) { + errors.addLast(String.format( + "Illegal %s '%s' of semi-/anti-joined table '%s'", + pathTypeStr.toLowerCase(), pathStr, p.getRootDesc().getAlias())); + continue; + } + switch (pathType) { + // Illegal cases: + // 1. Destination type is not a collection. + case TABLE_REF: { + if (!p.destType().isCollectionType()) { + errors.addFirst(String.format( + "Illegal table reference to non-collection type: '%s'\n" + + "Path resolved to type: %s", pathStr, p.destType().toSql())); + continue; + } + break; + } + case SLOT_REF: { + // Illegal cases: + // 1. Path contains an intermediate collection reference. + // 2. Destination of the path is a catalog table or a registered alias. + if (p.hasNonDestCollection()) { + errors.addFirst(String.format( + "Illegal column/field reference '%s' with intermediate " + + "collection '%s' of type '%s'", + pathStr, p.getFirstCollectionName(), + p.getFirstCollectionType().toSql())); + continue; + } + // Error should be "Could not resolve...". No need to add it here explicitly. + if (p.getMatchedTypes().isEmpty()) continue; + break; + } + // Illegal cases: + // 1. 
Path contains an intermediate collection reference. + // 2. Destination type of the path is not a struct. + case STAR: { + if (p.hasNonDestCollection()) { + errors.addFirst(String.format( + "Illegal star expression '%s' with intermediate " + + "collection '%s' of type '%s'", + pathStr, p.getFirstCollectionName(), + p.getFirstCollectionType().toSql())); + continue; + } + if (!p.destType().isStructType()) { + errors.addFirst(String.format( + "Cannot expand star in '%s' because path '%s' resolved to type '%s'." + + "\nStar expansion is only valid for paths to a struct type.", + pathStr, Joiner.on(".").join(rawPath), p.destType().toSql())); + continue; + } + break; + } + case ANY: { + // Any path is valid. + break; + } + } + legalPaths.add(p); + } + + if (legalPaths.size() > 1) { + errors.addFirst(String.format("%s is ambiguous: '%s'", + pathTypeStr, pathStr)); + return null; + } + if (legalPaths.isEmpty()) { + if (errors.isEmpty()) { + errors.addFirst(String.format("Could not resolve %s: '%s'", + pathTypeStr.toLowerCase(), pathStr)); + } + return null; + } + return legalPaths.get(0); + } + + /** + * Returns an existing or new SlotDescriptor for the given path. Always returns + * a new empty SlotDescriptor for paths with a collection-typed destination. + */ + public SlotDescriptor registerSlotRef(Path slotPath) throws AnalysisException { + Preconditions.checkState(slotPath.isRootedAtTuple()); + // Always register a new slot descriptor for collection types. The BE currently + // relies on this behavior for setting unnested collection slots to NULL. + if (slotPath.destType().isCollectionType()) { + SlotDescriptor result = addSlotDescriptor(slotPath.getRootDesc()); + result.setPath(slotPath); + registerColumnPrivReq(result); + return result; + } + // SlotRefs with a scalar type are registered against the slot's + // fully-qualified lowercase path. + String key = slotPath.toString(); + SlotDescriptor existingSlotDesc = slotPathMap_.get(key); + if (existingSlotDesc != null) return existingSlotDesc; + SlotDescriptor result = addSlotDescriptor(slotPath.getRootDesc()); + result.setPath(slotPath); + slotPathMap_.put(key, result); + registerColumnPrivReq(result); + return result; + } + + /** + * Registers a column-level privilege request if 'slotDesc' directly or indirectly + * refers to a table column. It handles both scalar and complex-typed columns. + */ + private void registerColumnPrivReq(SlotDescriptor slotDesc) { + Preconditions.checkNotNull(slotDesc.getPath()); + TupleDescriptor tupleDesc = slotDesc.getParent(); + if (tupleDesc.isMaterialized() && tupleDesc.getTable() != null) { + Column column = tupleDesc.getTable().getColumn( + slotDesc.getPath().getRawPath().get(0)); + if (column != null) { + registerPrivReq(new PrivilegeRequestBuilder(). + allOf(Privilege.SELECT).onColumn(tupleDesc.getTableName().getDb(), + tupleDesc.getTableName().getTbl(), column.getName()).toRequest()); + } + } + } + + /** + * Creates a new slot descriptor and related state in globalState. + */ + public SlotDescriptor addSlotDescriptor(TupleDescriptor tupleDesc) { + SlotDescriptor result = globalState_.descTbl.addSlotDescriptor(tupleDesc); + globalState_.blockBySlot.put(result.getId(), this); + return result; + } + + /** + * Adds a new slot descriptor in tupleDesc that is identical to srcSlotDesc + * except for the path and slot id. 
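A rough standalone model of the slot-reuse rule documented for registerSlotRef() above: scalar paths are keyed by their lowercase fully-qualified string and reuse any previously registered slot, while collection-typed paths always get a fresh slot. The names and integer slot ids below are hypothetical; this sketches only the bookkeeping, not the actual descriptor table.

import java.util.HashMap;
import java.util.Map;

public class SlotRegistrySketch {
  private final Map<String, Integer> slotPathMap = new HashMap<>();
  private int nextSlotId = 0;

  // Collection-typed paths always get a new slot id; scalar paths are canonicalized
  // to a lowercase key and reuse the slot registered for that key, if any.
  int register(String qualifiedPath, boolean isCollectionType) {
    if (isCollectionType) return nextSlotId++;
    return slotPathMap.computeIfAbsent(qualifiedPath.toLowerCase(), k -> nextSlotId++);
  }

  public static void main(String[] args) {
    SlotRegistrySketch slots = new SlotRegistrySketch();
    System.out.println(slots.register("t.id", false));        // 0
    System.out.println(slots.register("T.ID", false));        // 0 (reused)
    System.out.println(slots.register("t.int_array", true));  // 1
    System.out.println(slots.register("t.int_array", true));  // 2 (always new)
  }
}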
+ */ + public SlotDescriptor copySlotDescriptor(SlotDescriptor srcSlotDesc, + TupleDescriptor tupleDesc) { + SlotDescriptor result = globalState_.descTbl.addSlotDescriptor(tupleDesc); + globalState_.blockBySlot.put(result.getId(), this); + result.setSourceExprs(srcSlotDesc.getSourceExprs()); + result.setLabel(srcSlotDesc.getLabel()); + result.setStats(srcSlotDesc.getStats()); + result.setType(srcSlotDesc.getType()); + result.setItemTupleDesc(srcSlotDesc.getItemTupleDesc()); + return result; + } + + /** + * Register all conjuncts in a list of predicates as Having-clause conjuncts. + */ + public void registerConjuncts(List<Expr> l) throws AnalysisException { + for (Expr e: l) { + registerConjuncts(e, true); + } + } + + /** + * Register all conjuncts in 'conjuncts' that make up the On-clause of the given + * right-hand side of a join. Assigns each conjunct a unique id. If rhsRef is + * the right-hand side of an outer join, then the conjuncts conjuncts are + * registered such that they can only be evaluated by the node implementing that + * join. + */ + public void registerOnClauseConjuncts(List<Expr> conjuncts, TableRef rhsRef) + throws AnalysisException { + Preconditions.checkNotNull(rhsRef); + Preconditions.checkNotNull(conjuncts); + List<ExprId> ojConjuncts = null; + if (rhsRef.getJoinOp().isOuterJoin()) { + ojConjuncts = globalState_.conjunctsByOjClause.get(rhsRef.getId()); + if (ojConjuncts == null) { + ojConjuncts = Lists.newArrayList(); + globalState_.conjunctsByOjClause.put(rhsRef.getId(), ojConjuncts); + } + } + for (Expr conjunct: conjuncts) { + conjunct.setIsOnClauseConjunct(true); + registerConjunct(conjunct); + if (rhsRef.getJoinOp().isOuterJoin()) { + globalState_.ojClauseByConjunct.put(conjunct.getId(), rhsRef); + ojConjuncts.add(conjunct.getId()); + } + if (rhsRef.getJoinOp().isSemiJoin()) { + globalState_.sjClauseByConjunct.put(conjunct.getId(), rhsRef); + } + if (rhsRef.getJoinOp().isInnerJoin()) { + globalState_.ijClauseByConjunct.put(conjunct.getId(), rhsRef); + } + markConstantConjunct(conjunct, false); + } + } + + /** + * Register all conjuncts that make up 'e'. If fromHavingClause is false, this conjunct + * is assumed to originate from a WHERE or ON clause. + */ + public void registerConjuncts(Expr e, boolean fromHavingClause) + throws AnalysisException { + for (Expr conjunct: e.getConjuncts()) { + registerConjunct(conjunct); + markConstantConjunct(conjunct, fromHavingClause); + } + } + + /** + * If the given conjunct is a constant non-oj conjunct, marks it as assigned, and + * evaluates the conjunct. If the conjunct evaluates to false, marks this query + * block as having an empty result set or as having an empty select-project-join + * portion, if fromHavingClause is true or false, respectively. + * No-op if the conjunct is not constant or is outer joined. 
+ * Throws an AnalysisException if there is an error evaluating `conjunct` + */ + private void markConstantConjunct(Expr conjunct, boolean fromHavingClause) + throws AnalysisException { + if (!conjunct.isConstant() || isOjConjunct(conjunct)) return; + markConjunctAssigned(conjunct); + if ((!fromHavingClause && !hasEmptySpjResultSet_) + || (fromHavingClause && !hasEmptyResultSet_)) { + try { + if (!FeSupport.EvalPredicate(conjunct, globalState_.queryCtx)) { + if (fromHavingClause) { + hasEmptyResultSet_ = true; + } else { + hasEmptySpjResultSet_ = true; + } + } + } catch (InternalException ex) { + throw new AnalysisException("Error evaluating \"" + conjunct.toSql() + "\"", ex); + } + } + } + + /** + * Assigns a new id to the given conjunct and registers it with all tuple and slot ids + * it references and with the global conjunct list. + */ + private void registerConjunct(Expr e) { + // always generate a new expr id; this might be a cloned conjunct that already + // has the id of its origin set + e.setId(globalState_.conjunctIdGenerator.getNextId()); + globalState_.conjuncts.put(e.getId(), e); + + ArrayList<TupleId> tupleIds = Lists.newArrayList(); + ArrayList<SlotId> slotIds = Lists.newArrayList(); + e.getIds(tupleIds, slotIds); + registerFullOuterJoinedConjunct(e); + + // register single tid conjuncts + if (tupleIds.size() == 1) globalState_.singleTidConjuncts.add(e.getId()); + + LOG.trace("register tuple/slotConjunct: " + Integer.toString(e.getId().asInt()) + + " " + e.toSql() + " " + e.debugString()); + + if (!(e instanceof BinaryPredicate)) return; + BinaryPredicate binaryPred = (BinaryPredicate) e; + + // check whether this is an equi-join predicate, ie, something of the + // form <expr1> = <expr2> where at least one of the exprs is bound by + // exactly one tuple id + if (binaryPred.getOp() != BinaryPredicate.Operator.EQ && + binaryPred.getOp() != BinaryPredicate.Operator.NULL_MATCHING_EQ && + binaryPred.getOp() != BinaryPredicate.Operator.NOT_DISTINCT) { + return; + } + // the binary predicate must refer to at least two tuples to be an eqJoinConjunct + if (tupleIds.size() < 2) return; + + // examine children and update eqJoinConjuncts + for (int i = 0; i < 2; ++i) { + tupleIds = Lists.newArrayList(); + binaryPred.getChild(i).getIds(tupleIds, null); + if (tupleIds.size() == 1) { + if (!globalState_.eqJoinConjuncts.containsKey(tupleIds.get(0))) { + List<ExprId> conjunctIds = Lists.newArrayList(); + conjunctIds.add(e.getId()); + globalState_.eqJoinConjuncts.put(tupleIds.get(0), conjunctIds); + } else { + globalState_.eqJoinConjuncts.get(tupleIds.get(0)).add(e.getId()); + } + binaryPred.setIsEqJoinConjunct(true); + LOG.trace("register eqJoinConjunct: " + Integer.toString(e.getId().asInt())); + } + } + } + + /** + * Create and register an auxiliary predicate to express an equivalence between two + * exprs (BinaryPredicate with EQ); this predicate does not need to be assigned, but + * it's used for equivalence class computation. + * Does nothing if the lhs or rhs expr are NULL. Registering an equivalence with NULL + * would be incorrect, because <expr> = NULL is false (even NULL = NULL). + */ + public void createAuxEquivPredicate(Expr lhs, Expr rhs) { + // Check the expr type as well as the class because NullLiteral could have been + // implicitly cast to a type different than NULL. 
+ if (lhs instanceof NullLiteral || rhs instanceof NullLiteral || + lhs.getType().isNull() || rhs.getType().isNull()) { + return; + } + // create an eq predicate between lhs and rhs + BinaryPredicate p = new BinaryPredicate(BinaryPredicate.Operator.EQ, lhs, rhs); + p.setIsAuxExpr(); + LOG.trace("register equiv predicate: " + p.toSql() + " " + p.debugString()); + registerConjunct(p); + } + + /** + * Creates an inferred equality predicate between the given slots. + */ + public BinaryPredicate createInferredEqPred(SlotId lhsSlotId, SlotId rhsSlotId) { + BinaryPredicate pred = new BinaryPredicate(BinaryPredicate.Operator.EQ, + new SlotRef(globalState_.descTbl.getSlotDesc(lhsSlotId)), + new SlotRef(globalState_.descTbl.getSlotDesc(rhsSlotId))); + pred.setIsInferred(); + // create casts if needed + pred.analyzeNoThrow(this); + return pred; + } + + /** + * Return all unassigned non-constant registered conjuncts that are fully bound by + * given list of tuple ids. If 'inclOjConjuncts' is false, conjuncts tied to an + * Outer Join clause are excluded. + */ + public List<Expr> getUnassignedConjuncts( + List<TupleId> tupleIds, boolean inclOjConjuncts) { + LOG.trace("getUnassignedConjuncts for " + Id.printIds(tupleIds)); + List<Expr> result = Lists.newArrayList(); + for (Expr e: globalState_.conjuncts.values()) { + if (e.isBoundByTupleIds(tupleIds) + && !e.isAuxExpr() + && !globalState_.assignedConjuncts.contains(e.getId()) + && ((inclOjConjuncts && !e.isConstant()) + || !globalState_.ojClauseByConjunct.containsKey(e.getId()))) { + result.add(e); + LOG.trace("getUnassignedConjunct: " + e.toSql()); + } + } + return result; + } + + public boolean isOjConjunct(Expr e) { + return globalState_.ojClauseByConjunct.containsKey(e.getId()); + } + + public boolean isIjConjunct(Expr e) { + return globalState_.ijClauseByConjunct.containsKey(e.getId()); + } + + public TableRef getFullOuterJoinRef(Expr e) { + return globalState_.fullOuterJoinedConjuncts.get(e.getId()); + } + + public boolean isFullOuterJoined(Expr e) { + return globalState_.fullOuterJoinedConjuncts.containsKey(e.getId()); + } + + /** + * Return all unassigned registered conjuncts for node's table ref ids. + * Wrapper around getUnassignedConjuncts(List<TupleId> tupleIds). + */ + public List<Expr> getUnassignedConjuncts(PlanNode node) { + return getUnassignedConjuncts(node.getTblRefIds()); + } + + /** + * Return all unassigned registered conjuncts that are fully bound by the given + * (logical) tuple ids, can be evaluated by 'tupleIds' and are not tied to an + * Outer Join clause. + */ + public List<Expr> getUnassignedConjuncts(List<TupleId> tupleIds) { + LOG.trace("getUnassignedConjuncts for node with " + Id.printIds(tupleIds)); + List<Expr> result = Lists.newArrayList(); + for (Expr e: getUnassignedConjuncts(tupleIds, true)) { + if (canEvalPredicate(tupleIds, e)) { + result.add(e); + LOG.trace("getUnassignedConjunct: " + e.toSql()); + } + } + return result; + } + + /** + * Returns true if e must be evaluated by a join node. Note that it may still be + * safe to evaluate e elsewhere as well, but in any case the join must evaluate e. 
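The equi-join bookkeeping in registerConjunct() above amounts to recording an equality conjunct under each tuple id that fully binds exactly one of its children, so a predicate like t1.a = t2.b ends up with one entry per side. A minimal sketch of that bookkeeping with hypothetical integer ids and plain JDK maps:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class EqJoinConjunctSketch {
  public static void main(String[] args) {
    // eqJoinConjuncts: tuple id -> conjunct ids that have one side bound by that tuple.
    Map<Integer, List<Integer>> eqJoinConjuncts = new HashMap<>();
    int conjunctId = 7;              // id assigned when the conjunct is registered
    int[] childTupleIds = {1, 2};    // t1.a = t2.b: each child is bound by a single tuple
    for (int tid : childTupleIds) {
      eqJoinConjuncts.computeIfAbsent(tid, k -> new ArrayList<>()).add(conjunctId);
    }
    System.out.println(eqJoinConjuncts);  // e.g. {1=[7], 2=[7]}
  }
}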
+ */ + public boolean evalByJoin(Expr e) { + List<TupleId> tids = Lists.newArrayList(); + e.getIds(tids, null); + if (tids.isEmpty()) return false; + if (tids.size() > 1 || isOjConjunct(e) || isFullOuterJoined(e) + || (isOuterJoined(tids.get(0)) + && (!e.isOnClauseConjunct() || isIjConjunct(e))) + || (isAntiJoinedConjunct(e) && !isSemiJoined(tids.get(0)))) { + return true; + } + return false; + } + + /** + * Return all unassigned conjuncts of the outer join referenced by right-hand side + * table ref. + */ + public List<Expr> getUnassignedOjConjuncts(TableRef ref) { + Preconditions.checkState(ref.getJoinOp().isOuterJoin()); + List<Expr> result = Lists.newArrayList(); + List<ExprId> candidates = globalState_.conjunctsByOjClause.get(ref.getId()); + if (candidates == null) return result; + for (ExprId conjunctId: candidates) { + if (!globalState_.assignedConjuncts.contains(conjunctId)) { + Expr e = globalState_.conjuncts.get(conjunctId); + Preconditions.checkNotNull(e); + result.add(e); + LOG.trace("getUnassignedOjConjunct: " + e.toSql()); + } + } + return result; + } + + /** + * Return rhs ref of last Join clause that outer-joined id. + */ + public TableRef getLastOjClause(TupleId id) { + return globalState_.outerJoinedTupleIds.get(id); + } + + /** + * Return slot descriptor corresponding to column referenced in the context of + * tupleDesc, or null if no such reference exists. + */ + public SlotDescriptor getColumnSlot(TupleDescriptor tupleDesc, Column col) { + for (SlotDescriptor slotDesc: tupleDesc.getSlots()) { + if (slotDesc.getColumn() == col) return slotDesc; + } + return null; + } + + public DescriptorTable getDescTbl() { return globalState_.descTbl; } + public ImpaladCatalog getCatalog() { return globalState_.catalog; } + public Set<String> getAliases() { return aliasMap_.keySet(); } + + /** + * Returns list of candidate equi-join conjuncts to be evaluated by the join node + * that is specified by the table ref ids of its left and right children. + * If the join to be performed is an outer join, then only equi-join conjuncts + * from its On-clause are returned. If an equi-join conjunct is full outer joined, + * then it is only added to the result if this join is the one to full-outer join it. + */ + public List<Expr> getEqJoinConjuncts(List<TupleId> lhsTblRefIds, + List<TupleId> rhsTblRefIds) { + // Contains all equi-join conjuncts that have one child fully bound by one of the + // rhs table ref ids (the other child is not bound by that rhs table ref id). + List<ExprId> conjunctIds = Lists.newArrayList(); + for (TupleId rhsId: rhsTblRefIds) { + List<ExprId> cids = globalState_.eqJoinConjuncts.get(rhsId); + if (cids == null) continue; + for (ExprId eid: cids) { + if (!conjunctIds.contains(eid)) conjunctIds.add(eid); + } + } + + // Since we currently prevent join re-reordering across outer joins, we can never + // have a bushy outer join with multiple rhs table ref ids. A busy outer join can + // only be constructed with an inline view (which has a single table ref id). + List<ExprId> ojClauseConjuncts = null; + if (rhsTblRefIds.size() == 1) { + ojClauseConjuncts = globalState_.conjunctsByOjClause.get(rhsTblRefIds.get(0)); + } + + // List of table ref ids that the join node will 'materialize'. 
+ List<TupleId> nodeTblRefIds = Lists.newArrayList(lhsTblRefIds); + nodeTblRefIds.addAll(rhsTblRefIds); + List<Expr> result = Lists.newArrayList(); + for (ExprId conjunctId: conjunctIds) { + Expr e = globalState_.conjuncts.get(conjunctId); + Preconditions.checkState(e != null); + if (!canEvalFullOuterJoinedConjunct(e, nodeTblRefIds) || + !canEvalAntiJoinedConjunct(e, nodeTblRefIds)) { + continue; + } + if (ojClauseConjuncts != null && !ojClauseConjuncts.contains(conjunctId)) continue; + result.add(e); + } + return result; + } + + /** + * Checks if a conjunct can be evaluated at a node materializing a list of tuple ids + * 'tids'. + */ + public boolean canEvalFullOuterJoinedConjunct(Expr e, List<TupleId> tids) { + TableRef fullOuterJoin = getFullOuterJoinRef(e); + if (fullOuterJoin == null) return true; + return tids.containsAll(fullOuterJoin.getAllTableRefIds()); + } + + /** + * Returns true if predicate 'e' can be correctly evaluated by a tree materializing + * 'tupleIds', otherwise false: + * - the predicate needs to be bound by tupleIds + * - an On clause predicate against the non-nullable side of an Outer Join clause + * can only be correctly evaluated by the join node that materializes the + * Outer Join clause + * - otherwise, a predicate can only be correctly evaluated if for all outer-joined + * referenced tids the last join to outer-join this tid has been materialized + */ + public boolean canEvalPredicate(List<TupleId> tupleIds, Expr e) { + LOG.trace("canEval: " + e.toSql() + " " + e.debugString() + " " + + Id.printIds(tupleIds)); + if (!e.isBoundByTupleIds(tupleIds)) return false; + ArrayList<TupleId> tids = Lists.newArrayList(); + e.getIds(tids, null); + if (tids.isEmpty()) return true; + + if (e.isOnClauseConjunct()) { + if (tids.size() > 1) { + // If the conjunct is from the ON-clause of an anti join, check if we can + // assign it to this node. + if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds); + // bail if this is from an OJ On clause; the join node will pick + // it up later via getUnassignedOjConjuncts() + if (globalState_.ojClauseByConjunct.containsKey(e.getId())) return false; + // If this is not from an OJ On clause (e.g. where clause or On clause of an + // inner join) and is full-outer joined, we need to make sure it is not + // assigned below the full outer join node that outer-joined it. + return canEvalFullOuterJoinedConjunct(e, tupleIds); + } + + TupleId tid = tids.get(0); + if (globalState_.ojClauseByConjunct.containsKey(e.getId())) { + // OJ On-clause predicate: okay if it's from + // the same On clause that makes tid nullable + // (otherwise e needn't be true when that tuple is set) + if (!globalState_.outerJoinedTupleIds.containsKey(tid)) return false; + if (globalState_.ojClauseByConjunct.get(e.getId()) + != globalState_.outerJoinedTupleIds.get(tid)) { + return false; + } + // Single tuple id conjuncts specified in the FOJ On-clause are not allowed to be + // assigned below that full outer join in the operator tree. + TableRef tblRef = globalState_.ojClauseByConjunct.get(e.getId()); + if (tblRef.getJoinOp().isFullOuterJoin()) return false; + } else { + // Non-OJ On-clause conjunct. + if (isOuterJoined(tid)) { + // If the conjunct references an outer-joined tuple, then evaluate the + // conjunct at the join that the On-clause belongs to. 
+ TableRef onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId()); + Preconditions.checkNotNull(onClauseTableRef); + return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds()); + } + // If this single tid conjunct is from the On-clause of an anti-join, check if we + // can assign it to this node. + if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds); + } + // Single tid predicate that is not from an OJ On-clause and is outer-joined by a + // full outer join cannot be assigned below that full outer join in the + // operator tree. + return canEvalFullOuterJoinedConjunct(e, tupleIds); + } + if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds); + + for (TupleId tid: tids) { + LOG.trace("canEval: checking tid " + tid.toString()); + TableRef rhsRef = getLastOjClause(tid); + // this is not outer-joined; ignore + if (rhsRef == null) continue; + // check whether the last join to outer-join 'tid' is materialized by tupleIds + boolean contains = tupleIds.containsAll(rhsRef.getAllTableRefIds()); + LOG.trace("canEval: contains=" + (contains ? "true " : "false ") + + Id.printIds(tupleIds) + " " + Id.printIds(rhsRef.getAllTableRefIds())); + if (!tupleIds.containsAll(rhsRef.getAllTableRefIds())) return false; + } + return true; + } + + /** + * Checks if a conjunct from the On-clause of an anti join can be evaluated in a node + * that materializes a given list of tuple ids. + */ + public boolean canEvalAntiJoinedConjunct(Expr e, List<TupleId> nodeTupleIds) { + TableRef antiJoinRef = getAntiJoinRef(e); + if (antiJoinRef == null) return true; + List<TupleId> tids = Lists.newArrayList(); + e.getIds(tids, null); + if (tids.size() > 1) { + return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds()) + && antiJoinRef.getAllTableRefIds().containsAll(nodeTupleIds); + } + // A single tid conjunct that is anti-joined can be safely assigned to a + // node below the anti join that specified it. + return globalState_.semiJoinedTupleIds.containsKey(tids.get(0)); + } + + /** + * Returns a list of predicates that are fully bound by destTid. Predicates are derived + * by replacing the slots of a source predicate with slots of the destTid, if for each + * source slot there is an equivalent slot in destTid. + * In particular, the returned list contains predicates that must be evaluated + * at a join node (bound to outer-joined tuple) but can also be safely evaluated by a + * plan node materializing destTid. Such predicates are not marked as assigned. + * All other inferred predicates are marked as assigned if 'markAssigned' + * is true. This function returns bound predicates regardless of whether the source + * predicated have been assigned. It is up to the caller to decide if a bound predicate + * should actually be used. + * Destination slots in destTid can be ignored by passing them in ignoreSlots. + * TODO: exclude UDFs from predicate propagation? 
+   * their overloaded variants could
+   * have very different semantics
+   */
+  public ArrayList<Expr> getBoundPredicates(TupleId destTid, Set<SlotId> ignoreSlots,
+      boolean markAssigned) {
+    ArrayList<Expr> result = Lists.newArrayList();
+    for (ExprId srcConjunctId: globalState_.singleTidConjuncts) {
+      Expr srcConjunct = globalState_.conjuncts.get(srcConjunctId);
+      if (srcConjunct instanceof SlotRef) continue;
+      Preconditions.checkNotNull(srcConjunct);
+      List<TupleId> srcTids = Lists.newArrayList();
+      List<SlotId> srcSids = Lists.newArrayList();
+      srcConjunct.getIds(srcTids, srcSids);
+      Preconditions.checkState(srcTids.size() == 1);
+
+      // Generate slot-mappings to bind srcConjunct to destTid.
+      TupleId srcTid = srcTids.get(0);
+      List<List<SlotId>> allDestSids =
+          getEquivDestSlotIds(srcTid, srcSids, destTid, ignoreSlots);
+      if (allDestSids.isEmpty()) continue;
+
+      // Indicates whether the source slots have equivalent slots that belong
+      // to an outer-joined tuple.
+      boolean hasOuterJoinedTuple = false;
+      for (SlotId srcSid: srcSids) {
+        if (hasOuterJoinedTuple(globalState_.equivClassBySlotId.get(srcSid))) {
+          hasOuterJoinedTuple = true;
+          break;
+        }
+      }
+
+      // It is incorrect to propagate predicates into a plan subtree that is on the
+      // nullable side of an outer join if the predicate evaluates to true when all
+      // its referenced tuples are NULL. The check below is conservative because the
+      // outer-joined tuple making 'hasOuterJoinedTuple' true could be in a parent block
+      // of 'srcConjunct', in which case it is safe to propagate 'srcConjunct' within
+      // child blocks of the outer-joined parent block.
+      // TODO: Make the check precise by considering the blocks (analyzers) where the
+      // outer-joined tuples in the dest slot's equivalence classes appear
+      // relative to 'srcConjunct'.
+      if (hasOuterJoinedTuple && isTrueWithNullSlots(srcConjunct)) continue;
+
+      // if srcConjunct comes out of an OJ's On clause, we need to make sure it's the
+      // same as the one that makes destTid nullable
+      // (otherwise srcConjunct needn't be true when destTid is set)
+      if (globalState_.ojClauseByConjunct.containsKey(srcConjunct.getId())) {
+        if (!globalState_.outerJoinedTupleIds.containsKey(destTid)) continue;
+        if (globalState_.ojClauseByConjunct.get(srcConjunct.getId())
+            != globalState_.outerJoinedTupleIds.get(destTid)) {
+          continue;
+        }
+        // Do not propagate conjuncts from the on-clause of full-outer or anti-joins.
+        TableRef tblRef = globalState_.ojClauseByConjunct.get(srcConjunct.getId());
+        if (tblRef.getJoinOp().isFullOuterJoin()) continue;
+      }
+
+      // Conjuncts specified in the ON-clause of an anti-join must be evaluated at that
+      // join node.
+      if (isAntiJoinedConjunct(srcConjunct)) continue;
+
+      // Generate predicates for all src-to-dest slot mappings.
+      for (List<SlotId> destSids: allDestSids) {
+        Preconditions.checkState(destSids.size() == srcSids.size());
+        Expr p;
+        if (srcSids.containsAll(destSids)) {
+          p = srcConjunct;
+        } else {
+          ExprSubstitutionMap smap = new ExprSubstitutionMap();
+          for (int i = 0; i < srcSids.size(); ++i) {
+            smap.put(
+                new SlotRef(globalState_.descTbl.getSlotDesc(srcSids.get(i))),
+                new SlotRef(globalState_.descTbl.getSlotDesc(destSids.get(i))));
+          }
+          try {
+            p = srcConjunct.trySubstitute(smap, this, false);
+          } catch (ImpalaException exc) {
+            // not an executable predicate; ignore
+            continue;
+          }
+          // Unset the id because this bound predicate itself is not registered, and
+          // to prevent callers from inadvertently marking the srcConjunct as assigned.
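+          // Illustrative example (hypothetical columns): a source conjunct t1.x < 10
+          // whose slot has a value transfer to t2.y is rewritten here into the bound
+          // predicate t2.y < 10 against the destination tuple.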
+          p.setId(null);
+          if (p instanceof BinaryPredicate) ((BinaryPredicate) p).setIsInferred();
+          LOG.trace("new pred: " + p.toSql() + " " + p.debugString());
+        }
+
+        if (markAssigned) {
+          // predicate assignment doesn't hold if:
+          // - the application against slotId doesn't transfer the value back to its
+          //   originating slot
+          // - the original predicate is on an OJ'd table but doesn't originate from
+          //   that table's OJ clause's ON clause (if it comes from anywhere but that
+          //   ON clause, it needs to be evaluated directly by the join node that
+          //   materializes the OJ'd table)
+          boolean reverseValueTransfer = true;
+          for (int i = 0; i < srcSids.size(); ++i) {
+            if (!hasValueTransfer(destSids.get(i), srcSids.get(i))) {
+              reverseValueTransfer = false;
+              break;
+            }
+          }
+
+          // Check if either srcConjunct or the generated predicate needs to be evaluated
+          // at a join node (IMPALA-2018).
+          boolean evalByJoin =
+              (evalByJoin(srcConjunct)
+                  && (globalState_.ojClauseByConjunct.get(srcConjunct.getId())
+                      != globalState_.outerJoinedTupleIds.get(srcTid)))
+              || (evalByJoin(p)
+                  && (globalState_.ojClauseByConjunct.get(p.getId())
+                      != globalState_.outerJoinedTupleIds.get(destTid)));
+
+          // mark all bound predicates including duplicate ones
+          if (reverseValueTransfer && !evalByJoin) markConjunctAssigned(srcConjunct);
+        }
+
+        // check if we already created this predicate
+        if (!result.contains(p)) result.add(p);
+      }
+    }
+    return result;
+  }
+
+  public ArrayList<Expr> getBoundPredicates(TupleId destTid) {
+    return getBoundPredicates(destTid, new HashSet<SlotId>(), true);
+  }
+
+  /**
+   * Modifies the analysis state associated with the rhs table ref of an outer join
+   * to accommodate a join inversion that changes the rhs table ref of the join from
+   * oldRhsTbl to newRhsTbl.
+   * TODO: Revisit this function and how outer joins are inverted. This function
+   * should not be necessary because the semantics of an inverted outer join do
+   * not change. This function will naturally become obsolete when we can transform
+   * outer joins with otherPredicates into inner joins.
+   */
+  public void invertOuterJoinState(TableRef oldRhsTbl, TableRef newRhsTbl) {
+    Preconditions.checkState(oldRhsTbl.getJoinOp().isOuterJoin());
+    // Invert analysis state for an outer join.
+    List<ExprId> conjunctIds =
+        globalState_.conjunctsByOjClause.remove(oldRhsTbl.getId());
+    if (conjunctIds != null) {
+      globalState_.conjunctsByOjClause.put(newRhsTbl.getId(), conjunctIds);
+      for (ExprId eid: conjunctIds) {
+        globalState_.ojClauseByConjunct.put(eid, newRhsTbl);
+      }
+    } else {
+      // An outer join is allowed not to have an On-clause if the rhs table ref is
+      // correlated or relative.
+      Preconditions.checkState(oldRhsTbl.isCorrelated() || oldRhsTbl.isRelative());
+    }
+    for (Map.Entry<TupleId, TableRef> e: globalState_.outerJoinedTupleIds.entrySet()) {
+      if (e.getValue() == oldRhsTbl) e.setValue(newRhsTbl);
+    }
+  }
+
+  /**
+   * For each equivalence class, adds/removes predicates from conjuncts such that it
+   * contains a minimum set of <lhsSlot> = <rhsSlot> predicates that establish the known
+   * equivalences between slots in lhsTids and rhsTids, which must be disjoint.
+   * Preserves original conjuncts when possible. Assumes that predicates for establishing
+   * equivalences among slots in only lhsTids and only rhsTids have already been
+   * established. This function adds the remaining predicates to "connect" the disjoint
+   * equivalent slot sets of lhsTids and rhsTids.
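+   * Illustrative example (hypothetical slots a, b, c, d): if the lhs plan already
+   * enforces a = b, the rhs plan already enforces c = d, and all four slots are in
+   * one equivalence class, then a single predicate such as a = c suffices to connect
+   * the two slot sets; adding b = d as well would be redundant.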
+   * The intent of this function is to enable construction of a minimum spanning tree
+   * to cover the known slot equivalences. This function should be called for join
+   * nodes during plan generation to (1) remove redundant join predicates, and (2)
+   * establish equivalences among slots materialized at that join node.
+   * TODO: Consider optimizing for the cheapest minimum set of predicates.
+   * TODO: Consider caching the DisjointSet during plan generation instead of
+   * re-creating it here on every invocation.
+   */
+  public <T extends Expr> void createEquivConjuncts(List<TupleId> lhsTids,
+      List<TupleId> rhsTids, List<T> conjuncts) {
+    Preconditions.checkState(Collections.disjoint(lhsTids, rhsTids));
+
+    // Equivalence classes only containing slots belonging to lhsTids.
+    Map<EquivalenceClassId, List<SlotId>> lhsEquivClasses =
+        getEquivClasses(lhsTids);
+
+    // Equivalence classes only containing slots belonging to rhsTids.
+    Map<EquivalenceClassId, List<SlotId>> rhsEquivClasses =
+        getEquivClasses(rhsTids);
+
+    // Maps from a slot id to its set of equivalent slots. Used to track equivalences
+    // that have been established by predicates assigned/generated to plan nodes
+    // materializing lhsTids as well as the given conjuncts.
+    DisjointSet<SlotId> partialEquivSlots = new DisjointSet<SlotId>();
+    // Add the partial equivalences to the partialEquivSlots map. The equivalent-slot
+    // sets of slots from lhsTids are disjoint from those of slots from rhsTids.
+    // We need to 'connect' the disjoint slot sets by constructing a new predicate
+    // for each equivalence class (unless there is already one in 'conjuncts').
+    for (List<SlotId> partialEquivClass: lhsEquivClasses.values()) {
+      partialEquivSlots.bulkUnion(partialEquivClass);
+    }
+    for (List<SlotId> partialEquivClass: rhsEquivClasses.values()) {
+      partialEquivSlots.bulkUnion(partialEquivClass);
+    }
+
+    // Set of outer-joined slots referenced by conjuncts.
+    Set<SlotId> outerJoinedSlots = Sets.newHashSet();
+
+    // Update partialEquivSlots based on equality predicates in 'conjuncts'. Removes
+    // redundant conjuncts, unless they reference outer-joined slots (see below).
+    Iterator<T> conjunctIter = conjuncts.iterator();
+    while (conjunctIter.hasNext()) {
+      Expr conjunct = conjunctIter.next();
+      Pair<SlotId, SlotId> eqSlots = BinaryPredicate.getEqSlots(conjunct);
+      if (eqSlots == null) continue;
+      EquivalenceClassId firstEqClassId = getEquivClassId(eqSlots.first);
+      EquivalenceClassId secondEqClassId = getEquivClassId(eqSlots.second);
+      // slots may not be in the same eq class due to outer joins
+      if (!firstEqClassId.equals(secondEqClassId)) continue;
+
+      // Retain an otherwise redundant predicate if it references a slot of an
+      // outer-joined tuple that is not already referenced by another join predicate
+      // to maintain that the rows must satisfy outer-joined-slot IS NOT NULL
+      // (otherwise NULL tuples from outer joins could survive).
+      // TODO: Consider better fixes for outer-joined slots: (1) Create IS NOT NULL
+      // predicates and place them at the lowest possible plan node. (2) Convert outer
+      // joins into inner joins (or full outer joins into left/right outer joins).
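+      // Illustrative example (hypothetical tables): when joining
+      // (t1 LEFT OUTER JOIN t2 ON t1.a = t2.b) with t3 ON (t1.a = t3.c AND
+      // t2.b = t3.c), the conjunct t2.b = t3.c may add no new equivalence once
+      // t1.a = t3.c has been kept, but it is retained because it rejects rows in
+      // which t2.b is NULL from the outer join.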
+      boolean filtersOuterJoinNulls = false;
+      if (isOuterJoined(eqSlots.first)
+          && lhsTids.contains(getTupleId(eqSlots.first))
+          && !outerJoinedSlots.contains(eqSlots.first)) {
+        outerJoinedSlots.add(eqSlots.first);
+        filtersOuterJoinNulls = true;
+      }
+      if (isOuterJoined(eqSlots.second)
+          && lhsTids.contains(getTupleId(eqSlots.second))
+          && !outerJoinedSlots.contains(eqSlots.second)) {
+        outerJoinedSlots.add(eqSlots.second);
+        filtersOuterJoinNulls = true;
+      }
+      // retain conjunct if it connects two formerly unconnected equiv classes or
+      // it is required for outer-join semantics
+      if (!partialEquivSlots.union(eqSlots.first, eqSlots.second)
+          && !filtersOuterJoinNulls) {
+        conjunctIter.remove();
+      }
+    }
+
+    // For each equivalence class, construct a new predicate to 'connect' the disjoint
+    // slot sets.
+    for (Map.Entry<EquivalenceClassId, List<SlotId>> rhsEquivClass:
+        rhsEquivClasses.entrySet()) {
+      List<SlotId> lhsSlots = lhsEquivClasses.get(rhsEquivClass.getKey());
+      if (lhsSlots == null) continue;
+      List<SlotId> rhsSlots = rhsEquivClass.getValue();
+      Preconditions.checkState(!lhsSlots.isEmpty() && !rhsSlots.isEmpty());
+
+      if (!partialEquivSlots.union(lhsSlots.get(0), rhsSlots.get(0))) continue;
+      // Do not create a new predicate from slots that are full outer joined because that
+      // predicate may be incorrectly assigned to a node below the associated full outer
+      // join.
+      if (isFullOuterJoined(lhsSlots.get(0)) || isFullOuterJoined(rhsSlots.get(0))) {
+        continue;
+      }
+      T newEqPred = (T) createInferredEqPred(lhsSlots.get(0), rhsSlots.get(0));
+      if (!hasMutualValueTransfer(lhsSlots.get(0), rhsSlots.get(0))) continue;
+      conjuncts.add(newEqPred);
+    }
+  }
+
+  /**
+   * For each equivalence class, adds/removes predicates from conjuncts such that
+   * it contains a minimum set of <slot> = <slot> predicates that establish
+   * the known equivalences between slots belonging to tid. Preserves original
+   * conjuncts when possible.
+   * The intent of this function is to enable construction of a minimum spanning tree
+   * to cover the known slot equivalences. This function should be called to add
+   * conjuncts to plan nodes that materialize a new tuple, e.g., scans and aggregations.
+   * Does not enforce equivalence between slots in ignoreSlots. Equivalences (if any)
+   * among slots in ignoreSlots are assumed to have already been enforced.
+   * TODO: Consider optimizing for the cheapest minimum set of predicates.
+   */
+  public <T extends Expr> void createEquivConjuncts(TupleId tid, List<T> conjuncts,
+      Set<SlotId> ignoreSlots) {
+    // Maps from a slot id to its set of equivalent slots. Used to track equivalences
+    // that have been established by 'conjuncts' and the 'ignoreSlots'.
+    DisjointSet<SlotId> partialEquivSlots = new DisjointSet<SlotId>();
+
+    // Treat ignored slots as already connected. Add the ignored slots at this point
+    // such that redundant conjuncts are removed.
+    partialEquivSlots.bulkUnion(ignoreSlots);
+    partialEquivSlots.checkConsistency();
+
+    // Update partialEquivSlots based on equality predicates in 'conjuncts' and remove
+    // redundant conjuncts.
+    Iterator<T> conjunctIter = conjuncts.iterator();
+    while (conjunctIter.hasNext()) {
+      Expr conjunct = conjunctIter.next();
+      Pair<SlotId, SlotId> eqSlots = BinaryPredicate.getEqSlots(conjunct);
+      if (eqSlots == null) continue;
+      EquivalenceClassId firstEqClassId = getEquivClassId(eqSlots.first);
+      EquivalenceClassId secondEqClassId = getEquivClassId(eqSlots.second);
+      // slots may not be in the same eq class due to outer joins
+      if (!firstEqClassId.equals(secondEqClassId)) continue;
+      // update equivalences and remove redundant conjuncts
+      if (!partialEquivSlots.union(eqSlots.first, eqSlots.second)) conjunctIter.remove();
+    }
+    // Suppose conjuncts had these predicates belonging to equivalence classes e1 and e2:
+    // e1: s1 = s2, s3 = s4, s3 = s5
+    // e2: s10 = s11
+    // The partialEquivSlots should contain the following entries at this point:
+    // s1 -> {s1, s2}
+    // s2 -> {s1, s2}
+    // s3 -> {s3, s4, s5}
+    // s4 -> {s3, s4, s5}
+    // s5 -> {s3, s4, s5}
+    // s10 -> {s10, s11}
+    // s11 -> {s10, s11}
+    // Assuming e1 = {s1, s2, s3, s4, s5} we need to generate one additional equality
+    // predicate to "connect" {s1, s2} and {s3, s4, s5}.
+
+    // These are the equivalences that need to be established by constructing conjuncts
+    // to form a minimum spanning tree.
+    Map<EquivalenceClassId, List<SlotId>> targetEquivClasses =
+        getEquivClasses(Lists.newArrayList(tid));
+    for (Map.Entry<EquivalenceClassId, List<SlotId>> targetEquivClass:
+        targetEquivClasses.entrySet()) {
+      // Loop over all pairs of equivalent slots and merge their disjoint slot sets,
+      // creating missing equality predicates as necessary.
+      List<SlotId> slotIds = targetEquivClass.getValue();
+      boolean done = false;
+      for (int i = 1; i < slotIds.size(); ++i) {
+        SlotId rhs = slotIds.get(i);
+        for (int j = 0; j < i; ++j) {
+          SlotId lhs = slotIds.get(j);
+          if (!partialEquivSlots.union(lhs, rhs)) continue;
+          if (!hasMutualValueTransfer(lhs, rhs)) continue;
+          conjuncts.add((T) createInferredEqPred(lhs, rhs));
+          // Check for early termination.
+          if (partialEquivSlots.get(lhs).size() == slotIds.size()) {
+            done = true;
+            break;
+          }
+        }
+        if (done) break;
+      }
+    }
+  }
+
+  public <T extends Expr> void createEquivConjuncts(TupleId tid, List<T> conjuncts) {
+    createEquivConjuncts(tid, conjuncts, new HashSet<SlotId>());
+  }
+
+  /**
+   * Returns a map of partial equivalence classes that only contains slot ids belonging
+   * to the given tuple ids. Only contains equivalence classes with more than one member.
+   */
+  private Map<EquivalenceClassId, List<SlotId>> getEquivClasses(List<TupleId> tids) {
+    Map<EquivalenceClassId, List<SlotId>> result = Maps.newHashMap();
+    for (TupleId tid: tids) {
+      for (SlotDescriptor slotDesc: getTupleDesc(tid).getSlots()) {
+        EquivalenceClassId eqClassId = getEquivClassId(slotDesc.getId());
+        // Ignore equivalence classes that are empty or only have a single member.
+        if (globalState_.equivClassMembers.get(eqClassId).size() <= 1) continue;
+        List<SlotId> slotIds = result.get(eqClassId);
+        if (slotIds == null) {
+          slotIds = Lists.newArrayList();
+          result.put(eqClassId, slotIds);
+        }
+        slotIds.add(slotDesc.getId());
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Returns a list of slot mappings from srcTid to destTid for the purpose of predicate
+   * propagation. Each mapping assigns every slot in srcSids to an equivalent slot in
+   * destTid.
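+   * Illustrative example (hypothetical columns): a source predicate t1.x + t1.y < 10
+   * can only be mapped to a destination tuple t2 if both t1.x and t1.y have equivalent
+   * slots in t2, e.g., producing t2.a + t2.b < 10.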
+   * Does not generate all possible mappings, but limits the results to
+   * useful and/or non-redundant mappings, i.e., those mappings that would improve
+   * the performance of query execution.
+   */
+  private List<List<SlotId>> getEquivDestSlotIds(TupleId srcTid, List<SlotId> srcSids,
+      TupleId destTid, Set<SlotId> ignoreSlots) {
+    List<List<SlotId>> allDestSids = Lists.newArrayList();
+    TupleDescriptor destTupleDesc = getTupleDesc(destTid);
+    if (srcSids.size() == 1) {
+      // Generate all mappings to propagate predicates of the form <slot> <op> <constant>
+      // to as many destination slots as possible.
+      // TODO: If srcTid == destTid we could limit the mapping to partition
+      // columns because mappings to non-partition columns do not provide
+      // a performance benefit.
+      SlotId srcSid = srcSids.get(0);
+      for (SlotDescriptor destSlot: destTupleDesc.getSlots()) {
+        if (ignoreSlots.contains(destSlot.getId())) continue;
+        if (hasValueTransfer(srcSid, destSlot.getId())) {
+          allDestSids.add(Lists.newArrayList(destSlot.getId()));
+        }
+      }
+    } else if (srcTid.equals(destTid)) {
+      // Multiple source slot ids and srcTid == destTid. Inter-tuple transfers are
+      // already expressed by the original conjuncts. Any mapping would be redundant.
+      // Still add srcSids to the result because we rely on getBoundPredicates() to
+      // include predicates that can safely be evaluated below an outer join, but must
+      // also be evaluated by the join itself (evalByJoin() == true).
+      allDestSids.add(srcSids);
+    } else {
+      // Multiple source slot ids and srcTid != destTid. Pick the first mapping
+      // where each srcSid is mapped to a different destSid to avoid generating
+      // redundant and/or trivial predicates.
+      // TODO: This approach is not guaranteed to find the best slot mapping
+      // (e.g., against partition columns) or all non-redundant mappings.
+      // The limitations are shown in predicate-propagation.test.
+      List<SlotId> destSids = Lists.newArrayList();
+      for (SlotId srcSid: srcSids) {
+        for (SlotDescriptor destSlot: destTupleDesc.getSlots()) {
+          if (ignoreSlots.contains(destSlot.getId())) continue;
+          if (hasValueTransfer(srcSid, destSlot.getId())
+              && !destSids.contains(destSlot.getId())) {
+            destSids.add(destSlot.getId());
+            break;
+          }
+        }
+      }
+      if (destSids.size() == srcSids.size()) allDestSids.add(destSids);
+    }
+    return allDestSids;
+  }
+
+  /**
+   * Returns true if the equivalence class identified by 'eqClassId' contains
+   * a slot belonging to an outer-joined tuple.
+   */
+  private boolean hasOuterJoinedTuple(EquivalenceClassId eqClassId) {
+    ArrayList<SlotId> eqClass = globalState_.equivClassMembers.get(eqClassId);
+    for (SlotId s: eqClass) {
+      if (isOuterJoined(getTupleId(s))) return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if 'p' evaluates to true when all its referenced slots are NULL,
+   * false otherwise.
+   * TODO: Can we avoid dealing with the exceptions thrown by analysis and eval?
+   */
+  public boolean isTrueWithNullSlots(Expr p) {
+    // Construct predicate with all SlotRefs substituted by NullLiterals.
+    List<SlotRef> slotRefs = Lists.newArrayList();
+    p.collect(Predicates.instanceOf(SlotRef.class), slotRefs);
+
+    // Map for substituting SlotRefs with NullLiterals.
+    ExprSubstitutionMap nullSmap = new ExprSubstitutionMap();
+    for (SlotRef slotRef: slotRefs) {
+      // Preserve the original SlotRef type to ensure all substituted
+      // subexpressions in the predicate have the same return type and
+      // function signature as in the original predicate.
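+      // Illustrative example (hypothetical): a DECIMAL(10,2) slot is replaced by a
+      // NULL literal of type DECIMAL(10,2), so the rewritten predicate resolves to
+      // the same function signature as the original.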
+      nullSmap.put(slotRef.clone(), NullLiteral.create(slotRef.getType()));
+    }
+    Expr nullTuplePred = p.substitute(nullSmap, this, false);
+    try {
+      return FeSupport.EvalPredicate(nullTuplePred, getQueryCtx());
+    } catch (InternalException e) {
+      Preconditions.checkState(false, "Failed to evaluate generated predicate: "
+          + nullTuplePred.toSql() + "." + e.getMessage());
+    }
+    return true;
+  }
+
+  public TupleId getTupleId(SlotId slotId) {
+    return globalState_.descTbl.getSlotDesc(slotId).getParent().getId();
+  }
+
+  public void registerValueTransfer(SlotId id1, SlotId id2) {
+    globalState_.registeredValueTransfers.add(new Pair(id1, id2));
+  }
+
+  public boolean isOuterJoined(TupleId tid) {
+    return globalState_.outerJoinedTupleIds.containsKey(tid);
+  }
+
+  public boolean isOuterJoined(SlotId sid) {
+    return isOuterJoined(getTupleId(sid));
+  }
+
+  public boolean isSemiJoined(TupleId tid) {
+    return globalState_.semiJoinedTupleIds.containsKey(tid);
+  }
+
+  public boolean isAntiJoinedConjunct(Expr e) {
+    return getAntiJoinRef(e) != null;
+  }
+
+  public TableRef getAntiJoinRef(Expr e) {
+    TableRef tblRef = globalState_.sjClauseByConjunct.get(e.getId());
+    if (tblRef == null) return null;
+    return (tblRef.getJoinOp().isAntiJoin()) ? tblRef : null;
+  }
+
+  public boolean isFullOuterJoined(TupleId tid) {
+    return globalState_.fullOuterJoinedTupleIds.containsKey(tid);
+  }
+
+  public boolean isFullOuterJoined(SlotId sid) {
+    return isFullOuterJoined(getTupleId(sid));
+  }
+
+  public boolean isVisible(TupleId tid) {
+    return tid == visibleSemiJoinedTupleId_ || !isSemiJoined(tid);
+  }
+
+  public boolean containsOuterJoinedTid(List<TupleId> tids) {
+    for (TupleId tid: tids) {
+      if (isOuterJoined(tid)) return true;
+    }
+    return false;
+  }
+
+  /**
+   * Populate globalState.valueTransfer based on the registered equi-join predicates
+   * of the form <slotref> = <slotref>.
+   */
+  public void computeEquivClasses() {
+    globalState_.valueTransferGraph = new ValueTransferGraph();
+    globalState_.valueTransferGraph.computeValueTransfers();
+
+    // we start out by assigning each slot to its own equiv class
+    int numSlots = globalState_.descTbl.getMaxSlotId().asInt() + 1;
+    for (int i = 0; i < numSlots; ++i) {
+      EquivalenceClassId id = globalState_.equivClassIdGenerator.getNextId();
+      globalState_.equivClassMembers.put(id, Lists.newArrayList(new SlotId(i)));
+    }
+
+    // merge two classes if there is a value transfer between all members of the
+    // combined class; do this until there's nothing left to merge
+    boolean merged;
+    do {
+      merged = false;
+      for (Map.Entry<EquivalenceClassId, ArrayList<SlotId>> e1:
+          globalState_.equivClassMembers.entrySet()) {
+        for (Map.Entry<EquivalenceClassId, ArrayList<SlotId>> e2:
+            globalState_.equivClassMembers.entrySet()) {
+          if (e1.getKey() == e2.getKey()) continue;
+          List<SlotId> class1Members = e1.getValue();
+          if (class1Members.isEmpty()) continue;
+          List<SlotId> class2Members = e2.getValue();
+          if (class2Members.isEmpty()) continue;
+
+          // check whether we can transfer values between all members
+          boolean canMerge = true;
+          for (SlotId class1Slot: class1Members) {
+            for (SlotId class2Slot: class2Members) {
+              if (!hasValueTransfer(class1Slot, class2Slot)
+                  && !hasValueTransfer(class2Slot, class1Slot)) {
+                canMerge = false;
+                break;
+              }
+            }
+            if (!canMerge) break;
+          }
+          if (!canMerge) continue;
+
+          // merge classes 1 and 2 by transferring 2 into 1
+          class1Members.addAll(class2Members);
+          class2Members.clear();
+          merged = true;
+        }
+      }
+    } while (merged);
+
+    // populate equivClassSmap
+
<TRUNCATED>
