http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/Expr.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java new file mode 100644 index 0000000..fdc5bf1 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java @@ -0,0 +1,1258 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloudera.impala.analysis; + +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.impala.catalog.Catalog; +import com.cloudera.impala.catalog.Function; +import com.cloudera.impala.catalog.Function.CompareMode; +import com.cloudera.impala.catalog.PrimitiveType; +import com.cloudera.impala.catalog.ScalarType; +import com.cloudera.impala.catalog.Type; +import com.cloudera.impala.common.AnalysisException; +import com.cloudera.impala.common.TreeNode; +import com.cloudera.impala.thrift.TExpr; +import com.cloudera.impala.thrift.TExprNode; +import com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicates; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +/** + * Root of the expr node hierarchy. + * + */ +abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneable { + private final static Logger LOG = LoggerFactory.getLogger(Expr.class); + + // Limits on the number of expr children and the depth of an expr tree. These maximum + // values guard against crashes due to stack overflows (IMPALA-432) and were + // experimentally determined to be safe. + public final static int EXPR_CHILDREN_LIMIT = 10000; + // The expr depth limit is mostly due to our recursive implementation of clone(). + public final static int EXPR_DEPTH_LIMIT = 1000; + + // Name of the function that needs to be implemented by every Expr that + // supports negation. + private final static String NEGATE_FN = "negate"; + + // To be used where we cannot come up with a better estimate (selectivity_ is -1). + public static double DEFAULT_SELECTIVITY = 0.1; + + // The relative costs of different Exprs. These numbers are not intended as a precise + // reflection of running times, but as simple heuristics for ordering Exprs from cheap + // to expensive. + // TODO(tmwarshall): Get these costs in a more principled way, eg. with a benchmark. + public final static float ARITHMETIC_OP_COST = 1; + public final static float BINARY_PREDICATE_COST = 1; + public final static float VAR_LEN_BINARY_PREDICATE_COST = 5; + public final static float CAST_COST = 1; + public final static float COMPOUND_PREDICATE_COST = 1; + public final static float FUNCTION_CALL_COST = 10; + public final static float IS_NOT_EMPTY_COST = 1; + public final static float IS_NULL_COST = 1; + public final static float LIKE_COST = 10; + public final static float LITERAL_COST = 1; + public final static float SLOT_REF_COST = 1; + public final static float TIMESTAMP_ARITHMETIC_COST = 5; + + // To be used when estimating the cost of Exprs of type string where we don't otherwise + // have an estimate of how long the strings produced by that Expr are. + public final static int DEFAULT_AVG_STRING_LENGTH = 5; + + // returns true if an Expr is a non-analytic aggregate. + private final static com.google.common.base.Predicate<Expr> isAggregatePredicate_ = + new com.google.common.base.Predicate<Expr>() { + public boolean apply(Expr arg) { + return arg instanceof FunctionCallExpr && + ((FunctionCallExpr)arg).isAggregateFunction(); + } + }; + + // Returns true if an Expr is a NOT CompoundPredicate. + public final static com.google.common.base.Predicate<Expr> IS_NOT_PREDICATE = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return arg instanceof CompoundPredicate && + ((CompoundPredicate)arg).getOp() == CompoundPredicate.Operator.NOT; + } + }; + + // Returns true if an Expr is an OR CompoundPredicate. + public final static com.google.common.base.Predicate<Expr> IS_OR_PREDICATE = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return arg instanceof CompoundPredicate && + ((CompoundPredicate)arg).getOp() == CompoundPredicate.Operator.OR; + } + }; + + // Returns true if an Expr is a scalar subquery + public final static com.google.common.base.Predicate<Expr> IS_SCALAR_SUBQUERY = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return arg.isScalarSubquery(); + } + }; + + // Returns true if an Expr is an aggregate function that returns non-null on + // an empty set (e.g. count). + public final static com.google.common.base.Predicate<Expr> + NON_NULL_EMPTY_AGG = new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return arg instanceof FunctionCallExpr && + ((FunctionCallExpr)arg).returnsNonNullOnEmpty(); + } + }; + + // Returns true if an Expr is a builtin aggregate function. + public final static com.google.common.base.Predicate<Expr> IS_BUILTIN_AGG_FN = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return arg instanceof FunctionCallExpr && + ((FunctionCallExpr)arg).getFnName().isBuiltin(); + } + }; + + public final static com.google.common.base.Predicate<Expr> IS_TRUE_LITERAL = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return arg instanceof BoolLiteral && ((BoolLiteral)arg).getValue(); + } + }; + + public final static com.google.common.base.Predicate<Expr> IS_EQ_BINARY_PREDICATE = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { return BinaryPredicate.getEqSlots(arg) != null; } + }; + + public final static com.google.common.base.Predicate<Expr> IS_BINARY_PREDICATE = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { return arg instanceof BinaryPredicate; } + }; + + // id that's unique across the entire query statement and is assigned by + // Analyzer.registerConjuncts(); only assigned for the top-level terms of a + // conjunction, and therefore null for most Exprs + protected ExprId id_; + + // true if Expr is an auxiliary predicate that was generated by the plan generation + // process to facilitate predicate propagation; + // false if Expr originated with a query stmt directly + private boolean isAuxExpr_ = false; + + protected Type type_; // result of analysis + protected boolean isAnalyzed_; // true after analyze() has been called + protected boolean isOnClauseConjunct_; // set by analyzer + + // Flag to indicate whether to wrap this expr's toSql() in parenthesis. Set by parser. + // Needed for properly capturing expr precedences in the SQL string. + protected boolean printSqlInParens_ = false; + + // Estimated probability of a predicate evaluating to true. Set during analysis. + // Between 0 and 1, or set to -1 if the selectivity could not be estimated. + protected double selectivity_; + + // Estimated relative cost of evaluating this expression, including the costs of + // its children. Set during analysis and used to sort conjuncts within a PlanNode. + // Has a default value of -1 indicating unknown cost if the cost of this expression + // or any of its children was not set, but it is required to be set for any + // expression which may be part of a conjunct. + protected float evalCost_; + + // estimated number of distinct values produced by Expr; invalid: -1 + // set during analysis + protected long numDistinctValues_; + + // The function to call. This can either be a scalar or aggregate function. + // Set in analyze(). + protected Function fn_; + + protected Expr() { + super(); + type_ = Type.INVALID; + selectivity_ = -1.0; + evalCost_ = -1.0f; + numDistinctValues_ = -1; + } + + /** + * Copy c'tor used in clone(). + */ + protected Expr(Expr other) { + id_ = other.id_; + isAuxExpr_ = other.isAuxExpr_; + type_ = other.type_; + isAnalyzed_ = other.isAnalyzed_; + isOnClauseConjunct_ = other.isOnClauseConjunct_; + printSqlInParens_ = other.printSqlInParens_; + selectivity_ = other.selectivity_; + evalCost_ = other.evalCost_; + numDistinctValues_ = other.numDistinctValues_; + fn_ = other.fn_; + children_ = Expr.cloneList(other.children_); + } + + public ExprId getId() { return id_; } + protected void setId(ExprId id) { id_ = id; } + public Type getType() { return type_; } + public double getSelectivity() { return selectivity_; } + public boolean hasSelectivity() { return selectivity_ >= 0; } + public float getCost() { + Preconditions.checkState(isAnalyzed_); + return evalCost_; + } + public boolean hasCost() { return evalCost_ >= 0; } + public long getNumDistinctValues() { return numDistinctValues_; } + public void setPrintSqlInParens(boolean b) { printSqlInParens_ = b; } + public boolean isOnClauseConjunct() { return isOnClauseConjunct_; } + public void setIsOnClauseConjunct(boolean b) { isOnClauseConjunct_ = b; } + public boolean isAuxExpr() { return isAuxExpr_; } + public boolean isRegisteredPredicate() { return id_ != null; } + public void setIsAuxExpr() { isAuxExpr_ = true; } + public Function getFn() { return fn_; } + + /** + * Perform semantic analysis of node and all of its children. + * Throws exception if any errors found. + * @see com.cloudera.impala.parser.ParseNode#analyze(com.cloudera.impala.parser.Analyzer) + */ + public void analyze(Analyzer analyzer) throws AnalysisException { + // Check the expr child limit. + if (children_.size() > EXPR_CHILDREN_LIMIT) { + String sql = toSql(); + String sqlSubstr = sql.substring(0, Math.min(80, sql.length())); + throw new AnalysisException(String.format("Exceeded the maximum number of child " + + "expressions (%s).\nExpression has %s children:\n%s...", + EXPR_CHILDREN_LIMIT, children_.size(), sqlSubstr)); + } + + // analyzer may be null for certain literal constructions (e.g. IntLiteral). + if (analyzer != null) { + analyzer.incrementCallDepth(); + // Check the expr depth limit. Do not print the toSql() to not overflow the stack. + if (analyzer.getCallDepth() > EXPR_DEPTH_LIMIT) { + throw new AnalysisException(String.format("Exceeded the maximum depth of an " + + "expression tree (%s).", EXPR_DEPTH_LIMIT)); + } + } + for (Expr child: children_) { + child.analyze(analyzer); + } + isAnalyzed_ = true; + computeNumDistinctValues(); + + if (analyzer != null) analyzer.decrementCallDepth(); + } + + /** + * Helper function to analyze this expr and assert that the analysis was successful. + * TODO: This function could be used in many more places to clean up. Consider + * adding an IAnalyzable interface or similar to and move this helper into Analyzer + * such that non-Expr things can use the helper also. + */ + public void analyzeNoThrow(Analyzer analyzer) { + try { + analyze(analyzer); + } catch (AnalysisException e) { + throw new IllegalStateException(e); + } + } + + protected void computeNumDistinctValues() { + if (isConstant()) { + numDistinctValues_ = 1; + } else { + // if this Expr contains slotrefs, we estimate the # of distinct values + // to be the maximum such number for any of the slotrefs; + // the subclass analyze() function may well want to override this, if it + // knows better + List<SlotRef> slotRefs = Lists.newArrayList(); + this.collect(Predicates.instanceOf(SlotRef.class), slotRefs); + numDistinctValues_ = -1; + for (SlotRef slotRef: slotRefs) { + numDistinctValues_ = Math.max(numDistinctValues_, slotRef.numDistinctValues_); + } + } + } + + /** + * Collects the returns types of the child nodes in an array. + */ + protected Type[] collectChildReturnTypes() { + Type[] childTypes = new Type[children_.size()]; + for (int i = 0; i < children_.size(); ++i) { + childTypes[i] = children_.get(i).type_; + } + return childTypes; + } + + /** + * Looks up in the catalog the builtin for 'name' and 'argTypes'. + * Returns null if the function is not found. + */ + protected Function getBuiltinFunction(Analyzer analyzer, String name, + Type[] argTypes, CompareMode mode) throws AnalysisException { + FunctionName fnName = new FunctionName(Catalog.BUILTINS_DB, name); + Function searchDesc = new Function(fnName, argTypes, Type.INVALID, false); + return analyzer.getCatalog().getFunction(searchDesc, mode); + } + + /** + * Generates the necessary casts for the children of this expr to call fn_. + * child(0) is cast to the function's first argument, child(1) to the second etc. + * This does not do any validation and the casts are assumed to be safe. + * + * If ignoreWildcardDecimals is true, the function will not cast arguments that + * are wildcard decimals. This is used for builtins where the cast is done within + * the BE function. + * Otherwise, if the function signature contains wildcard decimals, each wildcard child + * argument will be cast to the highest resolution that can contain all of the child + * wildcard arguments. + * e.g. fn(decimal(*), decimal(*)) + * called with fn(decimal(10,2), decimal(5,3)) + * both children will be cast to (11, 3). + */ + protected void castForFunctionCall(boolean ignoreWildcardDecimals) + throws AnalysisException { + Preconditions.checkState(fn_ != null); + Type[] fnArgs = fn_.getArgs(); + Type resolvedWildcardType = getResolvedWildCardType(); + for (int i = 0; i < children_.size(); ++i) { + // For varargs, we must compare with the last type in fnArgs.argTypes. + int ix = Math.min(fnArgs.length - 1, i); + if (fnArgs[ix].isWildcardDecimal()) { + if (children_.get(i).type_.isDecimal() && ignoreWildcardDecimals) continue; + Preconditions.checkState(resolvedWildcardType != null); + if (!children_.get(i).type_.equals(resolvedWildcardType)) { + castChild(resolvedWildcardType, i); + } + } else if (!children_.get(i).type_.matchesType(fnArgs[ix])) { + castChild(fnArgs[ix], i); + } + } + } + + /** + * Returns the max resolution type of all the wild card decimal types. + * Returns null if there are no wild card types. + */ + Type getResolvedWildCardType() throws AnalysisException { + Type result = null; + Type[] fnArgs = fn_.getArgs(); + for (int i = 0; i < children_.size(); ++i) { + // For varargs, we must compare with the last type in fnArgs.argTypes. + int ix = Math.min(fnArgs.length - 1, i); + if (!fnArgs[ix].isWildcardDecimal()) continue; + + Type childType = children_.get(i).type_; + Preconditions.checkState(!childType.isWildcardDecimal(), + "Child expr should have been resolved."); + Preconditions.checkState(childType.isScalarType(), + "Function should not have resolved with a non-scalar child type."); + ScalarType decimalType = (ScalarType) childType; + if (result == null) { + result = decimalType.getMinResolutionDecimal(); + } else { + result = Type.getAssignmentCompatibleType(result, childType, false); + } + } + if (result != null) { + if (result.isNull()) { + throw new AnalysisException( + "Cannot resolve DECIMAL precision and scale from NULL type."); + } + Preconditions.checkState(result.isDecimal() && !result.isWildcardDecimal()); + } + return result; + } + + /** + * Returns true if e is a CastExpr and the target type is a decimal. + */ + private boolean isExplicitCastToDecimal(Expr e) { + if (!(e instanceof CastExpr)) return false; + CastExpr c = (CastExpr)e; + return !c.isImplicit() && c.getType().isDecimal(); + } + + /** + * Returns a clone of child with all decimal-typed NumericLiterals in it explicitly + * cast to targetType. + */ + private Expr convertDecimalLiteralsToFloat(Analyzer analyzer, Expr child, + Type targetType) throws AnalysisException { + if (!targetType.isFloatingPointType() && !targetType.isIntegerType()) return child; + if (targetType.isIntegerType()) targetType = Type.DOUBLE; + List<NumericLiteral> literals = Lists.newArrayList(); + child.collectAll(Predicates.instanceOf(NumericLiteral.class), literals); + ExprSubstitutionMap smap = new ExprSubstitutionMap(); + for (NumericLiteral l: literals) { + if (!l.getType().isDecimal()) continue; + NumericLiteral castLiteral = (NumericLiteral) l.clone(); + castLiteral.explicitlyCastToFloat(targetType); + smap.put(l, castLiteral); + } + return child.substitute(smap, analyzer, false); + } + + /** + * Converts numeric literal in the expr tree rooted at this expr to return floating + * point types instead of decimals, if possible. + * + * Decimal has a higher processing cost than floating point and we should not pay + * the cost if the user does not require the accuracy. For example: + * "select float_col + 1.1" would start out with 1.1 as a decimal(2,1) and the + * float_col would be promoted to a high accuracy decimal. This function will identify + * this case and treat 1.1 as a float. + * In the case of "decimal_col + 1.1", 1.1 would remain a decimal. + * In the case of "float_col + cast(1.1 as decimal(2,1))", the result would be a + * decimal. + * + * Another way to think about it is that DecimalLiterals are analyzed as returning + * decimals (of the narrowest precision/scale) and we later convert them to a floating + * point type when it is consistent with the user's intent. + * + * TODO: another option is to do constant folding in the FE and then apply this rule. + */ + protected void convertNumericLiteralsFromDecimal(Analyzer analyzer) + throws AnalysisException { + Preconditions.checkState(this instanceof ArithmeticExpr || + this instanceof BinaryPredicate); + if (children_.size() == 1) return; // Do not attempt to convert for unary ops + Preconditions.checkState(children_.size() == 2); + Type t0 = getChild(0).getType(); + Type t1 = getChild(1).getType(); + boolean c0IsConstantDecimal = getChild(0).isConstant() && t0.isDecimal(); + boolean c1IsConstantDecimal = getChild(1).isConstant() && t1.isDecimal(); + if (c0IsConstantDecimal && c1IsConstantDecimal) return; + if (!c0IsConstantDecimal && !c1IsConstantDecimal) return; + + // Only child(0) or child(1) is a const decimal. See if we can cast it to + // the type of the other child. + if (c0IsConstantDecimal && !isExplicitCastToDecimal(getChild(0))) { + Expr c0 = convertDecimalLiteralsToFloat(analyzer, getChild(0), t1); + setChild(0, c0); + } + if (c1IsConstantDecimal && !isExplicitCastToDecimal(getChild(1))) { + Expr c1 = convertDecimalLiteralsToFloat(analyzer, getChild(1), t0); + setChild(1, c1); + } + } + + /** + * Helper function: analyze list of exprs + */ + public static void analyze(List<? extends Expr> exprs, Analyzer analyzer) + throws AnalysisException { + if (exprs == null) return; + for (Expr expr: exprs) { + expr.analyze(analyzer); + } + } + + @Override + public String toSql() { + return (printSqlInParens_) ? "(" + toSqlImpl() + ")" : toSqlImpl(); + } + + /** + * Returns a SQL string representing this expr. Subclasses should override this method + * instead of toSql() to ensure that parenthesis are properly added around the toSql(). + */ + protected abstract String toSqlImpl(); + + // Convert this expr, including all children, to its Thrift representation. + public TExpr treeToThrift() { + if (type_.isNull()) { + // Hack to ensure BE never sees TYPE_NULL. If an expr makes it this far without + // being cast to a non-NULL type, the type doesn't matter and we can cast it + // arbitrarily. + Preconditions.checkState(this instanceof NullLiteral || this instanceof SlotRef); + return NullLiteral.create(ScalarType.BOOLEAN).treeToThrift(); + } + TExpr result = new TExpr(); + treeToThriftHelper(result); + return result; + } + + // Append a flattened version of this expr, including all children, to 'container'. + protected void treeToThriftHelper(TExpr container) { + Preconditions.checkState(isAnalyzed_, + "Must be analyzed before serializing to thrift. %s", this); + Preconditions.checkState(!type_.isWildcardDecimal()); + // The BE should never see TYPE_NULL + Preconditions.checkState(!type_.isNull(), "Expr has type null!"); + TExprNode msg = new TExprNode(); + msg.type = type_.toThrift(); + msg.num_children = children_.size(); + if (fn_ != null) { + msg.setFn(fn_.toThrift()); + if (fn_.hasVarArgs()) msg.setVararg_start_idx(fn_.getNumArgs() - 1); + } + toThrift(msg); + container.addToNodes(msg); + for (Expr child: children_) { + child.treeToThriftHelper(container); + } + } + + // Convert this expr into msg (excluding children), which requires setting + // msg.op as well as the expr-specific field. + protected abstract void toThrift(TExprNode msg); + + /** + * Returns the product of the given exprs' number of distinct values or -1 if any of + * the exprs have an invalid number of distinct values. + */ + public static long getNumDistinctValues(List<Expr> exprs) { + if (exprs == null || exprs.isEmpty()) return 0; + long numDistinctValues = 1; + for (Expr expr: exprs) { + if (expr.getNumDistinctValues() == -1) { + numDistinctValues = -1; + break; + } + numDistinctValues *= expr.getNumDistinctValues(); + } + return numDistinctValues; + } + + public static List<TExpr> treesToThrift(List<? extends Expr> exprs) { + List<TExpr> result = Lists.newArrayList(); + for (Expr expr: exprs) { + result.add(expr.treeToThrift()); + } + return result; + } + + public static com.google.common.base.Predicate<Expr> isAggregatePredicate() { + return isAggregatePredicate_; + } + + public boolean isAggregate() { + return isAggregatePredicate_.apply(this); + } + + public List<String> childrenToSql() { + List<String> result = Lists.newArrayList(); + for (Expr child: children_) { + result.add(child.toSql()); + } + return result; + } + + public String debugString() { + return (id_ != null ? "exprid=" + id_.toString() + " " : "") + debugString(children_); + } + + public static String debugString(List<? extends Expr> exprs) { + if (exprs == null || exprs.isEmpty()) return ""; + List<String> strings = Lists.newArrayList(); + for (Expr expr: exprs) { + strings.add(expr.debugString()); + } + return Joiner.on(" ").join(strings); + } + + public static String toSql(List<? extends Expr> exprs) { + if (exprs == null || exprs.isEmpty()) return ""; + List<String> strings = Lists.newArrayList(); + for (Expr expr: exprs) { + strings.add(expr.toSql()); + } + return Joiner.on(", ").join(strings); + } + + /** + * Returns true if two expressions are equal. The equality comparison works on analyzed + * as well as unanalyzed exprs by ignoring implicit casts (see CastExpr.equals()). + */ + @Override + public boolean equals(Object obj) { + if (obj == null) return false; + if (obj.getClass() != this.getClass()) return false; + // don't compare type, this could be called pre-analysis + Expr expr = (Expr) obj; + if (children_.size() != expr.children_.size()) return false; + for (int i = 0; i < children_.size(); ++i) { + if (!children_.get(i).equals(expr.children_.get(i))) return false; + } + if (fn_ == null && expr.fn_ == null) return true; + if (fn_ == null || expr.fn_ == null) return false; // One null, one not + // Both fn_'s are not null + return fn_.equals(expr.fn_); + } + + /** + * Return true if l1[i].equals(l2[i]) for all i. + */ + public static <C extends Expr> boolean equalLists(List<C> l1, List<C> l2) { + if (l1.size() != l2.size()) return false; + Iterator<C> l1Iter = l1.iterator(); + Iterator<C> l2Iter = l2.iterator(); + while (l1Iter.hasNext()) { + if (!l1Iter.next().equals(l2Iter.next())) return false; + } + return true; + } + + /** + * Return true if l1 equals l2 when both lists are interpreted as sets. + * TODO: come up with something better than O(n^2)? + */ + public static <C extends Expr> boolean equalSets(List<C> l1, List<C> l2) { + if (l1.size() != l2.size()) return false; + return l1.containsAll(l2) && l2.containsAll(l1); + } + + /** + * Return true if l1 is a subset of l2. + */ + public static <C extends Expr> boolean isSubset(List<C> l1, List<C> l2) { + if (l1.size() > l2.size()) return false; + return l2.containsAll(l1); + } + + /** + * Return the intersection of l1 and l2.599 + */ + public static <C extends Expr> List<C> intersect(List<C> l1, List<C> l2) { + List<C> result = new ArrayList<C>(); + for (C element: l1) { + if (l2.contains(element)) result.add(element); + } + return result; + } + + /** + * Compute the intersection of l1 and l2, given the smap, and + * return the intersecting l1 elements in i1 and the intersecting l2 elements in i2. + */ + public static void intersect(Analyzer analyzer, + List<Expr> l1, List<Expr> l2, ExprSubstitutionMap smap, + List<Expr> i1, List<Expr> i2) { + i1.clear(); + i2.clear(); + List<Expr> s1List = Expr.substituteList(l1, smap, analyzer, false); + Preconditions.checkState(s1List.size() == l1.size()); + List<Expr> s2List = Expr.substituteList(l2, smap, analyzer, false); + Preconditions.checkState(s2List.size() == l2.size()); + for (int i = 0; i < s1List.size(); ++i) { + Expr s1 = s1List.get(i); + for (int j = 0; j < s2List.size(); ++j) { + Expr s2 = s2List.get(j); + if (s1.equals(s2)) { + i1.add(l1.get(i)); + i2.add(l2.get(j)); + break; + } + } + } + } + + @Override + public int hashCode() { + if (id_ == null) { + throw new UnsupportedOperationException("Expr.hashCode() is not implemented"); + } else { + return id_.asInt(); + } + } + + /** + * Gather conjuncts from this expr and return them in a list. + * A conjunct is an expr that returns a boolean, e.g., Predicates, function calls, + * SlotRefs, etc. Hence, this method is placed here and not in Predicate. + */ + public List<Expr> getConjuncts() { + List<Expr> list = Lists.newArrayList(); + if (this instanceof CompoundPredicate + && ((CompoundPredicate) this).getOp() == CompoundPredicate.Operator.AND) { + // TODO: we have to convert CompoundPredicate.AND to two expr trees for + // conjuncts because NULLs are handled differently for CompoundPredicate.AND + // and conjunct evaluation. This is not optimal for jitted exprs because it + // will result in two functions instead of one. Create a new CompoundPredicate + // Operator (i.e. CONJUNCT_AND) with the right NULL semantics and use that + // instead + list.addAll((getChild(0)).getConjuncts()); + list.addAll((getChild(1)).getConjuncts()); + } else { + list.add(this); + } + return list; + } + + /** + * Returns an analyzed clone of 'this' with exprs substituted according to smap. + * Removes implicit casts and analysis state while cloning/substituting exprs within + * this tree, such that the returned result has minimal implicit casts and types. + * Throws if analyzing the post-substitution expr tree failed. + * If smap is null, this function is equivalent to clone(). + * If preserveRootType is true, the resulting expr tree will be cast if necessary to + * the type of 'this'. + */ + public Expr trySubstitute(ExprSubstitutionMap smap, Analyzer analyzer, + boolean preserveRootType) + throws AnalysisException { + Expr result = clone(); + // Return clone to avoid removing casts. + if (smap == null) return result; + result = result.substituteImpl(smap, analyzer); + result.analyze(analyzer); + if (preserveRootType && !type_.equals(result.getType())) result = result.castTo(type_); + return result; + } + + /** + * Returns an analyzed clone of 'this' with exprs substituted according to smap. + * Removes implicit casts and analysis state while cloning/substituting exprs within + * this tree, such that the returned result has minimal implicit casts and types. + * Expects the analysis of the post-substitution expr to succeed. + * If smap is null, this function is equivalent to clone(). + * If preserveRootType is true, the resulting expr tree will be cast if necessary to + * the type of 'this'. + */ + public Expr substitute(ExprSubstitutionMap smap, Analyzer analyzer, + boolean preserveRootType) { + try { + return trySubstitute(smap, analyzer, preserveRootType); + } catch (Exception e) { + throw new IllegalStateException("Failed analysis after expr substitution.", e); + } + } + + public static ArrayList<Expr> trySubstituteList(Iterable<? extends Expr> exprs, + ExprSubstitutionMap smap, Analyzer analyzer, boolean preserveRootTypes) + throws AnalysisException { + if (exprs == null) return null; + ArrayList<Expr> result = new ArrayList<Expr>(); + for (Expr e: exprs) { + result.add(e.trySubstitute(smap, analyzer, preserveRootTypes)); + } + return result; + } + + public static ArrayList<Expr> substituteList(Iterable<? extends Expr> exprs, + ExprSubstitutionMap smap, Analyzer analyzer, boolean preserveRootTypes) { + try { + return trySubstituteList(exprs, smap, analyzer, preserveRootTypes); + } catch (Exception e) { + throw new IllegalStateException("Failed analysis after expr substitution.", e); + } + } + + /** + * Recursive method that performs the actual substitution for try/substitute() while + * removing implicit casts. Resets the analysis state in all non-SlotRef expressions. + * Exprs that have non-child exprs which should be affected by substitutions must + * override this method and apply the substitution to such exprs as well. + */ + protected Expr substituteImpl(ExprSubstitutionMap smap, Analyzer analyzer) + throws AnalysisException { + if (isImplicitCast()) return getChild(0).substituteImpl(smap, analyzer); + if (smap != null) { + Expr substExpr = smap.get(this); + if (substExpr != null) return substExpr.clone(); + } + for (int i = 0; i < children_.size(); ++i) { + children_.set(i, children_.get(i).substituteImpl(smap, analyzer)); + } + // SlotRefs must remain analyzed to support substitution across query blocks. All + // other exprs must be analyzed again after the substitution to add implicit casts + // and for resolving their correct function signature. + if (!(this instanceof SlotRef)) resetAnalysisState(); + return this; + } + + /** + * Resets the internal state of this expr produced by analyze(). + * Only modifies this expr, and not its child exprs. + */ + protected void resetAnalysisState() { isAnalyzed_ = false; } + + /** + * Resets the internal analysis state of this expr tree. Removes implicit casts. + */ + public Expr reset() { + if (isImplicitCast()) return getChild(0).reset(); + for (int i = 0; i < children_.size(); ++i) { + children_.set(i, children_.get(i).reset()); + } + resetAnalysisState(); + return this; + } + + public static ArrayList<Expr> resetList(ArrayList<Expr> l) { + for (int i = 0; i < l.size(); ++i) { + l.set(i, l.get(i).reset()); + } + return l; + } + + /** + * Creates a deep copy of this expr including its analysis state. The method is + * abstract in this class to force new Exprs to implement it. + */ + @Override + public abstract Expr clone(); + + /** + * Create a deep copy of 'l'. The elements of the returned list are of the same + * type as the input list. + */ + public static <C extends Expr> ArrayList<C> cloneList(List<C> l) { + Preconditions.checkNotNull(l); + ArrayList<C> result = new ArrayList<C>(l.size()); + for (Expr element: l) { + result.add((C) element.clone()); + } + return result; + } + + /** + * Removes duplicate exprs (according to equals()). + */ + public static <C extends Expr> void removeDuplicates(List<C> l) { + if (l == null) return; + ListIterator<C> it1 = l.listIterator(); + while (it1.hasNext()) { + C e1 = it1.next(); + ListIterator<C> it2 = l.listIterator(); + boolean duplicate = false; + while (it2.hasNext()) { + C e2 = it2.next(); + // only check up to but excluding e1 + if (e1 == e2) break; + if (e1.equals(e2)) { + duplicate = true; + break; + } + } + if (duplicate) it1.remove(); + } + } + + /** + * Removes constant exprs + */ + public static <C extends Expr> void removeConstants(List<C> l) { + if (l == null) return; + ListIterator<C> it = l.listIterator(); + while (it.hasNext()) { + C e = it.next(); + if (e.isConstant()) it.remove(); + } + } + + /** + * Returns true if expr is fully bound by tid, otherwise false. + */ + public boolean isBound(TupleId tid) { + return isBoundByTupleIds(Lists.newArrayList(tid)); + } + + /** + * Returns true if expr is fully bound by tids, otherwise false. + */ + public boolean isBoundByTupleIds(List<TupleId> tids) { + for (Expr child: children_) { + if (!child.isBoundByTupleIds(tids)) return false; + } + return true; + } + + /** + * Returns true if expr is fully bound by slotId, otherwise false. + */ + public boolean isBound(SlotId slotId) { + return isBoundBySlotIds(Lists.newArrayList(slotId)); + } + + /** + * Returns true if expr is fully bound by slotIds, otherwise false. + */ + public boolean isBoundBySlotIds(List<SlotId> slotIds) { + for (Expr child: children_) { + if (!child.isBoundBySlotIds(slotIds)) return false; + } + return true; + } + + public static boolean isBound(List<? extends Expr> exprs, List<TupleId> tids) { + for (Expr expr: exprs) { + if (!expr.isBoundByTupleIds(tids)) return false; + } + return true; + } + + public static Expr getFirstBoundChild(Expr expr, List<TupleId> tids) { + for (Expr child: expr.getChildren()) { + if (child.isBoundByTupleIds(tids)) return child; + } + return null; + } + + public void getIds(List<TupleId> tupleIds, List<SlotId> slotIds) { + Set<TupleId> tupleIdSet = Sets.newHashSet(); + Set<SlotId> slotIdSet = Sets.newHashSet(); + getIdsHelper(tupleIdSet, slotIdSet); + if (tupleIds != null) tupleIds.addAll(tupleIdSet); + if (slotIds != null) slotIds.addAll(slotIdSet); + } + + protected void getIdsHelper(Set<TupleId> tupleIds, Set<SlotId> slotIds) { + for (Expr child: children_) { + child.getIdsHelper(tupleIds, slotIds); + } + } + + public static <C extends Expr> void getIds(List<? extends Expr> exprs, + List<TupleId> tupleIds, List<SlotId> slotIds) { + if (exprs == null) return; + for (Expr e: exprs) { + e.getIds(tupleIds, slotIds); + } + } + + /** + * @return true if this is an instance of LiteralExpr + */ + public boolean isLiteral() { + return this instanceof LiteralExpr; + } + + /** + * @return true if this expr can be evaluated with Expr::GetValue(NULL), + * i.e. if it doesn't contain any references to runtime variables (e.g. slot refs). + * Expr subclasses should override this if necessary (e.g. SlotRef, Subquery, etc. + * always return false). + */ + public boolean isConstant() { + for (Expr expr : children_) { + if (!expr.isConstant()) return false; + } + return true; + } + + /** + * @return true if this expr is either a null literal or a cast from + * a null literal. + */ + public boolean isNullLiteral() { + if (this instanceof NullLiteral) return true; + if (!(this instanceof CastExpr)) return false; + Preconditions.checkState(children_.size() == 1); + return children_.get(0).isNullLiteral(); + } + + /** + * Return true if this expr is a scalar subquery. + */ + public boolean isScalarSubquery() { + Preconditions.checkState(isAnalyzed_); + return this instanceof Subquery && getType().isScalarType(); + } + + /** + * Checks whether this expr returns a boolean type or NULL type. + * If not, throws an AnalysisException with an appropriate error message using + * 'name' as a prefix. For example, 'name' could be "WHERE clause". + * The error message only contains this.toSql() if printExpr is true. + */ + public void checkReturnsBool(String name, boolean printExpr) throws AnalysisException { + if (!type_.isBoolean() && !type_.isNull()) { + throw new AnalysisException( + String.format("%s%s requires return type 'BOOLEAN'. " + + "Actual type is '%s'.", name, (printExpr) ? " '" + toSql() + "'" : "", + type_.toString())); + } + } + + /** + * Casts this expr to a specific target type. It checks the validity of the cast and + * calls uncheckedCastTo(). + * @param targetType + * type to be cast to + * @return cast expression, or converted literal, + * should never return null + * @throws AnalysisException + * when an invalid cast is asked for, for example, + * failure to convert a string literal to a date literal + */ + public final Expr castTo(Type targetType) throws AnalysisException { + Type type = Type.getAssignmentCompatibleType(this.type_, targetType, false); + Preconditions.checkState(type.isValid(), "cast %s to %s", this.type_, targetType); + // If the targetType is NULL_TYPE then ignore the cast because NULL_TYPE + // is compatible with all types and no cast is necessary. + if (targetType.isNull()) return this; + if (!targetType.isDecimal()) { + // requested cast must be to assignment-compatible type + // (which implies no loss of precision) + Preconditions.checkArgument(targetType.equals(type), + "targetType=" + targetType + " type=" + type); + } + return uncheckedCastTo(targetType); + } + + /** + * Create an expression equivalent to 'this' but returning targetType; + * possibly by inserting an implicit cast, + * or by returning an altogether new expression + * or by returning 'this' with a modified return type'. + * @param targetType + * type to be cast to + * @return cast expression, or converted literal, + * should never return null + * @throws AnalysisException + * when an invalid cast is asked for, for example, + * failure to convert a string literal to a date literal + */ + protected Expr uncheckedCastTo(Type targetType) throws AnalysisException { + return new CastExpr(targetType, this); + } + + /** + * Add a cast expression above child. + * If child is a literal expression, we attempt to + * convert the value of the child directly, and not insert a cast node. + * @param targetType + * type to be cast to + * @param childIndex + * index of child to be cast + */ + public void castChild(Type targetType, int childIndex) throws AnalysisException { + Expr child = getChild(childIndex); + Expr newChild = child.castTo(targetType); + setChild(childIndex, newChild); + } + + + /** + * Convert child to to targetType, possibly by inserting an implicit cast, or by + * returning an altogether new expression, or by returning 'this' with a modified + * return type'. + * @param targetType + * type to be cast to + * @param childIndex + * index of child to be cast + */ + protected void uncheckedCastChild(Type targetType, int childIndex) + throws AnalysisException { + Expr child = getChild(childIndex); + Expr newChild = child.uncheckedCastTo(targetType); + setChild(childIndex, newChild); + } + + /** + * Returns child expr if this expr is an implicit cast, otherwise returns 'this'. + */ + public Expr ignoreImplicitCast() { + if (isImplicitCast()) return getChild(0).ignoreImplicitCast(); + return this; + } + + /** + * Returns true if 'this' is an implicit cast expr. + */ + public boolean isImplicitCast() { + return this instanceof CastExpr && ((CastExpr) this).isImplicit(); + } + + @Override + public String toString() { + return Objects.toStringHelper(this.getClass()) + .add("id", id_) + .add("type", type_) + .add("sel", selectivity_) + .add("evalCost", evalCost_) + .add("#distinct", numDistinctValues_) + .toString(); + } + + /** + * If 'this' is a SlotRef or a Cast that wraps a SlotRef, returns that SlotRef. + * Otherwise returns null. + */ + public SlotRef unwrapSlotRef(boolean implicitOnly) { + if (this instanceof SlotRef) { + return (SlotRef) this; + } else if (this instanceof CastExpr + && (!implicitOnly || ((CastExpr) this).isImplicit()) + && getChild(0) instanceof SlotRef) { + return (SlotRef) getChild(0); + } else { + return null; + } + } + + /** + * Returns the descriptor of the scan slot that directly or indirectly produces + * the values of 'this' SlotRef. Traverses the source exprs of intermediate slot + * descriptors to resolve materialization points (e.g., aggregations). + * Returns null if 'e' or any source expr of 'e' is not a SlotRef or cast SlotRef. + */ + public SlotDescriptor findSrcScanSlot() { + SlotRef slotRef = unwrapSlotRef(false); + if (slotRef == null) return null; + SlotDescriptor slotDesc = slotRef.getDesc(); + if (slotDesc.isScanSlot()) return slotDesc; + if (slotDesc.getSourceExprs().size() == 1) { + return slotDesc.getSourceExprs().get(0).findSrcScanSlot(); + } + // No known source expr, or there are several source exprs meaning the slot is + // has no single source table. + return null; + } + + /** + * Pushes negation to the individual operands of a predicate + * tree rooted at 'root'. + */ + public static Expr pushNegationToOperands(Expr root) { + Preconditions.checkNotNull(root); + if (Expr.IS_NOT_PREDICATE.apply(root)) { + try { + // Make sure we call function 'negate' only on classes that support it, + // otherwise we may recurse infinitely. + Method m = root.getChild(0).getClass().getDeclaredMethod(NEGATE_FN); + return pushNegationToOperands(root.getChild(0).negate()); + } catch (NoSuchMethodException e) { + // The 'negate' function is not implemented. Break the recursion. + return root; + } + } + + if (root instanceof CompoundPredicate) { + Expr left = pushNegationToOperands(root.getChild(0)); + Expr right = pushNegationToOperands(root.getChild(1)); + return new CompoundPredicate(((CompoundPredicate)root).getOp(), left, right); + } + + return root; + } + + /** + * Negates a boolean Expr. + */ + public Expr negate() { + Preconditions.checkState(type_.getPrimitiveType() == PrimitiveType.BOOLEAN); + return new CompoundPredicate(CompoundPredicate.Operator.NOT, this, null); + } + + /** + * Returns the subquery of an expr. Returns null if this expr does not contain + * a subquery. + * + * TODO: Support predicates with more that one subqueries when we implement + * the independent subquery evaluation. + */ + public Subquery getSubquery() { + if (!contains(Subquery.class)) return null; + List<Subquery> subqueries = Lists.newArrayList(); + collect(Subquery.class, subqueries); + Preconditions.checkState(subqueries.size() == 1); + return subqueries.get(0); + } + + /** + * For children of 'this' that are constant expressions and the type of which has a + * LiteralExpr subclass, evaluate them in the BE and substitute the child with the + * resulting LiteralExpr. Modifies 'this' in place and does not re-analyze it. Hence, + * it is not safe to evaluate the modified expr in the BE as the resolved fn_ may be + * incorrect given the new arguments. + * + * Throws an AnalysisException if the evaluation fails in the BE. + * + * TODO: Convert to a generic constant expr folding function to be used during analysis. + */ + public void foldConstantChildren(Analyzer analyzer) throws AnalysisException { + Preconditions.checkState(isAnalyzed_); + Preconditions.checkNotNull(analyzer); + for (int i = 0; i < children_.size(); ++i) { + Expr child = getChild(i); + if (child.isLiteral() || !child.isConstant()) continue; + LiteralExpr literalExpr = LiteralExpr.create(child, analyzer.getQueryCtx()); + if (literalExpr == null) continue; + setChild(i, literalExpr); + } + isAnalyzed_ = false; + } + + /** + * Returns true iff all of this Expr's children have their costs set. + */ + protected boolean hasChildCosts() { + for (Expr child : children_) { + if (!child.hasCost()) return false; + } + return true; + } + + /** + * Computes and returns the sum of the costs of all of this Expr's children. + */ + protected float getChildCosts() { + float cost = 0; + for (Expr child : children_) cost += child.getCost(); + return cost; + } + + /** + * Returns the average length of the values produced by an Expr + * of type string. Returns a default for unknown lengths. + */ + protected static double getAvgStringLength(Expr e) { + Preconditions.checkState(e.getType().isStringType()); + Preconditions.checkState(e.isAnalyzed_); + + SlotRef ref = e.unwrapSlotRef(false); + if (ref != null) { + if (ref.getDesc() != null && ref.getDesc().getStats().getAvgSize() > 0) { + return ref.getDesc().getStats().getAvgSize(); + } else { + return DEFAULT_AVG_STRING_LENGTH; + } + } else if (e instanceof StringLiteral) { + return ((StringLiteral) e).getValue().length(); + } else { + // TODO(tmarshall): Extend this to support other string Exprs, such as + // function calls that return string. + return DEFAULT_AVG_STRING_LENGTH; + } + } +}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/ExprId.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ExprId.java b/fe/src/main/java/org/apache/impala/analysis/ExprId.java new file mode 100644 index 0000000..52292f5 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/analysis/ExprId.java @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloudera.impala.analysis; + +import com.cloudera.impala.common.Id; +import com.cloudera.impala.common.IdGenerator; + +public class ExprId extends Id<ExprId> { + // Construction only allowed via an IdGenerator. + protected ExprId(int id) { + super(id); + } + + public static IdGenerator<ExprId> createGenerator() { + return new IdGenerator<ExprId>() { + @Override + public ExprId getNextId() { return new ExprId(nextId_++); } + @Override + public ExprId getMaxId() { return new ExprId(nextId_ - 1); } + }; + } +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java new file mode 100644 index 0000000..cbff71a --- /dev/null +++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloudera.impala.analysis; + +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Map of expression substitutions: lhs[i] gets substituted with rhs[i]. + * To support expression substitution across query blocks, rhs exprs must already be + * analyzed when added to this map. Otherwise, analysis of a SlotRef may fail after + * substitution, e.g., because the table it refers to is in a different query block + * that is not visible. + * See Expr.substitute() and related functions for details on the actual substitution. + */ +public final class ExprSubstitutionMap { + private final static Logger LOG = LoggerFactory.getLogger(ExprSubstitutionMap.class); + + private List<Expr> lhs_; // left-hand side + private List<Expr> rhs_; // right-hand side + + public ExprSubstitutionMap() { + this(Lists.<Expr>newArrayList(), Lists.<Expr>newArrayList()); + } + + public ExprSubstitutionMap(List<Expr> lhs, List<Expr> rhs) { + lhs_ = lhs; + rhs_ = rhs; + } + + /** + * Add an expr mapping. The rhsExpr must be analyzed to support correct substitution + * across query blocks. It is not required that the lhsExpr is analyzed. + */ + public void put(Expr lhsExpr, Expr rhsExpr) { + Preconditions.checkState(rhsExpr.isAnalyzed_, "Rhs expr must be analyzed."); + lhs_.add(lhsExpr); + rhs_.add(rhsExpr); + } + + /** + * Returns the expr mapped to lhsExpr or null if no mapping to lhsExpr exists. + */ + public Expr get(Expr lhsExpr) { + for (int i = 0; i < lhs_.size(); ++i) { + if (lhsExpr.equals(lhs_.get(i))) return rhs_.get(i); + } + return null; + } + + /** + * Returns true if the smap contains a mapping for lhsExpr. + */ + public boolean containsMappingFor(Expr lhsExpr) { + return lhs_.contains(lhsExpr); + } + + /** + * Return a map which is equivalent to applying f followed by g, + * i.e., g(f()). + * Always returns a non-null map. + */ + public static ExprSubstitutionMap compose(ExprSubstitutionMap f, ExprSubstitutionMap g, + Analyzer analyzer) { + if (f == null && g == null) return new ExprSubstitutionMap(); + if (f == null) return g; + if (g == null) return f; + ExprSubstitutionMap result = new ExprSubstitutionMap(); + // f's substitution targets need to be substituted via g + result.lhs_ = Expr.cloneList(f.lhs_); + result.rhs_ = Expr.substituteList(f.rhs_, g, analyzer, false); + + // substitution maps are cumulative: the combined map contains all + // substitutions from f and g. + for (int i = 0; i < g.lhs_.size(); i++) { + // If f contains expr1->fn(expr2) and g contains expr2->expr3, + // then result must contain expr1->fn(expr3). + // The check before adding to result.lhs is to ensure that cases + // where expr2.equals(expr1) are handled correctly. + // For example f: count(*) -> zeroifnull(count(*)) + // and g: count(*) -> slotref + // result.lhs must only have: count(*) -> zeroifnull(slotref) from f above, + // and not count(*) -> slotref from g as well. + if (!result.lhs_.contains(g.lhs_.get(i))) { + result.lhs_.add(g.lhs_.get(i).clone()); + result.rhs_.add(g.rhs_.get(i).clone()); + } + } + + result.verify(); + return result; + } + + /** + * Returns the union of two substitution maps. Always returns a non-null map. + */ + public static ExprSubstitutionMap combine(ExprSubstitutionMap f, + ExprSubstitutionMap g) { + if (f == null && g == null) return new ExprSubstitutionMap(); + if (f == null) return g; + if (g == null) return f; + ExprSubstitutionMap result = new ExprSubstitutionMap(); + result.lhs_ = Lists.newArrayList(f.lhs_); + result.lhs_.addAll(g.lhs_); + result.rhs_ = Lists.newArrayList(f.rhs_); + result.rhs_.addAll(g.rhs_); + result.verify(); + return result; + } + + public void substituteLhs(ExprSubstitutionMap lhsSmap, Analyzer analyzer) { + lhs_ = Expr.substituteList(lhs_, lhsSmap, analyzer, false); + } + + public List<Expr> getLhs() { return lhs_; } + public List<Expr> getRhs() { return rhs_; } + + public int size() { return lhs_.size(); } + + public String debugString() { + Preconditions.checkState(lhs_.size() == rhs_.size()); + List<String> output = Lists.newArrayList(); + for (int i = 0; i < lhs_.size(); ++i) { + output.add(lhs_.get(i).toSql() + ":" + rhs_.get(i).toSql()); + output.add("(" + lhs_.get(i).debugString() + ":" + rhs_.get(i).debugString() + ")"); + } + return "smap(" + Joiner.on(" ").join(output) + ")"; + } + + /** + * Verifies the internal state of this smap: Checks that the lhs_ has no duplicates, + * and that all rhs exprs are analyzed. + */ + private void verify() { + for (int i = 0; i < lhs_.size(); ++i) { + for (int j = i + 1; j < lhs_.size(); ++j) { + if (lhs_.get(i).equals(lhs_.get(j))) { + LOG.info("verify: smap=" + this.debugString()); + Preconditions.checkState(false); + } + } + Preconditions.checkState(rhs_.get(i).isAnalyzed_); + } + } + + public void clear() { + lhs_.clear(); + rhs_.clear(); + } + + @Override + public ExprSubstitutionMap clone() { + return new ExprSubstitutionMap(Expr.cloneList(lhs_), Expr.cloneList(rhs_)); + } +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/ExtractFromExpr.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ExtractFromExpr.java b/fe/src/main/java/org/apache/impala/analysis/ExtractFromExpr.java new file mode 100644 index 0000000..48b9fb3 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/analysis/ExtractFromExpr.java @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloudera.impala.analysis; + +import java.util.Set; + +import com.cloudera.impala.catalog.Catalog; +import com.cloudera.impala.catalog.Type; +import com.cloudera.impala.common.AnalysisException; +import com.cloudera.impala.thrift.TExtractField; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +/** + * Representation of an EXTRACT(<Time Unit> FROM <Datetime Expr>) expression. EXTRACT( + * <Datetime Expr>, <String>) is not handled by FunctionCallExpr. + */ +public class ExtractFromExpr extends FunctionCallExpr { + + // Behaves like an immutable linked hash set containing the TExtractFields in the same + // order as declared. + private static final Set<String> EXTRACT_FIELDS; + static { + ImmutableSet.Builder<String> builder = new ImmutableSet.Builder<String>(); + for (TExtractField extractField: TExtractField.values()) { + if (extractField != TExtractField.INVALID_FIELD) { + builder.add(extractField.name()); + } + } + EXTRACT_FIELDS = builder.build(); + } + + public ExtractFromExpr(FunctionName fnName, String extractFieldIdent, Expr e) { + // Note that the arguments are swapped so that they align with the EXTRACT function. + // There is no EXTRACT(STRING, TIMESTAMP) function because it conflicts with + // EXTRACT(TIMESTAMP, STRING) if STRINGs are used for TIMESTAMPs with implicit + // casting. + super(fnName, Lists.newArrayList(e, new StringLiteral(extractFieldIdent))); + type_ = Type.INT; + } + + /** + * Copy c'tor used in clone(). + */ + protected ExtractFromExpr(ExtractFromExpr other) { + super(other); + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + getFnName().analyze(analyzer); + if (!getFnName().getFunction().equals("extract")) { + throw new AnalysisException("Function " + getFnName().getFunction().toUpperCase() + + " does not accept the keyword FROM."); + } + if ((getFnName().getDb() != null) + && !getFnName().getDb().equals(Catalog.BUILTINS_DB)) { + throw new AnalysisException("Function " + getFnName().toString() + " conflicts " + + "with the EXTRACT builtin."); + } + if (isAnalyzed_) return; + super.analyze(analyzer); + + String extractFieldIdent = ((StringLiteral)children_.get(1)).getValue(); + Preconditions.checkNotNull(extractFieldIdent); + if (!EXTRACT_FIELDS.contains(extractFieldIdent.toUpperCase())) { + throw new AnalysisException("Time unit '" + extractFieldIdent + "' in expression '" + + toSql() + "' is invalid. Expected one of " + + Joiner.on(", ").join(EXTRACT_FIELDS) + "."); + } + } + + @Override + protected String getFunctionNotFoundError(Type[] argTypes) { + Expr e = children_.get(0); + return "Expression '" + e.toSql() + "' in '" + toSql() + "' has a return type of " + + e.getType().toSql() + " but a TIMESTAMP is required."; + } + + @Override + public String toSqlImpl() { + StringBuilder strBuilder = new StringBuilder(); + strBuilder.append(getFnName().toString().toUpperCase()); + strBuilder.append("("); + strBuilder.append(((StringLiteral)getChild(1)).getValue()); + strBuilder.append(" FROM "); + strBuilder.append(getChild(0).toSql()); + strBuilder.append(")"); + return strBuilder.toString(); + } + + @Override + public Expr clone() { return new ExtractFromExpr(this); } +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/FromClause.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/FromClause.java b/fe/src/main/java/org/apache/impala/analysis/FromClause.java new file mode 100644 index 0000000..bbe6f23 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/analysis/FromClause.java @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloudera.impala.analysis; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import com.cloudera.impala.common.AnalysisException; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Wraps a list of TableRef instances that form a FROM clause, allowing them to be + * analyzed independently of the statement using them. To increase the flexibility of + * the class it implements the Iterable interface. + */ +public class FromClause implements ParseNode, Iterable<TableRef> { + + private final ArrayList<TableRef> tableRefs_; + + private boolean analyzed_ = false; + + public FromClause(List<TableRef> tableRefs) { + tableRefs_ = Lists.newArrayList(tableRefs); + // Set left table refs to ensure correct toSql() before analysis. + for (int i = 1; i < tableRefs_.size(); ++i) { + tableRefs_.get(i).setLeftTblRef(tableRefs_.get(i - 1)); + } + } + + public FromClause() { tableRefs_ = Lists.newArrayList(); } + public List<TableRef> getTableRefs() { return tableRefs_; } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + if (analyzed_) return; + + if (tableRefs_.isEmpty()) { + analyzed_ = true; + return; + } + + // Start out with table refs to establish aliases. + TableRef leftTblRef = null; // the one to the left of tblRef + for (int i = 0; i < tableRefs_.size(); ++i) { + // Resolve and replace non-InlineViewRef table refs with a BaseTableRef or ViewRef. + TableRef tblRef = tableRefs_.get(i); + tblRef = analyzer.resolveTableRef(tblRef); + tableRefs_.set(i, Preconditions.checkNotNull(tblRef)); + tblRef.setLeftTblRef(leftTblRef); + try { + tblRef.analyze(analyzer); + } catch (AnalysisException e) { + // Only re-throw the exception if no tables are missing. + if (analyzer.getMissingTbls().isEmpty()) throw e; + } + leftTblRef = tblRef; + } + + // All tableRefs have been analyzed, but at least one table is missing metadata. + if (!analyzer.getMissingTbls().isEmpty()) { + throw new AnalysisException("Found missing tables. Aborting analysis."); + } + analyzed_ = true; + } + + public FromClause clone() { + ArrayList<TableRef> clone = Lists.newArrayList(); + for (TableRef tblRef: tableRefs_) clone.add(tblRef.clone()); + return new FromClause(clone); + } + + public void reset() { + for (int i = 0; i < size(); ++i) { + TableRef origTblRef = get(i); + if (origTblRef.isResolved() && !(origTblRef instanceof InlineViewRef)) { + // Replace resolved table refs with unresolved ones. + TableRef newTblRef = new TableRef(origTblRef); + // Use the fully qualified raw path to preserve the original resolution. + // Otherwise, non-fully qualified paths might incorrectly match a local view. + // TODO for 2.3: This full qualification preserves analysis state which is + // contrary to the intended semantics of reset(). We could address this issue by + // changing the WITH-clause analysis to register local views that have + // fully-qualified table refs, and then remove the full qualification here. + newTblRef.rawPath_ = origTblRef.getResolvedPath().getFullyQualifiedRawPath(); + set(i, newTblRef); + } + get(i).reset(); + } + this.analyzed_ = false; + } + + @Override + public String toSql() { + StringBuilder builder = new StringBuilder(); + if (!tableRefs_.isEmpty()) { + builder.append(" FROM "); + for (int i = 0; i < tableRefs_.size(); ++i) { + builder.append(tableRefs_.get(i).toSql()); + } + } + return builder.toString(); + } + + public boolean isEmpty() { return tableRefs_.isEmpty(); } + + @Override + public Iterator<TableRef> iterator() { return tableRefs_.iterator(); } + public int size() { return tableRefs_.size(); } + public TableRef get(int i) { return tableRefs_.get(i); } + public void set(int i, TableRef tableRef) { tableRefs_.set(i, tableRef); } + public void add(TableRef t) { tableRefs_.add(t); } +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/org/apache/impala/analysis/FunctionArgs.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionArgs.java b/fe/src/main/java/org/apache/impala/analysis/FunctionArgs.java new file mode 100644 index 0000000..998c5fc --- /dev/null +++ b/fe/src/main/java/org/apache/impala/analysis/FunctionArgs.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloudera.impala.analysis; + +import java.util.ArrayList; + +import com.cloudera.impala.catalog.Type; +import com.cloudera.impala.common.AnalysisException; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +// Wrapper class around argument types and if it has varArgs +public class FunctionArgs implements ParseNode { + private final ArrayList<TypeDef> argTypeDefs_; + private boolean hasVarArgs_; + + // Result of analysis. + private ArrayList<Type> argTypes_; + + public FunctionArgs() { + argTypeDefs_ = Lists.newArrayList(); + hasVarArgs_ = false; + } + + public FunctionArgs(ArrayList<TypeDef> argTypeDefs, boolean varArgs) { + argTypeDefs_ = argTypeDefs; + hasVarArgs_ = varArgs; + if (varArgs) Preconditions.checkState(argTypeDefs.size() > 0); + } + + public void setHasVarArgs(boolean b) { { + Preconditions.checkState(argTypeDefs_.size() > 0); + hasVarArgs_ = b; } + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + ArrayList<Type> argTypes = Lists.newArrayListWithCapacity(argTypeDefs_.size()); + for (TypeDef typeDef: argTypeDefs_) { + typeDef.analyze(analyzer); + argTypes.add(typeDef.getType()); + } + argTypes_ = argTypes; + } + + public ArrayList<TypeDef> getArgTypeDefs() { return argTypeDefs_; } + public ArrayList<Type> getArgTypes() { return argTypes_; } + public boolean hasVarArgs() { return hasVarArgs_; } + + @Override + public String toSql() { return null; } +}
