HIVE-11310: Avoid expensive AST tree conversion to String for expressions in WHERE clause (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/57242e34 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/57242e34 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/57242e34 Branch: refs/heads/hbase-metastore Commit: 57242e3430decfc9984230e3bf9d26f0304855d6 Parents: 83be12f Author: Jesus Camacho Rodriguez <[email protected]> Authored: Tue Jul 21 10:33:22 2015 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Wed Jul 22 16:24:52 2015 +0100 ---------------------------------------------------------------------- .../calcite/translator/JoinTypeCheckCtx.java | 2 +- .../apache/hadoop/hive/ql/parse/ParseUtils.java | 51 +++++++++++++++++++- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 32 +++++++----- .../hadoop/hive/ql/parse/TypeCheckCtx.java | 15 +++++- .../hive/ql/parse/TypeCheckProcFactory.java | 4 ++ 5 files changed, 87 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/57242e34/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java index bbd4723..dccd1d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java @@ -53,7 +53,7 @@ public class JoinTypeCheckCtx extends TypeCheckCtx { public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType) throws SemanticException { - super(RowResolver.getCombinedRR(leftRR, rightRR), false, false, false, false, false, false, + super(RowResolver.getCombinedRR(leftRR, rightRR), true, false, false, false, false, false, false, false, false); this.inputRRLst = ImmutableList.of(leftRR, rightRR); this.outerJoin = (hiveJoinType == JoinType.LEFTOUTER) || (hiveJoinType == JoinType.RIGHTOUTER) http://git-wip-us.apache.org/repos/asf/hive/blob/57242e34/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 18f0180..bf1b5d4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -18,9 +18,16 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.*; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.Stack; -import org.apache.hadoop.hive.common.JavaUtils; +import org.antlr.runtime.tree.Tree; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -266,4 +273,44 @@ public final class ParseUtils { return false; } + + public static boolean sameTree(ASTNode node, ASTNode otherNode) { + if (node == null && otherNode == null) { + return true; + } + if ((node == null && otherNode != null) || + (node != null && otherNode == null)) { + return false; + } + + Stack<Tree> stack = new Stack<Tree>(); + stack.push(node); + Stack<Tree> otherStack = new Stack<Tree>(); + otherStack.push(otherNode); + + while (!stack.empty() && !otherStack.empty()) { + Tree p = stack.pop(); + Tree otherP = otherStack.pop(); + + if (p.isNil() != otherP.isNil()) { + return false; + } + if (!p.isNil()) { + if (!p.toString().equals(otherP.toString())) { + return false; + } + } + if (p.getChildCount() != otherP.getChildCount()) { + return false; + } + for (int i = p.getChildCount()-1; i >= 0; i--) { + Tree t = p.getChild(i); + stack.push(t); + Tree otherT = otherP.getChild(i); + otherStack.push(otherT); + } + } + + return stack.empty() && otherStack.empty(); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/57242e34/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index aab4250..0c191da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2625,7 +2625,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * so we invoke genFilterPlan to handle SubQuery algebraic transformation, * just as is done for SubQuery predicates appearing in the Where Clause. */ - Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true); + Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true, false); output = putOpInsertMap(output, inputRR); return output; } @@ -2644,7 +2644,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { @SuppressWarnings("nls") private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, Map<String, Operator> aliasToOpInfo, - boolean forHavingClause) + boolean forHavingClause, boolean forGroupByClause) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); @@ -2786,7 +2786,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } - return genFilterPlan(qb, searchCond, input); + return genFilterPlan(qb, searchCond, input, forHavingClause || forGroupByClause); } /** @@ -2800,13 +2800,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * the input operator */ @SuppressWarnings("nls") - private Operator genFilterPlan(QB qb, ASTNode condn, Operator input) + private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( + new FilterDesc(genExprNodeDesc(condn, inputRR, useCaching), false), new RowSchema( inputRR.getColumnInfos()), input), inputRR); if (LOG.isDebugEnabled()) { @@ -5414,7 +5414,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (parseInfo.getWhrForClause(dest) != null) { ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false, true); } // Generate GroupbyOperator @@ -7559,7 +7559,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if ( joinSrcOp != null ) { ArrayList<ASTNode> filter = joinTree.getFiltersForPushing().get(0); for (ASTNode cond : filter) { - joinSrcOp = genFilterPlan(qb, cond, joinSrcOp); + joinSrcOp = genFilterPlan(qb, cond, joinSrcOp, false); } } @@ -7615,7 +7615,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { Operator op = joinOp; for(ASTNode condn : joinTree.getPostJoinFilters() ) { - op = genFilterPlan(qb, condn, op); + op = genFilterPlan(qb, condn, op, false); } return op; } @@ -7788,7 +7788,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { Operator srcOp = map.get(src); ArrayList<ASTNode> filter = filters.get(pos); for (ASTNode cond : filter) { - srcOp = genFilterPlan(qb, cond, srcOp); + srcOp = genFilterPlan(qb, cond, srcOp, false); } map.put(src, srcOp); } @@ -8831,7 +8831,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (qbp.getWhrForClause(dest) != null) { ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false, false); } // Preserve operator before the GBY - we'll use it to resolve '*' Operator<?> gbySource = curr; @@ -10425,7 +10425,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { throws SemanticException { // Since the user didn't supply a customized type-checking context, // use default settings. - TypeCheckCtx tcCtx = new TypeCheckCtx(input); + return genExprNodeDesc(expr, input, true); + } + + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching) + throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching); return genExprNodeDesc(expr, input, tcCtx); } @@ -10453,7 +10458,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // build the exprNodeFuncDesc with recursively built children. // If the current subExpression is pre-calculated, as in Group-By etc. - ExprNodeDesc cached = getExprNodeDescCached(expr, input); + ExprNodeDesc cached = null; + if (tcCtx.isUseCaching()) { + cached = getExprNodeDescCached(expr, input); + } if (cached == null) { Map<ASTNode, ExprNodeDesc> allExprs = genAllExprNodeDesc(expr, input, tcCtx); return allExprs.get(expr); http://git-wip-us.apache.org/repos/asf/hive/blob/57242e34/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java index b19e2bf..8ad28be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java @@ -35,6 +35,8 @@ public class TypeCheckCtx implements NodeProcessorCtx { */ private RowResolver inputRR; + private final boolean useCaching; + /** * Receives translations which will need to be applied during unparse. */ @@ -77,15 +79,20 @@ public class TypeCheckCtx implements NodeProcessorCtx { * The input row resolver of the previous operator. */ public TypeCheckCtx(RowResolver inputRR) { - this(inputRR, false, true, true, true, true, true, true, true); + this(inputRR, true); + } + + public TypeCheckCtx(RowResolver inputRR, boolean useCaching) { + this(inputRR, useCaching, false, true, true, true, true, true, true, true); } - public TypeCheckCtx(RowResolver inputRR, boolean allowStatefulFunctions, + public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean allowStatefulFunctions, boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, boolean allowFunctionStar, boolean allowWindowing, boolean allowIndexExpr, boolean allowSubQueryExpr) { setInputRR(inputRR); error = null; + this.useCaching = useCaching; this.allowStatefulFunctions = allowStatefulFunctions; this.allowDistinctFunctions = allowDistinctFunctions; this.allowGBExprElimination = allowGBExprElimination; @@ -198,4 +205,8 @@ public class TypeCheckCtx implements NodeProcessorCtx { public boolean getallowSubQueryExpr() { return allowSubQueryExpr; } + + public boolean isUseCaching() { + return useCaching; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/57242e34/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 0e97530..d823f03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -124,6 +124,10 @@ public class TypeCheckProcFactory { ASTNode expr = (ASTNode) nd; TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (!ctx.isUseCaching()) { + return null; + } + RowResolver input = ctx.getInputRR(); ExprNodeDesc desc = null;
