kgyrtkirk commented on a change in pull request #1553:
URL: https://github.com/apache/hive/pull/1553#discussion_r503821169
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
##########
@@ -386,125 +456,81 @@ public boolean sharedWorkOptimization(ParseContext pctx,
SharedWorkOptimizerCach
LOG.debug("Merging subtree starting at {} into subtree starting
at {}",
discardableTsOp, retainableTsOp);
} else {
- ExprNodeDesc newRetainableTsFilterExpr = null;
- List<ExprNodeDesc> semijoinExprNodes = new ArrayList<>();
- if (retainableTsOp.getConf().getFilterExpr() != null) {
- // Gather SJ expressions and normal expressions
- List<ExprNodeDesc> allExprNodesExceptSemijoin = new
ArrayList<>();
- splitExpressions(retainableTsOp.getConf().getFilterExpr(),
- allExprNodesExceptSemijoin, semijoinExprNodes);
- // Create new expressions
- if (allExprNodesExceptSemijoin.size() > 1) {
- newRetainableTsFilterExpr =
ExprNodeGenericFuncDesc.newInstance(
- new GenericUDFOPAnd(), allExprNodesExceptSemijoin);
- } else if (allExprNodesExceptSemijoin.size() > 0 &&
- allExprNodesExceptSemijoin.get(0) instanceof
ExprNodeGenericFuncDesc) {
- newRetainableTsFilterExpr =
allExprNodesExceptSemijoin.get(0);
- }
- // Push filter on top of children for retainable
- pushFilterToTopOfTableScan(optimizerCache, retainableTsOp);
+
+ if (sr.discardableOps.size() > 1) {
+ throw new RuntimeException("we can't discard more in this
path");
}
- ExprNodeDesc newDiscardableTsFilterExpr = null;
- if (discardableTsOp.getConf().getFilterExpr() != null) {
- // If there is a single discardable operator, it is a
TableScanOperator
- // and it means that we will merge filter expressions for it.
Thus, we
- // might need to remove DPP predicates before doing that
- List<ExprNodeDesc> allExprNodesExceptSemijoin = new
ArrayList<>();
- splitExpressions(discardableTsOp.getConf().getFilterExpr(),
- allExprNodesExceptSemijoin, new ArrayList<>());
- // Create new expressions
- if (allExprNodesExceptSemijoin.size() > 1) {
- newDiscardableTsFilterExpr =
ExprNodeGenericFuncDesc.newInstance(
- new GenericUDFOPAnd(), allExprNodesExceptSemijoin);
- } else if (allExprNodesExceptSemijoin.size() > 0 &&
- allExprNodesExceptSemijoin.get(0) instanceof
ExprNodeGenericFuncDesc) {
- newDiscardableTsFilterExpr =
allExprNodesExceptSemijoin.get(0);
- }
- // Remove and add semijoin filter from expressions
- replaceSemijoinExpressions(discardableTsOp, semijoinExprNodes);
- // Push filter on top of children for discardable
- pushFilterToTopOfTableScan(optimizerCache, discardableTsOp);
+
+ SharedWorkModel modelR = new SharedWorkModel(retainableTsOp);
+ SharedWorkModel modelD = new SharedWorkModel(discardableTsOp);
+
+ // Push filter on top of children for retainable
+ pushFilterToTopOfTableScan(optimizerCache, retainableTsOp);
+
+ if (mode == Mode.RemoveSemijoin || mode == Mode.SubtreeMerge) {
+ // FIXME: I think idea here is to clear the discardable's
semijoin filter
+ // - by using the retainable's (which should be empty in case
of this mode)
+ replaceSemijoinExpressions(discardableTsOp,
modelR.getSemiJoinFilter());
}
+ // Push filter on top of children for discardable
+ pushFilterToTopOfTableScan(optimizerCache, discardableTsOp);
+
// Obtain filter for shared TS operator
- ExprNodeGenericFuncDesc exprNode = null;
- if (newRetainableTsFilterExpr != null &&
newDiscardableTsFilterExpr != null) {
- // Combine
- exprNode = (ExprNodeGenericFuncDesc) newRetainableTsFilterExpr;
- if (!exprNode.isSame(newDiscardableTsFilterExpr)) {
- // We merge filters from previous scan by ORing with filters
from current scan
- if (exprNode.getGenericUDF() instanceof GenericUDFOPOr) {
- List<ExprNodeDesc> newChildren = new
ArrayList<>(exprNode.getChildren().size() + 1);
- for (ExprNodeDesc childExprNode : exprNode.getChildren()) {
- if (childExprNode.isSame(newDiscardableTsFilterExpr)) {
- // We do not need to do anything, it is in the OR
expression
- break;
- }
- newChildren.add(childExprNode);
- }
- if (exprNode.getChildren().size() == newChildren.size()) {
- newChildren.add(newDiscardableTsFilterExpr);
- exprNode = ExprNodeGenericFuncDesc.newInstance(
- new GenericUDFOPOr(),
- newChildren);
- }
- } else {
- exprNode = ExprNodeGenericFuncDesc.newInstance(
- new GenericUDFOPOr(),
- Arrays.asList(exprNode, newDiscardableTsFilterExpr));
- }
- }
+ ExprNodeDesc exprNode = null;
+ if (modelR.normalFilterExpr != null && modelD.normalFilterExpr
!= null) {
+ exprNode = disjunction(modelR.normalFilterExpr,
modelD.normalFilterExpr);
}
- // Create expression node that will be used for the retainable
table scan
- if (!semijoinExprNodes.isEmpty()) {
- if (exprNode != null) {
- semijoinExprNodes.add(0, exprNode);
- }
- if (semijoinExprNodes.size() > 1) {
- exprNode = ExprNodeGenericFuncDesc.newInstance(
- new GenericUDFOPAnd(), semijoinExprNodes);
- } else {
- exprNode = (ExprNodeGenericFuncDesc)
semijoinExprNodes.get(0);
- }
+ List<ExprNodeDesc> semiJoinExpr = null;
+ if (mode == Mode.DPPUnion) {
+ assert modelR.semijoinExprNodes != null;
+ assert modelD.semijoinExprNodes != null;
+ ExprNodeDesc disjunction =
disjunction(conjunction(modelR.semijoinExprNodes),
conjunction(modelD.semijoinExprNodes));
+ semiJoinExpr = disjunction == null ? null :
Lists.newArrayList(disjunction);
+ } else {
+ semiJoinExpr = modelR.semijoinExprNodes;
}
+
+ // Create expression node that will be used for the retainable
table scan
+ exprNode = conjunction(semiJoinExpr, exprNode);
// Replace filter
- retainableTsOp.getConf().setFilterExpr(exprNode);
+ retainableTsOp.getConf().setFilterExpr((ExprNodeGenericFuncDesc)
exprNode);
// Replace table scan operator
List<Operator<? extends OperatorDesc>> allChildren =
Lists.newArrayList(discardableTsOp.getChildOperators());
for (Operator<? extends OperatorDesc> op : allChildren) {
- discardableTsOp.getChildOperators().remove(op);
op.replaceParent(discardableTsOp, retainableTsOp);
retainableTsOp.getChildOperators().add(op);
}
+ discardableTsOp.getChildOperators().clear();
LOG.debug("Merging {} into {}", discardableTsOp, retainableTsOp);
}
// First we remove the input operators of the expression that
Review comment:
This small block can be replaced with a function (`adoptChildren`) from the
next patch; I hadn't noticed until now that I could use it here as well. I've
added the method and called it.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]