asolimando commented on code in PR #2966:
URL: https://github.com/apache/hive/pull/2966#discussion_r857573180
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java:
##########
@@ -143,28 +138,82 @@ private ImmutableList<RexNode>
getValidPreds(RelOptCluster cluster, RelNode chil
}
}
- // We need to filter i) those that have been pushed already as stored in
the join,
- // and ii) those that were already in the subtree rooted at child
- ImmutableList<RexNode> toPush =
HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude,
- child, valids);
- return toPush;
+ // We need to filter:
+ // i) those that have been pushed already as stored in the join,
+ // ii) those that were already in the subtree rooted at child.
+ List<RexNode> toPush =
HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude, child, valids);
+
+ // If we run the rule in conservative mode, we also filter:
+ // iii) predicates that are not safe for transitive inference.
+ //
+ // There is no formal definition of safety for predicate inference, only
an empirical one.
+ // An unsafe predicate in this context is one that when pushed across join
operands, can lead
+ // to redundant predicates that cannot be simplified (by means of
predicates merging with other existing ones).
+ // This situation can lead to an OOM for cases where lack of
simplification allows inferring new predicates
+ // (from LHS to RHS and vice-versa) recursively, predicates which are
redundant, but that RexSimplify cannot handle.
+ // This notion can be relaxed as soon as RexSimplify gets more powerful,
and it can handle such cases.
+ if (HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_JOIN_PUSH_TRANSITIVE_PREDICATES_CONSERVATIVE)) {
+ toPush = toPush.stream()
+ .filter(unsafeOperatorsFinder::isSafe)
+ .collect(Collectors.toList());
+ }
+
+ return ImmutableList.copyOf(toPush);
}
- private RexNode getTypeSafePred(RelOptCluster cluster, RexNode rex,
RelDataType rType) {
- RexNode typeSafeRex = rex;
- if ((typeSafeRex instanceof RexCall) &&
HiveCalciteUtil.isComparisonOp((RexCall) typeSafeRex)) {
- RexBuilder rb = cluster.getRexBuilder();
- List<RexNode> fixedPredElems = new ArrayList<RexNode>();
- RelDataType commonType = cluster.getTypeFactory().leastRestrictive(
- RexUtil.types(((RexCall) rex).getOperands()));
- for (RexNode rn : ((RexCall) rex).getOperands()) {
- fixedPredElems.add(rb.ensureType(commonType, rn, true));
- }
+ //~ Inner Classes ----------------------------------------------------------
+
+ /**
+ * Finds unsafe operators in an expression (at any level of nesting).
+ * At the moment, the only unsafe operator is OR.
+ *
+ * Example 1: OR(=($0, $1), IS NOT NULL($2))):INTEGER (OR in the top-level
expression)
+ * Example 2: NOT(AND(=($0, $1), IS NOT NULL($2))
+ * this is equivalent to OR((<>($0, $1), IS NULL($2))
+ * Example 3: AND(OR(=($0, $1), IS NOT NULL($2)))) (OR in inner expression)
+ */
+ private static class UnsafeOperatorsFinder extends RexVisitorImpl<Void> {
Review Comment:
Agreed, it's better to start with what we have right now, we can always make
it more generic if needed later on.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]