kgyrtkirk commented on a change in pull request #1492:
URL: https://github.com/apache/hive/pull/1492#discussion_r489412449
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
##########
@@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends
RexNode> columns) {
}
@Override public RexNode visitCall(RexCall call) {
- final RexNode node;
- final List<RexNode> operands;
- final List<RexNode> newOperands;
- final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
switch (call.getKind()) {
case AND:
- // IN clauses need to be combined by keeping only common elements
- operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
- for (int i = 0; i < operands.size(); i++) {
- RexNode operand = operands.get(i);
- if (operand.getKind() == SqlKind.IN) {
- RexCall inCall = (RexCall) operand;
- if
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
- continue;
- }
- RexNode ref = inCall.getOperands().get(0);
- if (inLHSExprToRHSExprs.containsKey(ref)) {
- Set<RexNode> expressions = Sets.newHashSet();
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- expressions.add(inCall.getOperands().get(j));
- }
- inLHSExprToRHSExprs.get(ref).retainAll(expressions);
- if (!inLHSExprToRHSExprs.containsKey(ref)) {
- // Note that Multimap does not keep a key if all its values
are removed.
- // Hence, since there are no common expressions and it is
within an AND,
- // we should return false
- return rexBuilder.makeLiteral(false);
- }
- } else {
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
- }
- }
- operands.remove(i);
- --i;
- } else if (operand.getKind() == SqlKind.EQUALS) {
- Constraint c = Constraint.of(operand);
- if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
- continue;
+ return handleAND(rexBuilder, call);
+ case OR:
+ return handleOR(rexBuilder, call);
+ default:
+ return super.visitCall(call);
+ }
+ }
+
+ private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) {
+ // IN clauses need to be combined by keeping only common elements
+ final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
+ // We will use this set to keep those expressions that may evaluate
+ // into a null value.
+ final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs =
LinkedHashMultimap.create();
+ final List<RexNode> operands = new
ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
+ for (int i = 0; i < operands.size(); i++) {
+ RexNode operand = operands.get(i);
+ if (operand.getKind() == SqlKind.IN) {
+ RexCall inCall = (RexCall) operand;
+ if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+ continue;
+ }
+ RexNode ref = inCall.getOperands().get(0);
+ if (ref.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, ref);
+ }
+ if (inLHSExprToRHSExprs.containsKey(ref)) {
+ Set<RexNode> expressions = Sets.newHashSet();
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ RexNode constNode = inCall.getOperands().get(j);
+ expressions.add(constNode);
+ if (constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, constNode);
}
- RexNode ref = c.exprNode;
- if (inLHSExprToRHSExprs.containsKey(ref)) {
-
inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode));
- if (!inLHSExprToRHSExprs.containsKey(ref)) {
- // Note that Multimap does not keep a key if all its values
are removed.
- // Hence, since there are no common expressions and it is
within an AND,
- // we should return false
- return rexBuilder.makeLiteral(false);
- }
- } else {
- inLHSExprToRHSExprs.put(ref, c.constNode);
+ }
+ inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+ if (!inLHSExprToRHSExprs.containsKey(ref)) {
+ // Note that Multimap does not keep a key if all its values are
removed.
+ return createResultFromEmptySet(rexBuilder, ref,
inLHSExprToRHSNullableExprs);
+ }
+ } else {
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ RexNode constNode = inCall.getOperands().get(j);
+ inLHSExprToRHSExprs.put(ref, constNode);
+ if (constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, constNode);
}
- operands.remove(i);
- --i;
}
}
- // Create IN clauses
- newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs);
- newOperands.addAll(operands);
- // Return node
- node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
- break;
- case OR:
- // IN clauses need to be combined by keeping all elements
- operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands()));
- for (int i = 0; i < operands.size(); i++) {
- RexNode operand = operands.get(i);
- if (operand.getKind() == SqlKind.IN) {
- RexCall inCall = (RexCall) operand;
- if
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
- continue;
- }
- RexNode ref = inCall.getOperands().get(0);
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
- }
- operands.remove(i);
- --i;
+ operands.remove(i);
+ --i;
+ } else if (operand.getKind() == SqlKind.EQUALS) {
+ Constraint c = Constraint.of(operand);
+ if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
+ continue;
+ }
+ if (c.exprNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(c.exprNode, c.exprNode);
+ }
+ if (c.constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(c.exprNode, c.constNode);
+ }
+ if (inLHSExprToRHSExprs.containsKey(c.exprNode)) {
+
inLHSExprToRHSExprs.get(c.exprNode).retainAll(Collections.singleton(c.constNode));
+ if (!inLHSExprToRHSExprs.containsKey(c.exprNode)) {
+ // Note that Multimap does not keep a key if all its values are
removed.
+ return createResultFromEmptySet(rexBuilder, c.exprNode,
inLHSExprToRHSNullableExprs);
}
+ } else {
+ inLHSExprToRHSExprs.put(c.exprNode, c.constNode);
}
- // Create IN clauses
- newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs);
- newOperands.addAll(operands);
- // Return node
- node = RexUtil.composeDisjunction(rexBuilder, newOperands, false);
- break;
- default:
- return super.visitCall(call);
+ operands.remove(i);
+ --i;
+ }
}
- return node;
+ // Create IN clauses
+ final List<RexNode> newOperands = createInClauses(rexBuilder,
inLHSExprToRHSExprs);
+ newOperands.addAll(operands);
+ // Return node
+ return RexUtil.composeConjunction(rexBuilder, newOperands, false);
+ }
+
+ private static RexNode handleOR(RexBuilder rexBuilder, RexCall call) {
+ // IN clauses need to be combined by keeping all elements
+ final List<RexNode> operands = new
ArrayList<>(RexUtil.flattenOr(call.getOperands()));
+ final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
+ for (int i = 0; i < operands.size(); i++) {
+ RexNode operand = operands.get(i);
+ if (operand.getKind() == SqlKind.IN) {
+ RexCall inCall = (RexCall) operand;
+ if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+ continue;
+ }
+ RexNode ref = inCall.getOperands().get(0);
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
+ }
+ operands.remove(i);
+ --i;
+ }
+ }
+ // Create IN clauses
+ final List<RexNode> newOperands = createInClauses(rexBuilder,
inLHSExprToRHSExprs);
+ newOperands.addAll(operands);
+ // Return node
+ return RexUtil.composeDisjunction(rexBuilder, newOperands, false);
+ }
+
+ private static RexNode createResultFromEmptySet(RexBuilder rexBuilder,
+ RexNode ref, Multimap<RexNode, RexNode> inLHSExprToRHSNullableExprs) {
+ if (inLHSExprToRHSNullableExprs.containsKey(ref)) {
+ // We handle possible null values in the expressions.
+ List<RexNode> nullableExprs =
+ inLHSExprToRHSNullableExprs.get(ref)
+ .stream()
+ .map(n -> rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL,
ImmutableList.of(n)))
+ .collect(Collectors.toList());
+ return rexBuilder.makeCall(SqlStdOperatorTable.CASE,
Review comment:
I believe this would not need to be a `CASE` it could be an `AND` as well
```
(x IS NULL OR y IS NULL) AND null
```
but I know - it won't make much difference - the `CASE` might be more
readbale :)
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
##########
@@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends
RexNode> columns) {
}
@Override public RexNode visitCall(RexCall call) {
- final RexNode node;
- final List<RexNode> operands;
- final List<RexNode> newOperands;
- final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
switch (call.getKind()) {
case AND:
- // IN clauses need to be combined by keeping only common elements
- operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
- for (int i = 0; i < operands.size(); i++) {
- RexNode operand = operands.get(i);
- if (operand.getKind() == SqlKind.IN) {
- RexCall inCall = (RexCall) operand;
- if
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
- continue;
- }
- RexNode ref = inCall.getOperands().get(0);
- if (inLHSExprToRHSExprs.containsKey(ref)) {
- Set<RexNode> expressions = Sets.newHashSet();
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- expressions.add(inCall.getOperands().get(j));
- }
- inLHSExprToRHSExprs.get(ref).retainAll(expressions);
- if (!inLHSExprToRHSExprs.containsKey(ref)) {
- // Note that Multimap does not keep a key if all its values
are removed.
- // Hence, since there are no common expressions and it is
within an AND,
- // we should return false
- return rexBuilder.makeLiteral(false);
- }
- } else {
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
- }
- }
- operands.remove(i);
- --i;
- } else if (operand.getKind() == SqlKind.EQUALS) {
- Constraint c = Constraint.of(operand);
- if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
- continue;
+ return handleAND(rexBuilder, call);
+ case OR:
+ return handleOR(rexBuilder, call);
+ default:
+ return super.visitCall(call);
+ }
+ }
+
+ private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) {
+ // IN clauses need to be combined by keeping only common elements
+ final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
+ // We will use this set to keep those expressions that may evaluate
+ // into a null value.
+ final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs =
LinkedHashMultimap.create();
+ final List<RexNode> operands = new
ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
+ for (int i = 0; i < operands.size(); i++) {
+ RexNode operand = operands.get(i);
+ if (operand.getKind() == SqlKind.IN) {
+ RexCall inCall = (RexCall) operand;
+ if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+ continue;
+ }
+ RexNode ref = inCall.getOperands().get(0);
+ if (ref.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, ref);
+ }
+ if (inLHSExprToRHSExprs.containsKey(ref)) {
+ Set<RexNode> expressions = Sets.newHashSet();
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ RexNode constNode = inCall.getOperands().get(j);
+ expressions.add(constNode);
+ if (constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, constNode);
}
- RexNode ref = c.exprNode;
- if (inLHSExprToRHSExprs.containsKey(ref)) {
-
inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode));
- if (!inLHSExprToRHSExprs.containsKey(ref)) {
- // Note that Multimap does not keep a key if all its values
are removed.
- // Hence, since there are no common expressions and it is
within an AND,
- // we should return false
- return rexBuilder.makeLiteral(false);
- }
- } else {
- inLHSExprToRHSExprs.put(ref, c.constNode);
+ }
+ inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+ if (!inLHSExprToRHSExprs.containsKey(ref)) {
+ // Note that Multimap does not keep a key if all its values are
removed.
+ return createResultFromEmptySet(rexBuilder, ref,
inLHSExprToRHSNullableExprs);
Review comment:
at this point - aren't we in the middle of processing all the operands
of the AND?
```
x IN (1,2) AND x IN (3,4) AND y IN (1,2)
```
I suspect for the above expression we don't know anything about `y` (yet) -
isn't that a problem?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
##########
@@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends
RexNode> columns) {
}
@Override public RexNode visitCall(RexCall call) {
- final RexNode node;
- final List<RexNode> operands;
- final List<RexNode> newOperands;
- final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
switch (call.getKind()) {
case AND:
- // IN clauses need to be combined by keeping only common elements
- operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
- for (int i = 0; i < operands.size(); i++) {
- RexNode operand = operands.get(i);
- if (operand.getKind() == SqlKind.IN) {
- RexCall inCall = (RexCall) operand;
- if
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
- continue;
- }
- RexNode ref = inCall.getOperands().get(0);
- if (inLHSExprToRHSExprs.containsKey(ref)) {
- Set<RexNode> expressions = Sets.newHashSet();
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- expressions.add(inCall.getOperands().get(j));
- }
- inLHSExprToRHSExprs.get(ref).retainAll(expressions);
- if (!inLHSExprToRHSExprs.containsKey(ref)) {
- // Note that Multimap does not keep a key if all its values
are removed.
- // Hence, since there are no common expressions and it is
within an AND,
- // we should return false
- return rexBuilder.makeLiteral(false);
- }
- } else {
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
- }
- }
- operands.remove(i);
- --i;
- } else if (operand.getKind() == SqlKind.EQUALS) {
- Constraint c = Constraint.of(operand);
- if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
- continue;
+ return handleAND(rexBuilder, call);
+ case OR:
+ return handleOR(rexBuilder, call);
+ default:
+ return super.visitCall(call);
+ }
+ }
+
+ private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) {
+ // IN clauses need to be combined by keeping only common elements
+ final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
+ // We will use this set to keep those expressions that may evaluate
+ // into a null value.
+ final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs =
LinkedHashMultimap.create();
+ final List<RexNode> operands = new
ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
+ for (int i = 0; i < operands.size(); i++) {
+ RexNode operand = operands.get(i);
+ if (operand.getKind() == SqlKind.IN) {
+ RexCall inCall = (RexCall) operand;
+ if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+ continue;
+ }
+ RexNode ref = inCall.getOperands().get(0);
+ if (ref.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, ref);
+ }
+ if (inLHSExprToRHSExprs.containsKey(ref)) {
+ Set<RexNode> expressions = Sets.newHashSet();
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ RexNode constNode = inCall.getOperands().get(j);
+ expressions.add(constNode);
+ if (constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, constNode);
}
- RexNode ref = c.exprNode;
- if (inLHSExprToRHSExprs.containsKey(ref)) {
-
inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode));
- if (!inLHSExprToRHSExprs.containsKey(ref)) {
- // Note that Multimap does not keep a key if all its values
are removed.
- // Hence, since there are no common expressions and it is
within an AND,
- // we should return false
- return rexBuilder.makeLiteral(false);
- }
- } else {
- inLHSExprToRHSExprs.put(ref, c.constNode);
+ }
+ inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+ if (!inLHSExprToRHSExprs.containsKey(ref)) {
+ // Note that Multimap does not keep a key if all its values are
removed.
+ return createResultFromEmptySet(rexBuilder, ref,
inLHSExprToRHSNullableExprs);
+ }
+ } else {
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ RexNode constNode = inCall.getOperands().get(j);
+ inLHSExprToRHSExprs.put(ref, constNode);
+ if (constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(ref, constNode);
}
- operands.remove(i);
- --i;
}
}
- // Create IN clauses
- newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs);
- newOperands.addAll(operands);
- // Return node
- node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
- break;
- case OR:
- // IN clauses need to be combined by keeping all elements
- operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands()));
- for (int i = 0; i < operands.size(); i++) {
- RexNode operand = operands.get(i);
- if (operand.getKind() == SqlKind.IN) {
- RexCall inCall = (RexCall) operand;
- if
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
- continue;
- }
- RexNode ref = inCall.getOperands().get(0);
- for (int j = 1; j < inCall.getOperands().size(); j++) {
- inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
- }
- operands.remove(i);
- --i;
+ operands.remove(i);
+ --i;
+ } else if (operand.getKind() == SqlKind.EQUALS) {
+ Constraint c = Constraint.of(operand);
+ if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
+ continue;
+ }
+ if (c.exprNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(c.exprNode, c.exprNode);
+ }
+ if (c.constNode.getType().isNullable()) {
+ inLHSExprToRHSNullableExprs.put(c.exprNode, c.constNode);
+ }
+ if (inLHSExprToRHSExprs.containsKey(c.exprNode)) {
+
inLHSExprToRHSExprs.get(c.exprNode).retainAll(Collections.singleton(c.constNode));
+ if (!inLHSExprToRHSExprs.containsKey(c.exprNode)) {
+ // Note that Multimap does not keep a key if all its values are
removed.
+ return createResultFromEmptySet(rexBuilder, c.exprNode,
inLHSExprToRHSNullableExprs);
}
+ } else {
+ inLHSExprToRHSExprs.put(c.exprNode, c.constNode);
}
- // Create IN clauses
- newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs);
- newOperands.addAll(operands);
- // Return node
- node = RexUtil.composeDisjunction(rexBuilder, newOperands, false);
- break;
- default:
- return super.visitCall(call);
+ operands.remove(i);
+ --i;
+ }
}
- return node;
+ // Create IN clauses
+ final List<RexNode> newOperands = createInClauses(rexBuilder,
inLHSExprToRHSExprs);
+ newOperands.addAll(operands);
+ // Return node
+ return RexUtil.composeConjunction(rexBuilder, newOperands, false);
+ }
+
+ private static RexNode handleOR(RexBuilder rexBuilder, RexCall call) {
+ // IN clauses need to be combined by keeping all elements
+ final List<RexNode> operands = new
ArrayList<>(RexUtil.flattenOr(call.getOperands()));
+ final Multimap<RexNode,RexNode> inLHSExprToRHSExprs =
LinkedHashMultimap.create();
+ for (int i = 0; i < operands.size(); i++) {
+ RexNode operand = operands.get(i);
+ if (operand.getKind() == SqlKind.IN) {
+ RexCall inCall = (RexCall) operand;
+ if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+ continue;
+ }
+ RexNode ref = inCall.getOperands().get(0);
+ for (int j = 1; j < inCall.getOperands().size(); j++) {
+ inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
+ }
+ operands.remove(i);
+ --i;
+ }
+ }
+ // Create IN clauses
+ final List<RexNode> newOperands = createInClauses(rexBuilder,
inLHSExprToRHSExprs);
+ newOperands.addAll(operands);
+ // Return node
+ return RexUtil.composeDisjunction(rexBuilder, newOperands, false);
+ }
+
+ private static RexNode createResultFromEmptySet(RexBuilder rexBuilder,
+ RexNode ref, Multimap<RexNode, RexNode> inLHSExprToRHSNullableExprs) {
+ if (inLHSExprToRHSNullableExprs.containsKey(ref)) {
Review comment:
right now I don't fully agree with this logic - if we have an expression
like:
```
x IN (1,2) AND x in (3,4) and y IN (5,6)
```
it's clear that we may only return `false` or `null`; but I think we may
still need to choose `false` based on the other expressions; for the above:
* when `x is null, y=5` then the result will be `null`
* but when `x is null, y=1` then the result will be `false`
in light of this reasoning it seem to me that this starts looking more and
more sophisticated....
I think it might worth a try to see if these "early-return-type"
simplifications kick-in a lot or not - and as an alternative approach consider
removing them instead of saving them.
I think at some point we should run the simplification earlier when the
`OR`s are still open - so that it could analyze-it properly
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]