kasakrisz commented on code in PR #3761:
URL: https://github.com/apache/hive/pull/3761#discussion_r1026238377
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java:
##########
@@ -104,44 +106,43 @@ private HiveSubQueryRemoveRule(RelOptRuleOperand operand,
String description, Hi
// if subquery is in FILTER
if (relNode instanceof HiveFilter) {
final HiveFilter filter = call.rel(0);
- final RexSubQuery e = RexUtil.SubQueryFinder.find(filter.getCondition());
- assert e != null;
-
- final RelOptUtil.Logic logic =
- LogicVisitor.find(RelOptUtil.Logic.TRUE,
ImmutableList.of(filter.getCondition()), e);
+ // Since there is a RexSubQuery, there should be a HiveCorrelationInfo
+ // in the RelNode
+ Preconditions.checkState(filter.getCorrelationInfos().size() > 0);
Review Comment:
nit. Can `!filter.getCorrelationInfos().isEmpty()! used here?
##########
ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out:
##########
@@ -19,8 +19,8 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100])
HiveProject(s_store_sk=[$0])
HiveFilter(condition=[=($24, _UTF-16LE'NM')])
HiveTableScan(table=[[default, store]], table:alias=[store])
- HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2,
1))], ctr_store_sk=[$0])
- HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))])
+ HiveProject(ctr_store_sk=[$0], CAST=[CAST(*(CAST(/($1, $2)):DECIMAL(21,
6), 1.2:DECIMAL(2, 1))):DECIMAL(24, 7)])
+ HiveFilter(condition=[IS NOT NULL(CAST(*(CAST(/($1, $2)):DECIMAL(21,
6), 1.2:DECIMAL(2, 1))):DECIMAL(24, 7))])
Review Comment:
The predicate expression in this filter is more complex than the original.
An extra multiply by 1.2 is added. It should not change the overall query
result because only the nullability matters but could you please verify why was
the expression changed?
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java:
##########
@@ -104,44 +106,43 @@ private HiveSubQueryRemoveRule(RelOptRuleOperand operand,
String description, Hi
// if subquery is in FILTER
if (relNode instanceof HiveFilter) {
final HiveFilter filter = call.rel(0);
- final RexSubQuery e = RexUtil.SubQueryFinder.find(filter.getCondition());
- assert e != null;
-
- final RelOptUtil.Logic logic =
- LogicVisitor.find(RelOptUtil.Logic.TRUE,
ImmutableList.of(filter.getCondition()), e);
+ // Since there is a RexSubQuery, there should be a HiveCorrelationInfo
+ // in the RelNode
+ Preconditions.checkState(filter.getCorrelationInfos().size() > 0);
+ HiveCorrelationInfo correlationInfo =
filter.getCorrelationInfos().get(0);
+ final RelOptUtil.Logic logic = LogicVisitor.find(RelOptUtil.Logic.TRUE,
+ ImmutableList.of(filter.getCondition()),
correlationInfo.rexSubQuery);
builder.push(filter.getInput());
final int fieldCount = builder.peek().getRowType().getFieldCount();
- SubqueryConf subqueryConfig =
filter.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class);
- boolean isCorrScalarQuery =
subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
+ boolean isCorrScalarQuery = correlationInfo.isCorrScalarQuery();
final RexNode target =
- apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e),
logic, builder, 1,
- fieldCount, isCorrScalarQuery);
- final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+ apply(call.getMetadataQuery(), correlationInfo.rexSubQuery,
+ correlationInfo.correlationIds, logic, builder, 1, fieldCount,
isCorrScalarQuery);
+ final RexShuttle shuttle = new
ReplaceSubQueryShuttle(correlationInfo.rexSubQuery, target);
builder.filter(shuttle.apply(filter.getCondition()));
builder.project(fields(builder, filter.getRowType().getFieldCount()));
RelNode newRel = builder.build();
call.transformTo(newRel);
} else if (relNode instanceof HiveProject) {
- // if subquery is in PROJECT
final HiveProject project = call.rel(0);
- final RexSubQuery e = RexUtil.SubQueryFinder.find(project.getProjects());
- assert e != null;
+ // Since there is a RexSubQuery, there should be a HiveCorrelationInfo
+ // in the RelNode
+ Preconditions.checkState(project.getCorrelationInfos().size() > 0);
+ HiveCorrelationInfo correlationInfo =
project.getCorrelationInfos().get(0);
Review Comment:
Is there any use case when all CorrelationInfo instances of a project is
required?
If not can we return just the first one and stop traversing the plan.
##########
ql/src/test/queries/clientpositive/subquery_with_corr_and_mv.q:
##########
@@ -0,0 +1,32 @@
+--! qt:dataset:src
Review Comment:
Please remove this line `src` table is not used in this test.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/correlation/CorrelationInfoVisitor.java:
##########
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.correlation;
+
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.LinkedHashSet;
+
+/**
+ * CorrelationIdVisitor digs out information from a given RexNode about all
the RexSubQuery
+ * RexNodes in the top layer. It does not go into RexSubQuery nodes within a
RexSubQuery node.
+ */
+public class CorrelationInfoVisitor extends RexShuttle {
+ // List of CorrelationInfo for all the found RexSubQuery, one
HiveCorrelationInfo for each
+ // RexSubQuery
+ private List<HiveCorrelationInfo> correlationInfos = new ArrayList<>();
+
+ // True if there is a "NOT" outside of the subquery
+ private boolean notFlag;
+
+ // Prevent direct instantiation. Information should be retrieved through
the static
+ // getCorrelationInfos method at the bottom of the file.
+ private CorrelationInfoVisitor() {
+ }
+
+ // Private method called by static methoc to retrieve information after
RexNode was visited
+ private List<HiveCorrelationInfo> getCorrelationInfos() {
+ return correlationInfos;
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ // Record if we see not flag before seeing the subquery.
+ // Can handle NOT(NOT(...)) if it exists
+ if (call.getOperator().getKind() == SqlKind.NOT) {
+ notFlag = !notFlag;
+ }
+ RexNode returnNode = super.visitCall(call);
+ // reset flag after visit.
+ if (call.getOperator().getKind() == SqlKind.NOT) {
+ notFlag = !notFlag;
+ }
+ return returnNode;
+ }
Review Comment:
Could you please add tests when a not expression has multiple subqueries as
operands like
```
explain cbo
select id, int_col, year, month from t_test1 s where not (
s.int_col in (select count(*) from t_test2 t2 where s.id = t2.id) and
s.int_col in (select count(*) from t_test3 t3 where s.id = t.3id)
);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]