[
https://issues.apache.org/jira/browse/HIVE-26524?focusedWorklogId=810902&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-810902
]
ASF GitHub Bot logged work on HIVE-26524:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 21/Sep/22 17:12
Start Date: 21/Sep/22 17:12
Worklog Time Spent: 10m
Work Description: asolimando commented on code in PR #3588:
URL: https://github.com/apache/hive/pull/3588#discussion_r976632375
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java:
##########
@@ -308,17 +390,26 @@ private void convertOrderLimitToASTNode(HiveSortLimit
hiveSortLimit) {
private void convertSortToASTNode(HiveSortExchange hiveSortExchange) {
List<RelFieldCollation> fieldCollations =
hiveSortExchange.getCollation().getFieldCollations();
- convertFieldCollationsToASTNode(hiveSortExchange, new
Schema(hiveSortExchange), fieldCollations,
+ convertFieldCollationsAndSetOrderAST(hiveSortExchange, new
Schema(hiveSortExchange), fieldCollations,
null, HiveParser.TOK_SORTBY, "TOK_SORTBY");
}
- private void convertFieldCollationsToASTNode(
+ private void convertFieldCollationsAndSetOrderAST(
RelNode node, Schema schema, List<RelFieldCollation>
fieldCollations, Map<Integer, RexNode> obRefToCallMap,
int astToken, String astText) {
+
if (fieldCollations.isEmpty()) {
return;
}
+ hiveAST.order = convertFieldCollationsToASTNode(
+ node.getCluster().getRexBuilder(), schema, fieldCollations,
obRefToCallMap, astToken, astText);
+ }
+
+ public static ASTNode convertFieldCollationsToASTNode(
Review Comment:
Does it have to be `public`?
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteToDataSketchesRules.java:
##########
@@ -336,16 +336,13 @@ protected VBuilderPAP(Aggregate aggregate, Project
project, String sketchClass)
@Override
boolean isApplicable(AggregateCall aggCall) {
- if ((aggInput instanceof Project)
- && !aggCall.isDistinct() && aggCall.getArgList().size() == 4
+ if ((aggInput != null)
Review Comment:
We don't need the parentheses anymore:
```suggestion
if (aggInput != null
```
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+ public static final RelOptRule PROJECT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyProject")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Project.class, project -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule FILTER_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyFilter")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Filter.class, singleRel -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule JOIN_LEFT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Join.class).inputs(
+ b1 -> b1.operand(Values.class)
+
.predicate(Values::isEmpty).noInputs(),
+ b2 -> b2.operand(RelNode.class).anyInputs()))
+ .withDescription("HivePruneEmptyJoin(left)")
+ .as(JoinLeftEmptyRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Improved version of Calcite's {@link
PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+ * In case of right outer join if the left branch is empty the join operator
can be removed
+ * and take the right branch only.
+ *
+ * select * from (select * from emp where 1=0) right join dept
+ * to
+ * select null as emp.col0 ... null as emp.coln, dept.* from dept
+ */
+ public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+ @Override default PruneEmptyRule toRule() {
+ return new PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ HiveValues empty = call.rel(1);
+ RelNode right = call.rel(2);
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ if (join.getJoinType().generatesNullsOnLeft()) {
+ // "select * from emp right join dept" is not necessarily empty if
+ // emp is empty
+ List<RexNode> projects = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ List<String> columnNames = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ // left
+ addNullLiterals(rexBuilder, empty, projects, columnNames);
+ // right
+ copyProjects(rexBuilder, right.getRowType(),
+ join.getRowType(), empty.getRowType().getFieldCount(),
projects, columnNames);
+
+ RelNode project = call.builder().push(right).project(projects,
columnNames).build();
+ call.transformTo(project);
+ return;
+ }
+ call.transformTo(call.builder().push(join).empty().build());
+ }
+ };
+ }
+ }
+
+ private static void addNullLiterals(
+ RexBuilder rexBuilder, HiveValues empty, List<RexNode>
projectFields, List<String> newColumnNames) {
+ for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+ RelDataTypeField relDataTypeField =
empty.getRowType().getFieldList().get(i);
+ RexNode nullLiteral =
rexBuilder.makeNullLiteral(relDataTypeField.getType());
+ projectFields.add(nullLiteral);
+ newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+ }
+ }
+
+ public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,
Review Comment:
I guess this method can be `private`. If not, please add some javadoc to it.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+ public static final RelOptRule PROJECT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyProject")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Project.class, project -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule FILTER_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyFilter")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Filter.class, singleRel -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule JOIN_LEFT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Join.class).inputs(
+ b1 -> b1.operand(Values.class)
+
.predicate(Values::isEmpty).noInputs(),
+ b2 -> b2.operand(RelNode.class).anyInputs()))
+ .withDescription("HivePruneEmptyJoin(left)")
+ .as(JoinLeftEmptyRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Improved version of Calcite's {@link
PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+ * In case of right outer join if the left branch is empty the join operator
can be removed
+ * and take the right branch only.
+ *
+ * select * from (select * from emp where 1=0) right join dept
+ * to
+ * select null as emp.col0 ... null as emp.coln, dept.* from dept
+ */
+ public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+ @Override default PruneEmptyRule toRule() {
+ return new PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ HiveValues empty = call.rel(1);
+ RelNode right = call.rel(2);
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ if (join.getJoinType().generatesNullsOnLeft()) {
+ // "select * from emp right join dept" is not necessarily empty if
+ // emp is empty
+ List<RexNode> projects = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ List<String> columnNames = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ // left
+ addNullLiterals(rexBuilder, empty, projects, columnNames);
+ // right
+ copyProjects(rexBuilder, right.getRowType(),
+ join.getRowType(), empty.getRowType().getFieldCount(),
projects, columnNames);
+
+ RelNode project = call.builder().push(right).project(projects,
columnNames).build();
+ call.transformTo(project);
+ return;
+ }
+ call.transformTo(call.builder().push(join).empty().build());
+ }
+ };
+ }
+ }
+
+ private static void addNullLiterals(
+ RexBuilder rexBuilder, HiveValues empty, List<RexNode>
projectFields, List<String> newColumnNames) {
+ for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+ RelDataTypeField relDataTypeField =
empty.getRowType().getFieldList().get(i);
+ RexNode nullLiteral =
rexBuilder.makeNullLiteral(relDataTypeField.getType());
+ projectFields.add(nullLiteral);
+ newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+ }
+ }
+
+ public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,
+ RelDataType castRowType, int
castRowTypeOffset,
+ List<RexNode> outProjects, List<String>
outProjectNames) {
+
+ for (int i = 0; i < inRowType.getFieldCount(); ++i) {
+ RelDataTypeField relDataTypeField = inRowType.getFieldList().get(i);
+ RexInputRef inputRef =
rexBuilder.makeInputRef(relDataTypeField.getType(), i);
+ RexNode cast = rexBuilder.makeCast(
+ castRowType.getFieldList().get(castRowTypeOffset + i).getType(),
inputRef);
+ outProjects.add(cast);
+ outProjectNames.add(relDataTypeField.getName());
+ }
+ }
+
+ public static final RelOptRule JOIN_RIGHT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Join.class).inputs(
+ b1 -> b1.operand(RelNode.class).anyInputs(),
+ b2 ->
b2.operand(Values.class).predicate(Values::isEmpty)
+ .noInputs()))
+ .withDescription("HivePruneEmptyJoin(right)")
+ .as(JoinRightEmptyRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Improved version of Calcite's {@link
PruneEmptyRules.JoinRightEmptyRuleConfig}.
+ * In case of left outer join if the right branch is empty the join operator
can be removed
+ * and take the left branch only.
+ *
+ * select * from emp right join (select * from dept where 1=0)
+ * to
+ * select emp.*, null as dept.col0 ... null as dept.coln from emp
+ */
+ public interface JoinRightEmptyRuleConfig extends PruneEmptyRule.Config {
+ @Override default PruneEmptyRule toRule() {
+ return new PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ RelNode left = call.rel(1);
+ HiveValues empty = call.rel(2);
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ if (join.getJoinType().generatesNullsOnRight()) {
+ // "select * from emp left join dept" is not necessarily empty if
+ // dept is empty
+ List<RexNode> projects = new ArrayList<>(
+ left.getRowType().getFieldCount() +
empty.getRowType().getFieldCount());
+ List<String> columnNames = new ArrayList<>(
+ left.getRowType().getFieldCount() +
empty.getRowType().getFieldCount());
+ // left
+ copyProjects(rexBuilder, left.getRowType(), join.getRowType(), 0,
projects, columnNames);
+ // right
+ addNullLiterals(rexBuilder, empty, projects, columnNames);
+
+ RelNode project = call.builder().push(left).project(projects,
columnNames).build();
+ call.transformTo(project);
+ return;
+ }
+ if (join.getJoinType() == JoinRelType.ANTI) {
+ // In case of anti join: Join(X, Empty, ANTI) becomes X
+ call.transformTo(join.getLeft());
+ return;
+ }
+ call.transformTo(call.builder().push(join).empty().build());
+ }
+ };
+ }
+ }
+
+ public static final RelOptRule SORT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptySort")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Sort.class, singleRel -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule SORT_FETCH_ZERO_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b ->
+ b.operand(Sort.class).anyInputs())
+ .withDescription("HivePruneSortLimit0")
+ .as(PruneEmptyRules.SortFetchZeroRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule AGGREGATE_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyAggregate")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Aggregate.class, Aggregate::isNotGrandTotal)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule UNION_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Union.class).unorderedInputs(b1 ->
+ b1.operand(Values.class)
+
.predicate(Values::isEmpty).noInputs()))
+ .withDescription("HivePruneEmptyUnionBranch")
+ .as(HiveUnionEmptyPruneRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Copy of {@link PruneEmptyRules.UnionEmptyPruneRuleConfig} but this
version expects {@link Union}.
+ */
+ public interface HiveUnionEmptyPruneRuleConfig extends
PruneEmptyRules.PruneEmptyRule.Config {
+ @Override default PruneEmptyRules.PruneEmptyRule toRule() {
+ return new PruneEmptyRules.PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ final Union union = call.rel(0);
+ final List<RelNode> inputs = union.getInputs();
+ assert inputs != null;
+ final RelBuilder builder = call.builder();
+ int nonEmptyInputs = 0;
+ for (RelNode input : inputs) {
+ if (!isEmpty(input)) {
+ builder.push(input);
+ nonEmptyInputs++;
+ }
+ }
+ assert nonEmptyInputs < inputs.size()
+ : "planner promised us at least one Empty child: "
+ + RelOptUtil.toString(union);
+ if (nonEmptyInputs == 0) {
+ builder.push(union).empty();
+ } else {
+ builder.union(union.all, nonEmptyInputs);
+ builder.convert(union.getRowType(), true);
+ }
+ call.transformTo(builder.build());
+ }
+ };
+ }
+ }
+
+ private static boolean isEmpty(RelNode node) {
+ if (node instanceof Values) {
+ return ((Values) node).getTuples().isEmpty();
+ }
+ if (node instanceof HepRelVertex) {
+ return isEmpty(((HepRelVertex) node).getCurrentRel());
+ }
+ // Note: relation input might be a RelSubset, so we just iterate over the
relations
+ // in order to check if the subset is equivalent to an empty relation.
+ if (!(node instanceof RelSubset)) {
+ return false;
+ }
+ RelSubset subset = (RelSubset) node;
+ for (RelNode rel : subset.getRels()) {
+ if (isEmpty(rel)) {
+ return true;
+ }
+ }
+ return false;
+ }
Review Comment:
Minor: What if we got rid of the negated condition in this way?
```suggestion
if (node instanceof RelSubset) {
RelSubset subset = (RelSubset) node;
for (RelNode rel : subset.getRels()) {
if (isEmpty(rel)) {
return true;
}
}
}
return false;
}
```
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveValues.java:
##########
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexLiteral;
+
+import java.util.List;
+
+/**
+ * Subclass of {@link org.apache.calcite.rel.core.Values}.
+ * Specialized to Hive engine.
+ */
+public class HiveValues extends Values {
+
+ public HiveValues(
+ RelOptCluster cluster,
+ RelDataType rowType,
+ ImmutableList<ImmutableList<RexLiteral>> tuples,
+ RelTraitSet traits) {
Review Comment:
Minor: I don't think we align parameters vertically in Hive in general; we
could also try to have them fit on a single line, like:
```suggestion
public HiveValues(
RelOptCluster cluster, RelDataType rowType,
ImmutableList<ImmutableList<RexLiteral>> tuples, RelTraitSet traits) {
```
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteToDataSketchesRules.java:
##########
@@ -336,16 +336,13 @@ protected VBuilderPAP(Aggregate aggregate, Project
project, String sketchClass)
@Override
boolean isApplicable(AggregateCall aggCall) {
- if ((aggInput instanceof Project)
- && !aggCall.isDistinct() && aggCall.getArgList().size() == 4
+ if ((aggInput != null)
+ && !aggCall.isDistinct() && aggCall.getArgList().size() == 1
Review Comment:
I am having a hard time understanding this change; do you have a high-level
explanation that might help?
Since others down the line might be puzzled, we could probably add a few
comments here.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java:
##########
@@ -717,9 +719,12 @@ public TrimResult trimFields(Aggregate aggregate,
ImmutableBitSet fieldsUsed, Se
final RexNode filterArg = aggCall.filterArg < 0 ? null
: relBuilder.field(Mappings.apply(inputMapping,
aggCall.filterArg));
RelBuilder.AggCall newAggCall =
- relBuilder.aggregateCall(aggCall.getAggregation(),
- aggCall.isDistinct(), aggCall.isApproximate(),
- filterArg, aggCall.name, args);
+ relBuilder.aggregateCall(aggCall.getAggregation(), args)
+ .distinct(aggCall.isDistinct())
+ .filter(filterArg)
+ .approximate(aggCall.isApproximate())
+ .sort(relBuilder.fields(aggCall.collation))
+ .as(aggCall.name);
Review Comment:
Can you give a high-level overview of the change here?
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+ public static final RelOptRule PROJECT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyProject")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Project.class, project -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule FILTER_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyFilter")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Filter.class, singleRel -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule JOIN_LEFT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Join.class).inputs(
+ b1 -> b1.operand(Values.class)
+
.predicate(Values::isEmpty).noInputs(),
+ b2 -> b2.operand(RelNode.class).anyInputs()))
+ .withDescription("HivePruneEmptyJoin(left)")
+ .as(JoinLeftEmptyRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Improved version of Calcite's {@link
PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+ * In case of right outer join if the left branch is empty the join operator
can be removed
+ * and take the right branch only.
+ *
+ * select * from (select * from emp where 1=0) right join dept
+ * to
+ * select null as emp.col0 ... null as emp.coln, dept.* from dept
+ */
+ public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+ @Override default PruneEmptyRule toRule() {
+ return new PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ HiveValues empty = call.rel(1);
+ RelNode right = call.rel(2);
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ if (join.getJoinType().generatesNullsOnLeft()) {
+ // "select * from emp right join dept" is not necessarily empty if
+ // emp is empty
+ List<RexNode> projects = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ List<String> columnNames = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ // left
+ addNullLiterals(rexBuilder, empty, projects, columnNames);
+ // right
+ copyProjects(rexBuilder, right.getRowType(),
+ join.getRowType(), empty.getRowType().getFieldCount(),
projects, columnNames);
+
+ RelNode project = call.builder().push(right).project(projects,
columnNames).build();
+ call.transformTo(project);
+ return;
+ }
+ call.transformTo(call.builder().push(join).empty().build());
+ }
+ };
+ }
+ }
+
+ private static void addNullLiterals(
+ RexBuilder rexBuilder, HiveValues empty, List<RexNode>
projectFields, List<String> newColumnNames) {
+ for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+ RelDataTypeField relDataTypeField =
empty.getRowType().getFieldList().get(i);
+ RexNode nullLiteral =
rexBuilder.makeNullLiteral(relDataTypeField.getType());
+ projectFields.add(nullLiteral);
+ newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+ }
+ }
+
+ public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,
+ RelDataType castRowType, int
castRowTypeOffset,
+ List<RexNode> outProjects, List<String>
outProjectNames) {
+
+ for (int i = 0; i < inRowType.getFieldCount(); ++i) {
+ RelDataTypeField relDataTypeField = inRowType.getFieldList().get(i);
+ RexInputRef inputRef =
rexBuilder.makeInputRef(relDataTypeField.getType(), i);
+ RexNode cast = rexBuilder.makeCast(
+ castRowType.getFieldList().get(castRowTypeOffset + i).getType(),
inputRef);
Review Comment:
Since those two methods are used for both left and right join rules, I'd
rather place them before the two rules or after them, not in the middle.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java:
##########
@@ -121,7 +127,83 @@ public static ASTNode convert(final RelNode relNode,
List<FieldSchema> resultSch
return c.convert();
}
+ // TOK_QUERY
+ // TOK_INSERT
+ // TOK_DESTINATION
+ // TOK_DIR
+ // TOK_TMP_FILE
+ // TOK_SELECT
+ // TOK_SELEXPR
+ // TOK_FUNCTION
+ // TOK_<type>
+ // TOK_NULL
+ // alias0
+ // ...
+ // TOK_SELEXPR
+ // TOK_FUNCTION
+ // TOK_<type>
+ // TOK_NULL
+ // aliasn
+ // TOK_LIMIT
+ // 0
+ // 0
+ private static ASTNode emptyPlan(RelDataType dataType) {
Review Comment:
As discussed offline, can we have a unit test covering the translation
(especially the typed null bit)?
I am positive that the method is doing what it should; I am more worried
about future regressions if it gets touched.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveValuesVisitor.java:
##########
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer;
+
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+class HiveValuesVisitor extends HiveRelNodeVisitor<HiveValues> {
+ HiveValuesVisitor(HiveOpConverter hiveOpConverter) {
+ super(hiveOpConverter);
+ }
+
+ @Override
+ OpAttr visit(HiveValues valuesRel) throws SemanticException {
+
+ LOG.debug("Translating operator rel#{}:{} with row type: [{}]",
+ valuesRel.getId(), valuesRel.getRelTypeName(),
valuesRel.getRowType());
+ LOG.debug("Operator rel#{}:{} has {} tuples.",
+ valuesRel.getId(), valuesRel.getRelTypeName(),
valuesRel.tuples.size());
+
+ if (!Values.isEmpty(valuesRel)) {
+ LOG.error("Empty {} operator translation not supported yet in return
path.",
+ valuesRel.getClass().getCanonicalName());
+ return null;
+ }
+
+ // 1. collect columns for project row schema
+ List<String> columnNames = new ArrayList<>();
+ List<ExprNodeDesc> exprNodeDescList = new ArrayList<>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
+
+ ArrayList<ColumnInfo> colInfoList = new ArrayList<>();
Review Comment:
Nitpick, prefer the interface if possible:
```suggestion
List<ColumnInfo> colInfoList = new ArrayList<>();
```
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+ public static final RelOptRule PROJECT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyProject")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Project.class, project -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule FILTER_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyFilter")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Filter.class, singleRel -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule JOIN_LEFT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Join.class).inputs(
+ b1 -> b1.operand(Values.class)
+
.predicate(Values::isEmpty).noInputs(),
+ b2 -> b2.operand(RelNode.class).anyInputs()))
+ .withDescription("HivePruneEmptyJoin(left)")
+ .as(JoinLeftEmptyRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Improved version of Calcite's {@link
PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+ * In case of right outer join if the left branch is empty the join operator
can be removed
+ * and take the right branch only.
+ *
+ * select * from (select * from emp where 1=0) right join dept
+ * to
+ * select null as emp.col0 ... null as emp.coln, dept.* from dept
+ */
+ public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+ @Override default PruneEmptyRule toRule() {
+ return new PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ HiveValues empty = call.rel(1);
+ RelNode right = call.rel(2);
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ if (join.getJoinType().generatesNullsOnLeft()) {
+ // "select * from emp right join dept" is not necessarily empty if
Review Comment:
Can you update this following Ruben's suggestion from the Calcite PR, both here and
for the corresponding "right" version of the rule?
Issue Time Tracking
-------------------
Worklog Id: (was: 810902)
Time Spent: 1.5h (was: 1h 20m)
> Use Calcite to remove sections of a query plan known never produces rows
> ------------------------------------------------------------------------
>
> Key: HIVE-26524
> URL: https://issues.apache.org/jira/browse/HIVE-26524
> Project: Hive
> Issue Type: Improvement
> Components: CBO
> Reporter: Krisztian Kasa
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1.5h
> Remaining Estimate: 0h
>
> Calcite has a set of rules to remove sections of a query plan known never
> to produce any rows. In some cases the whole plan can be removed. Such plans
> are represented with a single {{Values}} operator with no tuples, e.g.:
> {code:java}
> select y + 1 from (select a1 y, b1 z from t1 where b1 > 10) q WHERE 1=0
> {code}
> {code:java}
> HiveValues(tuples=[[]])
> {code}
> In other cases, when the plan has outer joins or set operators, some branches
> can be replaced with empty values; going further, in some cases the join/set
> operator itself can be removed:
> {code:java}
> select a2, b2 from t2 where 1=0
> union
> select a1, b1 from t1
> {code}
> {code:java}
> HiveAggregate(group=[{0, 1}])
> HiveTableScan(table=[[default, t1]], table:alias=[t1])
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)