This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 6f3826d HIVE-23878: Aggregate after join throws off MV rewrite (Jesus
Camacho Rodriguez, reviewed by Vineet Garg)
6f3826d is described below
commit 6f3826d73f6462e33cf38656d68a0e6230565c05
Author: Jesús Camacho Rodríguez <[email protected]>
AuthorDate: Fri Jul 24 17:28:48 2020 -0700
HIVE-23878: Aggregate after join throws off MV rewrite (Jesus Camacho
Rodriguez, reviewed by Vineet Garg)
Closes apache/hive#1300
---
.../test/resources/testconfiguration.properties | 8 +-
.../calcite/HiveDefaultRelMetadataProvider.java | 2 -
.../ql/optimizer/calcite/HiveRelShuttleImpl.java | 10 +-
...r.java => HiveTezModelRelMetadataProvider.java} | 29 +--
.../optimizer/calcite/cost/HiveVolcanoPlanner.java | 9 +-
.../rules/HiveCardinalityPreservingJoinRule.java | 8 +-
.../HiveMaterializationRelMetadataProvider.java | 120 ++++++++++++
.../rules/views/HiveMaterializedViewBoxing.java | 213 +++++++++++++++++++++
.../rules/views/HiveMaterializedViewRule.java | 4 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 15 +-
.../test/queries/clientpositive/perf/mv_query30.q | 38 ++++
.../test/queries/clientpositive/perf/mv_query45.q | 27 +++
.../test/queries/clientpositive/perf/mv_query68.q | 49 +++++
.../schema_evol_text_vec_part_all_primitive.q.out | 2 +-
.../perf/tez/constraints/mv_query30.q.out | 136 +++++++++++++
.../perf/tez/constraints/mv_query45.q.out | 108 +++++++++++
.../perf/tez/constraints/mv_query68.q.out | 149 ++++++++++++++
17 files changed, 880 insertions(+), 47 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties
b/itests/src/test/resources/testconfiguration.properties
index 74beeaa..e2cee9c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1081,8 +1081,11 @@ spark.only.query.negative.files=\
spark_task_failure.q
tez.perf.disabled.query.files=\
+ mv_query30.q,\
mv_query44.q,\
- mv_query67.q
+ mv_query45.q,\
+ mv_query67.q,\
+ mv_query68.q
spark.perf.disabled.query.files=\
cbo_ext_query1.q,\
@@ -1183,8 +1186,11 @@ spark.perf.disabled.query.files=\
cbo_query97.q,\
cbo_query98.q,\
cbo_query99.q,\
+ mv_query30.q,\
mv_query44.q,\
+ mv_query45.q,\
mv_query67.q,\
+ mv_query68.q,\
query14.q,\
query1b.q,\
query64.q
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index c4b153e..4ef85c5 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -122,8 +122,6 @@ public class HiveDefaultRelMetadataProvider {
* be visited during the planning phase.
*/
public static void initializeMetadataProviderClass(List<Class<? extends
RelNode>> nodeClasses) {
- // This will register the classes in the default Janino implementation
- JaninoRelMetadataProvider.DEFAULT.register(nodeClasses);
// This will register the classes in the default Hive implementation
DEFAULT.register(nodeClasses);
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
index 67f222b..13cd837 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite;
+import java.util.ArrayDeque;
+import java.util.Deque;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.TableFunctionScan;
@@ -33,7 +35,6 @@ import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rel.logical.LogicalSort;
import org.apache.calcite.rel.logical.LogicalUnion;
import org.apache.calcite.rel.logical.LogicalValues;
-import org.apache.calcite.util.Stacks;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
@@ -43,13 +44,13 @@ import java.util.ArrayList;
import java.util.List;
public class HiveRelShuttleImpl implements HiveRelShuttle {
- protected final List<RelNode> stack = new ArrayList<RelNode>();
+ protected final Deque<RelNode> stack = new ArrayDeque<>();
/**
* Visits a particular child of a parent.
*/
protected RelNode visitChild(RelNode parent, int i, RelNode child) {
- Stacks.push(stack, parent);
+ stack.push(parent);
try {
RelNode child2 = child.accept(this);
if (child2 != child) {
@@ -60,7 +61,7 @@ public class HiveRelShuttleImpl implements HiveRelShuttle {
}
return parent;
} finally {
- Stacks.pop(stack, parent);
+ stack.pop();
}
}
@@ -94,6 +95,7 @@ public class HiveRelShuttleImpl implements HiveRelShuttle {
public RelNode visit(HiveFilter filter) {
return visitChild(filter, 0, filter.getInput());
}
+
public RelNode visit(LogicalFilter filter) {
return visitChild(filter, 0, filter.getInput());
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
similarity index 73%
rename from
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
rename to
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
index 39de521..cc65544 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
@@ -17,9 +17,6 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite;
-import java.util.List;
-
-import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -39,9 +36,9 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys;
import com.google.common.collect.ImmutableList;
-public class HiveDefaultTezModelRelMetadataProvider {
+public class HiveTezModelRelMetadataProvider {
- private static final JaninoRelMetadataProvider DEFAULT_TEZ_COST_MODEL =
+ public static final JaninoRelMetadataProvider DEFAULT =
JaninoRelMetadataProvider.of(
ChainedRelMetadataProvider.of(
ImmutableList.of(
@@ -59,26 +56,4 @@ public class HiveDefaultTezModelRelMetadataProvider {
HiveRelMdPredicates.SOURCE,
JaninoRelMetadataProvider.DEFAULT)));
- private final JaninoRelMetadataProvider metadataProvider;
-
-
- public HiveDefaultTezModelRelMetadataProvider() {
- metadataProvider = DEFAULT_TEZ_COST_MODEL;
- }
-
- public JaninoRelMetadataProvider getMetadataProvider() {
- return metadataProvider;
- }
-
- /**
- * This method can be called at startup time to pre-register all the
- * additional Hive classes (compared to Calcite core classes) that may
- * be visited during the planning phase.
- */
- public static void initializeMetadataProviderClass(List<Class<? extends
RelNode>> nodeClasses) {
- // This will register the classes in the default Janino implementation
- JaninoRelMetadataProvider.DEFAULT.register(nodeClasses);
- // This will register the classes in the default Hive implementation
- DEFAULT_TEZ_COST_MODEL.register(nodeClasses);
- }
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
index 208fb15..a50dbb0 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
@@ -123,9 +123,12 @@ public class HiveVolcanoPlanner extends VolcanoPlanner {
Multimap<Class<? extends RelNode>, RelNode> nodeTypes =
mq.getNodeTypes(rel);
for (RelNode scan : nodeTypes.get(TableScan.class)) {
- if (((RelOptHiveTable)
scan.getTable()).getHiveTableMD().isMaterializedView()) {
- usesMaterializedViews = true;
- break;
+ if (scan.getTable() instanceof RelOptHiveTable) {
+ RelOptHiveTable relOptHiveTable = (RelOptHiveTable) scan.getTable();
+ if (relOptHiveTable.getHiveTableMD().isMaterializedView()) {
+ usesMaterializedViews = true;
+ break;
+ }
}
}
if (isHeuristic && usesMaterializedViews) {
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
index 2828bc1..c49270a 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
@@ -23,7 +23,7 @@ import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultTezModelRelMetadataProvider;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -49,7 +49,8 @@ public class HiveCardinalityPreservingJoinRule extends
HiveFieldTrimmerRule {
JaninoRelMetadataProvider original =
RelMetadataQuery.THREAD_PROVIDERS.get();
try {
- RelMetadataQuery.THREAD_PROVIDERS.set(getJaninoRelMetadataProvider());
+ RelMetadataQuery.THREAD_PROVIDERS.set(
+ HiveTezModelRelMetadataProvider.DEFAULT);
RelMetadataQuery metadataQuery = RelMetadataQuery.instance();
RelOptCost optimizedCost = metadataQuery.getCumulativeCost(optimized);
@@ -70,7 +71,4 @@ public class HiveCardinalityPreservingJoinRule extends
HiveFieldTrimmerRule {
}
}
- private JaninoRelMetadataProvider getJaninoRelMetadataProvider() {
- return new HiveDefaultTezModelRelMetadataProvider().getMetadataProvider();
- }
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializationRelMetadataProvider.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializationRelMetadataProvider.java
new file mode 100644
index 0000000..084df0a
--- /dev/null
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializationRelMetadataProvider.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
+import java.util.Set;
+import org.apache.calcite.plan.RelOptPredicateList;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdAllPredicates;
+import org.apache.calcite.rel.metadata.RelMdExpressionLineage;
+import org.apache.calcite.rel.metadata.RelMdNodeTypes;
+import org.apache.calcite.rel.metadata.RelMdTableReferences;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewBoxing.Box;
+
+/**
+ * Metadata provider implementation that is only used for MV rewriting.
+ */
+public class HiveMaterializationRelMetadataProvider {
+
+ private static final RelMetadataProvider SOURCE_NODE_TYPES =
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.NODE_TYPES.method, new HiveRelMdNodeTypes());
+
+ private static final RelMetadataProvider SOURCE_EXPRESSION_LINEAGE =
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.EXPRESSION_LINEAGE.method, new
HiveRelMdExpressionLineage());
+
+ private static final RelMetadataProvider SOURCE_ALL_PREDICATES =
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.ALL_PREDICATES.method, new HiveRelMdAllPredicates());
+
+ private static final RelMetadataProvider SOURCE_TABLE_REFERENCES =
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.TABLE_REFERENCES.method, new
HiveRelMdTableReferences());
+
+ public static final JaninoRelMetadataProvider DEFAULT =
+ JaninoRelMetadataProvider.of(
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ SOURCE_NODE_TYPES,
+ SOURCE_EXPRESSION_LINEAGE,
+ SOURCE_ALL_PREDICATES,
+ SOURCE_TABLE_REFERENCES,
+ JaninoRelMetadataProvider.DEFAULT)));
+
+
+ private static class HiveRelMdNodeTypes extends RelMdNodeTypes {
+ public Multimap<Class<? extends RelNode>, RelNode> getNodeTypes(RelSubset
rel,
+ RelMetadataQuery mq) {
+ for (RelNode node : rel.getRelList()) {
+ if (node instanceof Box) {
+ return mq.getNodeTypes(node);
+ }
+ }
+ return mq.getNodeTypes(Util.first(rel.getBest(), rel.getOriginal()));
+ }
+ }
+
+ private static class HiveRelMdExpressionLineage extends
RelMdExpressionLineage {
+ public Set<RexNode> getExpressionLineage(RelSubset rel,
+ RelMetadataQuery mq, RexNode outputExpression) {
+ for (RelNode node : rel.getRelList()) {
+ if (node instanceof Box) {
+ return mq.getExpressionLineage(node, outputExpression);
+ }
+ }
+ return mq.getExpressionLineage(Util.first(rel.getBest(),
rel.getOriginal()),
+ outputExpression);
+ }
+ }
+
+ private static class HiveRelMdAllPredicates extends RelMdAllPredicates {
+ public RelOptPredicateList getAllPredicates(RelSubset rel,
+ RelMetadataQuery mq) {
+ for (RelNode node : rel.getRelList()) {
+ if (node instanceof Box) {
+ return mq.getAllPredicates(node);
+ }
+ }
+ return mq.getAllPredicates(Util.first(rel.getBest(), rel.getOriginal()));
+ }
+ }
+
+ private static class HiveRelMdTableReferences extends RelMdTableReferences {
+ public Set<RelTableRef> getTableReferences(RelSubset rel, RelMetadataQuery
mq) {
+ for (RelNode node : rel.getRelList()) {
+ if (node instanceof Box) {
+ return mq.getTableReferences(node);
+ }
+ }
+ return mq.getTableReferences(Util.first(rel.getBest(),
rel.getOriginal()));
+ }
+ }
+}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewBoxing.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewBoxing.java
new file mode 100644
index 0000000..2b5809b
--- /dev/null
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewBoxing.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
+import java.util.Collection;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.calcite.plan.RelOptAbstractTable;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptSchema;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+
+/**
+ * This class contains logic that is useful to trigger additional materialized
+ * view rewritings.
+ *
+ * In particular, in Hive we do not combine the MV rewriting rules with other
+ * rules that may generate additional rewritings in the planner, e.g., join
+ * reordering rules. In fact, the rewriting has some built-in logic to add
+ * compensation joins on top of the MV scan which make adding these rules
+ * unnecessary. This leads to faster planning, however it can also lead to some
+ * missing rewriting opportunities if we are not careful. For instance, the
+ * rewriting algorithm will bail out if an input of a join contains operators
+ * that are not supported. Consider the following example where Union is not
+ * supported by the rewriting algorithm:
+ * MV:
+ * Join
+ * / \
+ * A B
+ *
+ * Query:
+ * Join
+ * / \
+ * Join B
+ * / \
+ * Union A
+ * |
+ * S
+ *
+ * The rewriting can only be triggered at the root of the plan since that is
+ * the operator where A and B are visible. However, after checking the
+ * operators in the plan, the rewriting bails out since Union is not supported.
+ *
+ * This class contains boxing/unboxing logic that aims at fixing this. The
+ * boxing logic will do a traversal of the query plan and introduce Box
+ * operators when it detects an unsupported operator. For the former query,
+ * this will be the rewritten plan:
+ * Query:
+ * Join
+ * / \
+ * Join B
+ * / \
+ * Box$0 A
+ *
+ * Box extends TableScan, and thus, MV rewriting will proceed as expected. In
+ * addition, the Box node keeps a internal pointer to the former subplan that
+ * it replaced. During MV rewriting, we include the unboxing rule in the
+ * planner, which will transform the Box node into the original subplan. The
+ * Box node has an infinite cost: Though it helps the rewriting to be
+ * triggered, it will never be part of the final plan, i.e., the original
+ * subplan (possibly with other MV rewritings) will be chosen.
+ */
+public class HiveMaterializedViewBoxing {
+
+ /**
+ * Create Box operators in plan where necessary.
+ */
+ public static RelNode boxPlan(RelNode plan) {
+ return plan.accept(new BoxingRelShuttle());
+ }
+
+ /**
+ * Rule that replaces Box operators by the plan they hold.
+ */
+ public static final HiveMaterializedViewUnboxingRule INSTANCE_UNBOXING =
+ new HiveMaterializedViewUnboxingRule();
+
+ /* Counter for unique identifiers for Box operator. */
+ private static final AtomicInteger UNIQUE_IDENTIFIER = new AtomicInteger(0);
+
+
+ private static final class BoxingRelShuttle extends HiveRelShuttleImpl {
+
+ @Override
+ public RelNode visit(HiveJoin join) {
+ HiveJoin newJoin = (HiveJoin) visitChildren(join);
+ if (newJoin.getJoinType() != JoinRelType.INNER && !newJoin.isSemiJoin())
{
+ // Nothing to do
+ return newJoin;
+ }
+ RelMetadataQuery mq = newJoin.getCluster().getMetadataQuery();
+ boolean leftValid = isValidRelNodePlan(newJoin.getLeft(), mq);
+ boolean rightValid = isValidRelNodePlan(newJoin.getRight(), mq);
+ if (leftValid == rightValid) {
+ // Both are valid or invalid, nothing to do here
+ return newJoin;
+ }
+ RelNode leftInput = newJoin.getLeft();
+ RelNode rightInput = newJoin.getRight();
+ if (!leftValid) {
+ leftInput = createBoxOperator(newJoin.getCluster(), leftInput);
+ } else {
+ rightInput = createBoxOperator(newJoin.getCluster(), rightInput);
+ }
+ return newJoin.copy(
+ newJoin.getTraitSet(), ImmutableList.of(leftInput, rightInput));
+ }
+
+ private static RelNode createBoxOperator(RelOptCluster cluster, RelNode
eqRelNode) {
+ String name = ".$box" + UNIQUE_IDENTIFIER.getAndIncrement();
+ return new Box(cluster, name, eqRelNode);
+ }
+
+ private static boolean isValidRelNodePlan(RelNode rel, RelMetadataQuery
mq) {
+ final Multimap<Class<? extends RelNode>, RelNode> m =
+ mq.getNodeTypes(rel);
+ for (Entry<Class<? extends RelNode>, Collection<RelNode>> e :
m.asMap().entrySet()) {
+ Class<? extends RelNode> c = e.getKey();
+ if (!TableScan.class.isAssignableFrom(c)
+ && !Project.class.isAssignableFrom(c)
+ && !Filter.class.isAssignableFrom(c)
+ && !Join.class.isAssignableFrom(c)) {
+ // Skip it
+ return false;
+ }
+ if (Join.class.isAssignableFrom(c)) {
+ for (RelNode n : e.getValue()) {
+ final Join join = (Join) n;
+ if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin())
{
+ // Skip it
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ }
+
+ protected static final class Box extends TableScan implements HiveRelNode {
+ private final RelNode eqRelNode;
+
+ private Box(RelOptCluster cluster, String name, RelNode eqRelNode) {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+ new BoxRelOptTable(null, name, eqRelNode.getRowType()));
+ this.eqRelNode = eqRelNode;
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery
mq) {
+ return planner.getCostFactory().makeInfiniteCost();
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ private static final class BoxRelOptTable extends RelOptAbstractTable {
+ private BoxRelOptTable(RelOptSchema schema, String name, RelDataType
rowType) {
+ super(schema, name, rowType);
+ }
+ }
+ }
+
+ private static final class HiveMaterializedViewUnboxingRule extends
RelOptRule {
+
+ private HiveMaterializedViewUnboxingRule() {
+ super(operand(Box.class, any()),
+ HiveRelFactories.HIVE_BUILDER, "HiveMaterializedViewUnboxingRule");
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ Box box = call.rel(0);
+ call.transformTo(box.eqRelNode);
+ }
+ }
+}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
index bfc157c..63d260b 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
@@ -217,7 +217,7 @@ public class HiveMaterializedViewRule {
List<RexNode> identityFields = relBuilder.fields(
ImmutableBitSet.range(0, rel.getRowType().getFieldCount()).asList());
RelNode newRel = relBuilder
- .project(identityFields, ImmutableList.<String>of(), true)
+ .project(identityFields, ImmutableList.of(), true)
.build();
call.transformTo(fil.copy(fil.getTraitSet(), ImmutableList.of(newRel)));
}
@@ -253,7 +253,7 @@ public class HiveMaterializedViewRule {
relBuilder.push(join);
List<RexNode> identityFields = relBuilder.fields(
ImmutableBitSet.range(0,
join.getRowType().getFieldCount()).asList());
- relBuilder.project(identityFields, ImmutableList.<String>of(), true);
+ relBuilder.project(identityFields, ImmutableList.of(), true);
call.transformTo(relBuilder.build());
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 088efd4..04da0a6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -164,6 +164,8 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
@@ -264,6 +266,7 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCProjectPushDow
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCSortPushDownRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCUnionPushDownRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateIncrementalRewritingRule;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewBoxing;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveNoAggregateIncrementalRewritingRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor;
@@ -2251,10 +2254,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Optimize plan
basePlan = executeProgram(basePlan, program.build(), mdProvider,
executorProvider, materializations);
} else {
+ // Pre-processing to being able to trigger additional rewritings
+ basePlan = HiveMaterializedViewBoxing.boxPlan(basePlan);
+
// If this is not a rebuild, we use Volcano planner as the decision
// on whether to use MVs or not and which MVs to use should be
cost-based
optCluster.invalidateMetadataQuery();
-
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
+
RelMetadataQuery.THREAD_PROVIDERS.set(HiveMaterializationRelMetadataProvider.DEFAULT);
// Add materializations to planner
for (RelOptMaterialization materialization : materializations) {
@@ -2266,6 +2272,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
for (RelOptRule rule :
HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES) {
planner.addRule(rule);
}
+ // Unboxing rule
+ planner.addRule(HiveMaterializedViewBoxing.INSTANCE_UNBOXING);
// Partition pruner rule
planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE);
planner.addRule(new HivePartitionPruneRule(conf));
@@ -2332,7 +2340,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
if (mvRebuildMode == MaterializationRebuildMode.AGGREGATE_REBUILD) {
// Make a cost-based decision factoring the configuration property
optCluster.invalidateMetadataQuery();
-
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
+
RelMetadataQuery.THREAD_PROVIDERS.set(HiveMaterializationRelMetadataProvider.DEFAULT);
RelMetadataQuery mq = RelMetadataQuery.instance();
RelOptCost costOriginalPlan =
mq.getCumulativeCost(calcitePreMVRewritingPlan);
final double factorSelectivity = HiveConf.getFloatVar(
@@ -5668,7 +5676,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
* and the field trimmer.
*/
public static void warmup() {
+ JaninoRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(HIVE_REL_NODE_CLASSES);
+ HiveTezModelRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
+
HiveMaterializationRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
HiveRelFieldTrimmer.initializeFieldTrimmerClass(HIVE_REL_NODE_CLASSES);
}
diff --git a/ql/src/test/queries/clientpositive/perf/mv_query30.q
b/ql/src/test/queries/clientpositive/perf/mv_query30.q
new file mode 100644
index 0000000..8c5f936
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/mv_query30.q
@@ -0,0 +1,38 @@
+set hive.mapred.mode=nonstrict;
+set hive.materializedview.rewriting.time.window=-1;
+
+create materialized view mv_customer_customer_address_n100 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk;
+
+explain cbo
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(wr_return_amt) as ctr_total_return
+ from web_returns
+ ,date_dim
+ ,customer_address
+ where wr_returned_date_sk = d_date_sk
+ and d_year =2002
+ and wr_returning_addr_sk = ca_address_sk
+ group by wr_returning_customer_sk
+ ,ca_state)
+ select
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+limit 100;
+
+drop materialized view mv_customer_customer_address_n100;
diff --git a/ql/src/test/queries/clientpositive/perf/mv_query45.q
b/ql/src/test/queries/clientpositive/perf/mv_query45.q
new file mode 100644
index 0000000..338bd88
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/mv_query45.q
@@ -0,0 +1,27 @@
+set hive.mapred.mode=nonstrict;
+set hive.materializedview.rewriting.time.window=-1;
+
+create materialized view mv_customer_customer_address_n101 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk;
+
+explain cbo
+select ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ws_item_sk = i_item_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475',
'85392', '85460', '80348', '81792')
+ or
+ i_item_id in (select i_item_id
+ from item
+ where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19,
23, 29)
+ )
+ )
+ and ws_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100;
+
+drop materialized view mv_customer_customer_address_n101;
diff --git a/ql/src/test/queries/clientpositive/perf/mv_query68.q
b/ql/src/test/queries/clientpositive/perf/mv_query68.q
new file mode 100644
index 0000000..80cf910
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/mv_query68.q
@@ -0,0 +1,49 @@
+set hive.mapred.mode=nonstrict;
+set hive.materializedview.rewriting.time.window=-1;
+
+create materialized view mv_customer_customer_address_n102 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk;
+
+explain cbo
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,extended_price
+ ,extended_tax
+ ,list_price
+ from (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_ext_sales_price) extended_price
+ ,sum(ss_ext_list_price) list_price
+ ,sum(ss_ext_tax) extended_tax
+ from store_sales
+ ,date_dim
+ ,store
+ ,household_demographics
+ ,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood')
+ group by ss_ticket_number
+ ,ss_customer_sk
+ ,ss_addr_sk,ca_city) dn
+ ,customer
+ ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,ss_ticket_number
+ limit 100;
+
+drop materialized view mv_customer_customer_address_n102;
diff --git
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
index d259074..3318019 100644
---
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
+++
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
@@ -1188,7 +1188,7 @@ insert_num part c1 c2 c3 c4
c5 c6 b
6 1 hollow innocent crabs blushing
ambition ebony liquor age NULL 3841833197.314137090000000 new
7 1 wig feel social fork drum search
bump conclusion 8.31 NULL new
8 1 bubble f drain loyal station racket
antique bu 0.69 22.832613278700000 new
-9 1 12345678 1234567890123456789012345
123456789012345 1234567890 NULL 1234567890.543210987700000 new
+9 1 12345678 12345678901234567890123456789012
123456789012345 1234567890 NULL 1234567890.543210987700000 new
PREHOOK: query: drop table part_change_same_type_different_params_n8
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@part_change_same_type_different_params_n8
diff --git
a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query30.q.out
b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query30.q.out
new file mode 100644
index 0000000..19019aa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query30.q.out
@@ -0,0 +1,136 @@
+PREHOOK: query: create materialized view mv_customer_customer_address_n100 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_customer_customer_address_n100
+POSTHOOK: query: create materialized view mv_customer_customer_address_n100 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_customer_customer_address_n100
+PREHOOK: query: explain cbo
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(wr_return_amt) as ctr_total_return
+ from web_returns
+ ,date_dim
+ ,customer_address
+ where wr_returned_date_sk = d_date_sk
+ and d_year =2002
+ and wr_returning_addr_sk = ca_address_sk
+ group by wr_returning_customer_sk
+ ,ca_state)
+ select
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@mv_customer_customer_address_n100
+PREHOOK: Input: default@web_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+ ,ca_state as ctr_state,
+ sum(wr_return_amt) as ctr_total_return
+ from web_returns
+ ,date_dim
+ ,customer_address
+ where wr_returned_date_sk = d_date_sk
+ and d_year =2002
+ and wr_returning_addr_sk = ca_address_sk
+ group by wr_returning_customer_sk
+ ,ca_state)
+ select
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+ ,customer_address
+ ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ from customer_total_return ctr2
+ where ctr1.ctr_state = ctr2.ctr_state)
+ and ca_address_sk = c_current_addr_sk
+ and ca_state = 'IL'
+ and ctr1.ctr_customer_sk = c_customer_sk
+ order by
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+ ,c_last_review_date,ctr_total_return
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@mv_customer_customer_address_n100
+POSTHOOK: Input: default@web_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4],
sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10],
sort11=[$11], sort12=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC],
dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC],
dir10=[ASC], dir11=[ASC], dir12=[ASC], fetch=[100])
+ HiveProject(c_customer_id=[$1], c_salutation=[$2], c_first_name=[$3],
c_last_name=[$4], c_preferred_cust_flag=[$5], c_birth_day=[$6],
c_birth_month=[$7], c_birth_year=[$8], c_birth_country=[$9], c_login=[$10],
c_email_address=[$11], c_last_review_date=[$12], $f2=[$15])
+ HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(c_customer_sk=[CAST($0):INTEGER NOT NULL],
c_customer_id=[CAST($1):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" NOT NULL],
c_salutation=[$7], c_first_name=[$8], c_last_name=[$9],
c_preferred_cust_flag=[$10], c_birth_day=[$11], c_birth_month=[$12],
c_birth_year=[$13], c_birth_country=[$14], c_login=[$15],
c_email_address=[$16], c_last_review_date=[$17])
+ HiveFilter(condition=[=($26, _UTF-16LE'IL')])
+ HiveTableScan(table=[[default, mv_customer_customer_address_n100]],
table:alias=[default.mv_customer_customer_address_n100])
+ HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveFilter(condition=[IS NOT NULL($2)])
+ HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
+ HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
+ HiveJoin(condition=[=($4, $0)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(ca_address_sk=[$0], ca_state=[$8])
+ HiveFilter(condition=[IS NOT NULL($8)])
+ HiveTableScan(table=[[default, customer_address]],
table:alias=[customer_address])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(wr_returned_date_sk=[$0],
wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15])
+ HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT
NULL($0), IS NOT NULL($10))])
+ HiveTableScan(table=[[default, web_returns]],
table:alias=[web_returns])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($6, 2002)])
+ HiveTableScan(table=[[default, date_dim]],
table:alias=[date_dim])
+ HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2,
1))], ctr_state=[$0])
+ HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))])
+ HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)])
+ HiveProject(ca_state=[$0], wr_returning_customer_sk=[$1],
$f2=[$2])
+ HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
+ HiveJoin(condition=[=($4, $0)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(ca_address_sk=[$0], ca_state=[$8])
+ HiveFilter(condition=[IS NOT NULL($8)])
+ HiveTableScan(table=[[default, customer_address]],
table:alias=[customer_address])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(wr_returned_date_sk=[$0],
wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT
NULL($10))])
+ HiveTableScan(table=[[default, web_returns]],
table:alias=[web_returns])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($6, 2002)])
+ HiveTableScan(table=[[default, date_dim]],
table:alias=[date_dim])
+
+PREHOOK: query: drop materialized view mv_customer_customer_address_n100
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_customer_customer_address_n100
+PREHOOK: Output: default@mv_customer_customer_address_n100
+POSTHOOK: query: drop materialized view mv_customer_customer_address_n100
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_customer_customer_address_n100
+POSTHOOK: Output: default@mv_customer_customer_address_n100
diff --git
a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query45.q.out
b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query45.q.out
new file mode 100644
index 0000000..1b066b4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query45.q.out
@@ -0,0 +1,108 @@
+PREHOOK: query: create materialized view mv_customer_customer_address_n101 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_customer_customer_address_n101
+POSTHOOK: query: create materialized view mv_customer_customer_address_n101 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_customer_customer_address_n101
+Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 2' is a cross product
+PREHOOK: query: explain cbo
+select ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ws_item_sk = i_item_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475',
'85392', '85460', '80348', '81792')
+ or
+ i_item_id in (select i_item_id
+ from item
+ where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19,
23, 29)
+ )
+ )
+ and ws_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@mv_customer_customer_address_n101
+PREHOOK: Input: default@web_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+ and c_current_addr_sk = ca_address_sk
+ and ws_item_sk = i_item_sk
+ and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475',
'85392', '85460', '80348', '81792')
+ or
+ i_item_id in (select i_item_id
+ from item
+ where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19,
23, 29)
+ )
+ )
+ and ws_sold_date_sk = d_date_sk
+ and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@mv_customer_customer_address_n101
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
+ HiveProject(ca_zip=[$1], ca_county=[$0], $f2=[$2])
+ HiveAggregate(group=[{3, 4}], agg#0=[sum($2)])
+ HiveFilter(condition=[OR(AND(<>($8, 0), IS NOT NULL($10)), IN(substr($4,
1, 5), _UTF-16LE'85669':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'86197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'88274':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'83405':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'86475':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'85392':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'85460':VARCHAR(214 [...]
+ HiveProject(ws_sold_date_sk=[$6], ws_item_sk=[$7],
ws_sales_price=[$9], ca_county=[$11], ca_zip=[$12], d_date_sk=[$5],
i_item_sk=[$0], i_item_id=[$1], c=[$2], i_item_id0=[$3], literalTrue=[$4])
+ HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveJoin(condition=[=($1, $3)], joinType=[left], algorithm=[none],
cost=[not available])
+ HiveJoin(condition=[true], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(i_item_sk=[$0], i_item_id=[$1])
+ HiveTableScan(table=[[default, item]], table:alias=[item])
+ HiveProject(c=[$0])
+ HiveAggregate(group=[{}], c=[COUNT()])
+ HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19,
23, 29)])
+ HiveTableScan(table=[[default, item]],
table:alias=[item])
+ HiveProject(i_item_id=[$0], literalTrue=[true])
+ HiveAggregate(group=[{1}])
+ HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19, 23,
29)])
+ HiveTableScan(table=[[default, item]], table:alias=[item])
+ HiveProject(d_date_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2],
ws_bill_customer_sk=[$3], ws_sales_price=[$4], c_customer_sk=[$5],
ca_county=[$6], ca_zip=[$7])
+ HiveJoin(condition=[=($1, $0)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[AND(=($6, 2000), =($10, 2))])
+ HiveTableScan(table=[[default, date_dim]],
table:alias=[date_dim])
+ HiveJoin(condition=[=($2, $4)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3],
ws_bill_customer_sk=[$4], ws_sales_price=[$21])
+ HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT
NULL($0))])
+ HiveTableScan(table=[[default, web_sales]],
table:alias=[web_sales])
+ HiveProject(c_customer_sk=[CAST($0):INTEGER NOT NULL],
ca_county=[$25], ca_zip=[$27])
+ HiveTableScan(table=[[default,
mv_customer_customer_address_n101]],
table:alias=[default.mv_customer_customer_address_n101])
+
+PREHOOK: query: drop materialized view mv_customer_customer_address_n101
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_customer_customer_address_n101
+PREHOOK: Output: default@mv_customer_customer_address_n101
+POSTHOOK: query: drop materialized view mv_customer_customer_address_n101
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_customer_customer_address_n101
+POSTHOOK: Output: default@mv_customer_customer_address_n101
diff --git
a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query68.q.out
b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query68.q.out
new file mode 100644
index 0000000..3069dfe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query68.q.out
@@ -0,0 +1,149 @@
+PREHOOK: query: create materialized view mv_customer_customer_address_n102 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_customer_customer_address_n102
+POSTHOOK: query: create materialized view mv_customer_customer_address_n102 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_customer_customer_address_n102
+PREHOOK: query: explain cbo
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,extended_price
+ ,extended_tax
+ ,list_price
+ from (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_ext_sales_price) extended_price
+ ,sum(ss_ext_list_price) list_price
+ ,sum(ss_ext_tax) extended_tax
+ from store_sales
+ ,date_dim
+ ,store
+ ,household_demographics
+ ,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood')
+ group by ss_ticket_number
+ ,ss_customer_sk
+ ,ss_addr_sk,ca_city) dn
+ ,customer
+ ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,ss_ticket_number
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@mv_customer_customer_address_n102
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select c_last_name
+ ,c_first_name
+ ,ca_city
+ ,bought_city
+ ,ss_ticket_number
+ ,extended_price
+ ,extended_tax
+ ,list_price
+ from (select ss_ticket_number
+ ,ss_customer_sk
+ ,ca_city bought_city
+ ,sum(ss_ext_sales_price) extended_price
+ ,sum(ss_ext_list_price) list_price
+ ,sum(ss_ext_tax) extended_tax
+ from store_sales
+ ,date_dim
+ ,store
+ ,household_demographics
+ ,customer_address
+ where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+ and store_sales.ss_store_sk = store.s_store_sk
+ and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+ and store_sales.ss_addr_sk = customer_address.ca_address_sk
+ and date_dim.d_dom between 1 and 2
+ and (household_demographics.hd_dep_count = 2 or
+ household_demographics.hd_vehicle_count= 1)
+ and date_dim.d_year in (1998,1998+1,1998+2)
+ and store.s_city in ('Cedar Grove','Wildwood')
+ group by ss_ticket_number
+ ,ss_customer_sk
+ ,ss_addr_sk,ca_city) dn
+ ,customer
+ ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+ and customer.c_current_addr_sk = current_addr.ca_address_sk
+ and current_addr.ca_city <> bought_city
+ order by c_last_name
+ ,ss_ticket_number
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@mv_customer_customer_address_n102
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100])
+ HiveProject(c_last_name=[$8], c_first_name=[$7], ca_city=[$9],
bought_city=[$2], ss_ticket_number=[$0], extended_price=[$3],
extended_tax=[$5], list_price=[$4])
+ HiveJoin(condition=[AND(=($1, $6), <>($2, $9))], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(ss_ticket_number=[$3], ss_customer_sk=[$1],
bought_city=[$0], extended_price=[$4], list_price=[$5], extended_tax=[$6])
+ HiveAggregate(group=[{1, 3, 5, 7}], agg#0=[sum($8)], agg#1=[sum($9)],
agg#2=[sum($10)])
+ HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveProject(ca_address_sk=[$0], ca_city=[$6])
+ HiveTableScan(table=[[default, customer_address]],
table:alias=[customer_address])
+ HiveJoin(condition=[=($2, $11)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($4, $10)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $9)], joinType=[inner],
algorithm=[none], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3],
ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9],
ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT
NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))])
+ HiveTableScan(table=[[default, store_sales]],
table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6,
1998, 1999, 2000))])
+ HiveTableScan(table=[[default, date_dim]],
table:alias=[date_dim])
+ HiveProject(s_store_sk=[$0])
+ HiveFilter(condition=[IN($22, _UTF-16LE'Cedar
Grove':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'Wildwood':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")])
+ HiveTableScan(table=[[default, store]],
table:alias=[store])
+ HiveProject(hd_demo_sk=[$0])
+ HiveFilter(condition=[OR(=($4, 1), =($3, 2))])
+ HiveTableScan(table=[[default, household_demographics]],
table:alias=[household_demographics])
+ HiveProject(c_customer_sk=[CAST($0):INTEGER NOT NULL],
c_first_name=[$8], c_last_name=[$9], ca_city=[$24])
+ HiveTableScan(table=[[default, mv_customer_customer_address_n102]],
table:alias=[default.mv_customer_customer_address_n102])
+
+PREHOOK: query: drop materialized view mv_customer_customer_address_n102
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_customer_customer_address_n102
+PREHOOK: Output: default@mv_customer_customer_address_n102
+POSTHOOK: query: drop materialized view mv_customer_customer_address_n102
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_customer_customer_address_n102
+POSTHOOK: Output: default@mv_customer_customer_address_n102