This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f3826d  HIVE-23878: Aggregate after join throws off MV rewrite (Jesus 
Camacho Rodriguez, reviewed by Vineet Garg)
6f3826d is described below

commit 6f3826d73f6462e33cf38656d68a0e6230565c05
Author: Jesús Camacho Rodríguez <[email protected]>
AuthorDate: Fri Jul 24 17:28:48 2020 -0700

    HIVE-23878: Aggregate after join throws off MV rewrite (Jesus Camacho 
Rodriguez, reviewed by Vineet Garg)
    
    Closes apache/hive#1300
---
 .../test/resources/testconfiguration.properties    |   8 +-
 .../calcite/HiveDefaultRelMetadataProvider.java    |   2 -
 .../ql/optimizer/calcite/HiveRelShuttleImpl.java   |  10 +-
 ...r.java => HiveTezModelRelMetadataProvider.java} |  29 +--
 .../optimizer/calcite/cost/HiveVolcanoPlanner.java |   9 +-
 .../rules/HiveCardinalityPreservingJoinRule.java   |   8 +-
 .../HiveMaterializationRelMetadataProvider.java    | 120 ++++++++++++
 .../rules/views/HiveMaterializedViewBoxing.java    | 213 +++++++++++++++++++++
 .../rules/views/HiveMaterializedViewRule.java      |   4 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  15 +-
 .../test/queries/clientpositive/perf/mv_query30.q  |  38 ++++
 .../test/queries/clientpositive/perf/mv_query45.q  |  27 +++
 .../test/queries/clientpositive/perf/mv_query68.q  |  49 +++++
 .../schema_evol_text_vec_part_all_primitive.q.out  |   2 +-
 .../perf/tez/constraints/mv_query30.q.out          | 136 +++++++++++++
 .../perf/tez/constraints/mv_query45.q.out          | 108 +++++++++++
 .../perf/tez/constraints/mv_query68.q.out          | 149 ++++++++++++++
 17 files changed, 880 insertions(+), 47 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 74beeaa..e2cee9c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1081,8 +1081,11 @@ spark.only.query.negative.files=\
   spark_task_failure.q
 
 tez.perf.disabled.query.files=\
+  mv_query30.q,\
   mv_query44.q,\
-  mv_query67.q
+  mv_query45.q,\
+  mv_query67.q,\
+  mv_query68.q
 
 spark.perf.disabled.query.files=\
   cbo_ext_query1.q,\
@@ -1183,8 +1186,11 @@ spark.perf.disabled.query.files=\
   cbo_query97.q,\
   cbo_query98.q,\
   cbo_query99.q,\
+  mv_query30.q,\
   mv_query44.q,\
+  mv_query45.q,\
   mv_query67.q,\
+  mv_query68.q,\
   query14.q,\
   query1b.q,\
   query64.q
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index c4b153e..4ef85c5 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -122,8 +122,6 @@ public class HiveDefaultRelMetadataProvider {
    * be visited during the planning phase.
    */
   public static void initializeMetadataProviderClass(List<Class<? extends 
RelNode>> nodeClasses) {
-    // This will register the classes in the default Janino implementation
-    JaninoRelMetadataProvider.DEFAULT.register(nodeClasses);
     // This will register the classes in the default Hive implementation
     DEFAULT.register(nodeClasses);
   }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
index 67f222b..13cd837 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import java.util.ArrayDeque;
+import java.util.Deque;
 import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.TableFunctionScan;
@@ -33,7 +35,6 @@ import org.apache.calcite.rel.logical.LogicalProject;
 import org.apache.calcite.rel.logical.LogicalSort;
 import org.apache.calcite.rel.logical.LogicalUnion;
 import org.apache.calcite.rel.logical.LogicalValues;
-import org.apache.calcite.util.Stacks;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
@@ -43,13 +44,13 @@ import java.util.ArrayList;
 import java.util.List;
 
 public class HiveRelShuttleImpl implements HiveRelShuttle {
-    protected final List<RelNode> stack = new ArrayList<RelNode>();
+    protected final Deque<RelNode> stack = new ArrayDeque<>();
 
     /**
      * Visits a particular child of a parent.
      */
     protected RelNode visitChild(RelNode parent, int i, RelNode child) {
-        Stacks.push(stack, parent);
+        stack.push(parent);
         try {
             RelNode child2 = child.accept(this);
             if (child2 != child) {
@@ -60,7 +61,7 @@ public class HiveRelShuttleImpl implements HiveRelShuttle {
             }
             return parent;
         } finally {
-            Stacks.pop(stack, parent);
+            stack.pop();
         }
     }
 
@@ -94,6 +95,7 @@ public class HiveRelShuttleImpl implements HiveRelShuttle {
     public RelNode visit(HiveFilter filter) {
         return visitChild(filter, 0, filter.getInput());
     }
+
     public RelNode visit(LogicalFilter filter) {
         return visitChild(filter, 0, filter.getInput());
     }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
similarity index 73%
rename from 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
rename to 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
index 39de521..cc65544 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
@@ -17,9 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
-import java.util.List;
-
-import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
 import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -39,9 +36,9 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys;
 
 import com.google.common.collect.ImmutableList;
 
-public class HiveDefaultTezModelRelMetadataProvider {
+public class HiveTezModelRelMetadataProvider {
 
-  private static final JaninoRelMetadataProvider DEFAULT_TEZ_COST_MODEL =
+  public static final JaninoRelMetadataProvider DEFAULT =
       JaninoRelMetadataProvider.of(
         ChainedRelMetadataProvider.of(
             ImmutableList.of(
@@ -59,26 +56,4 @@ public class HiveDefaultTezModelRelMetadataProvider {
                 HiveRelMdPredicates.SOURCE,
                 JaninoRelMetadataProvider.DEFAULT)));
 
-  private final JaninoRelMetadataProvider metadataProvider;
-
-
-  public HiveDefaultTezModelRelMetadataProvider() {
-    metadataProvider = DEFAULT_TEZ_COST_MODEL;
-  }
-
-  public JaninoRelMetadataProvider getMetadataProvider() {
-    return metadataProvider;
-  }
-
-  /**
-   * This method can be called at startup time to pre-register all the
-   * additional Hive classes (compared to Calcite core classes) that may
-   * be visited during the planning phase.
-   */
-  public static void initializeMetadataProviderClass(List<Class<? extends 
RelNode>> nodeClasses) {
-    // This will register the classes in the default Janino implementation
-    JaninoRelMetadataProvider.DEFAULT.register(nodeClasses);
-    // This will register the classes in the default Hive implementation
-    DEFAULT_TEZ_COST_MODEL.register(nodeClasses);
-  }
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
index 208fb15..a50dbb0 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
@@ -123,9 +123,12 @@ public class HiveVolcanoPlanner extends VolcanoPlanner {
     Multimap<Class<? extends RelNode>, RelNode> nodeTypes =
         mq.getNodeTypes(rel);
     for (RelNode scan : nodeTypes.get(TableScan.class)) {
-      if (((RelOptHiveTable) 
scan.getTable()).getHiveTableMD().isMaterializedView()) {
-        usesMaterializedViews = true;
-        break;
+      if (scan.getTable() instanceof RelOptHiveTable) {
+        RelOptHiveTable relOptHiveTable = (RelOptHiveTable) scan.getTable();
+        if (relOptHiveTable.getHiveTableMD().isMaterializedView()) {
+          usesMaterializedViews = true;
+          break;
+        }
       }
     }
     if (isHeuristic && usesMaterializedViews) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
index 2828bc1..c49270a 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
@@ -23,7 +23,7 @@ import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultTezModelRelMetadataProvider;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -49,7 +49,8 @@ public class HiveCardinalityPreservingJoinRule extends 
HiveFieldTrimmerRule {
 
     JaninoRelMetadataProvider original = 
RelMetadataQuery.THREAD_PROVIDERS.get();
     try {
-      RelMetadataQuery.THREAD_PROVIDERS.set(getJaninoRelMetadataProvider());
+      RelMetadataQuery.THREAD_PROVIDERS.set(
+          HiveTezModelRelMetadataProvider.DEFAULT);
       RelMetadataQuery metadataQuery = RelMetadataQuery.instance();
 
       RelOptCost optimizedCost = metadataQuery.getCumulativeCost(optimized);
@@ -70,7 +71,4 @@ public class HiveCardinalityPreservingJoinRule extends 
HiveFieldTrimmerRule {
     }
   }
 
-  private JaninoRelMetadataProvider getJaninoRelMetadataProvider() {
-    return new HiveDefaultTezModelRelMetadataProvider().getMetadataProvider();
-  }
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializationRelMetadataProvider.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializationRelMetadataProvider.java
new file mode 100644
index 0000000..084df0a
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializationRelMetadataProvider.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
+import java.util.Set;
+import org.apache.calcite.plan.RelOptPredicateList;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdAllPredicates;
+import org.apache.calcite.rel.metadata.RelMdExpressionLineage;
+import org.apache.calcite.rel.metadata.RelMdNodeTypes;
+import org.apache.calcite.rel.metadata.RelMdTableReferences;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewBoxing.Box;
+
+/**
+ * Metadata provider implementation that is only used for MV rewriting.
+ */
+public class HiveMaterializationRelMetadataProvider {
+
+  private static final RelMetadataProvider SOURCE_NODE_TYPES =
+      ReflectiveRelMetadataProvider.reflectiveSource(
+          BuiltInMethod.NODE_TYPES.method, new HiveRelMdNodeTypes());
+
+  private static final RelMetadataProvider SOURCE_EXPRESSION_LINEAGE =
+      ReflectiveRelMetadataProvider.reflectiveSource(
+          BuiltInMethod.EXPRESSION_LINEAGE.method, new 
HiveRelMdExpressionLineage());
+
+  private static final RelMetadataProvider SOURCE_ALL_PREDICATES =
+      ReflectiveRelMetadataProvider.reflectiveSource(
+          BuiltInMethod.ALL_PREDICATES.method, new HiveRelMdAllPredicates());
+
+  private static final RelMetadataProvider SOURCE_TABLE_REFERENCES =
+      ReflectiveRelMetadataProvider.reflectiveSource(
+          BuiltInMethod.TABLE_REFERENCES.method, new 
HiveRelMdTableReferences());
+
+  public static final JaninoRelMetadataProvider DEFAULT =
+      JaninoRelMetadataProvider.of(
+          ChainedRelMetadataProvider.of(
+              ImmutableList.of(
+                  SOURCE_NODE_TYPES,
+                  SOURCE_EXPRESSION_LINEAGE,
+                  SOURCE_ALL_PREDICATES,
+                  SOURCE_TABLE_REFERENCES,
+                  JaninoRelMetadataProvider.DEFAULT)));
+
+
+  private static class HiveRelMdNodeTypes extends RelMdNodeTypes {
+    public Multimap<Class<? extends RelNode>, RelNode> getNodeTypes(RelSubset 
rel,
+        RelMetadataQuery mq) {
+      for (RelNode node : rel.getRelList()) {
+        if (node instanceof Box) {
+          return mq.getNodeTypes(node);
+        }
+      }
+      return mq.getNodeTypes(Util.first(rel.getBest(), rel.getOriginal()));
+    }
+  }
+
+  private static class HiveRelMdExpressionLineage extends 
RelMdExpressionLineage {
+    public Set<RexNode> getExpressionLineage(RelSubset rel,
+        RelMetadataQuery mq, RexNode outputExpression) {
+      for (RelNode node : rel.getRelList()) {
+        if (node instanceof Box) {
+          return mq.getExpressionLineage(node, outputExpression);
+        }
+      }
+      return mq.getExpressionLineage(Util.first(rel.getBest(), 
rel.getOriginal()),
+          outputExpression);
+    }
+  }
+
+  private static class HiveRelMdAllPredicates extends RelMdAllPredicates {
+    public RelOptPredicateList getAllPredicates(RelSubset rel,
+        RelMetadataQuery mq) {
+      for (RelNode node : rel.getRelList()) {
+        if (node instanceof Box) {
+          return mq.getAllPredicates(node);
+        }
+      }
+      return mq.getAllPredicates(Util.first(rel.getBest(), rel.getOriginal()));
+    }
+  }
+
+  private static class HiveRelMdTableReferences extends RelMdTableReferences {
+    public Set<RelTableRef> getTableReferences(RelSubset rel, RelMetadataQuery 
mq) {
+      for (RelNode node : rel.getRelList()) {
+        if (node instanceof Box) {
+          return mq.getTableReferences(node);
+        }
+      }
+      return mq.getTableReferences(Util.first(rel.getBest(), 
rel.getOriginal()));
+    }
+  }
+}
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewBoxing.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewBoxing.java
new file mode 100644
index 0000000..2b5809b
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewBoxing.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
+import java.util.Collection;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.calcite.plan.RelOptAbstractTable;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptSchema;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+
+/**
+ * This class contains logic that is useful to trigger additional materialized
+ * view rewritings.
+ *
+ * In particular, in Hive we do not combine the MV rewriting rules with other
+ * rules that may generate additional rewritings in the planner, e.g., join
+ * reordering rules. In fact, the rewriting has some built-in logic to add
+ * compensation joins on top of the MV scan which make adding these rules
+ * unnecessary. This leads to faster planning, however it can also lead to some
+ * missing rewriting opportunities if we are not careful. For instance, the
+ * rewriting algorithm will bail out if an input of a join contains operators
+ * that are not supported. Consider the following example where Union is not
+ * supported by the rewriting algorithm:
+ * MV:
+ *      Join
+ *     /    \
+ *    A      B
+ *
+ * Query:
+ *         Join
+ *        /    \
+ *     Join     B
+ *    /    \
+ * Union    A
+ *   |
+ *   S
+ *
+ * The rewriting can only be triggered at the root of the plan since that is
+ * the operator where A and B are visible. However, after checking the
+ * operators in the plan, the rewriting bails out since Union is not supported.
+ *
+ * This class contains boxing/unboxing logic that aims at fixing this. The
+ * boxing logic will do a traversal of the query plan and introduce Box
+ * operators when it detects an unsupported operator. For the former query,
+ * this will be the rewritten plan:
+ * Query:
+ *         Join
+ *        /    \
+ *     Join     B
+ *    /    \
+ * Box$0    A
+ *
+ * Box extends TableScan, and thus, MV rewriting will proceed as expected. In
+ * addition, the Box node keeps a internal pointer to the former subplan that
+ * it replaced. During MV rewriting, we include the unboxing rule in the
+ * planner, which will transform the Box node into the original subplan. The
+ * Box node has an infinite cost: Though it helps the rewriting to be
+ * triggered, it will never be part of the final plan, i.e., the original
+ * subplan (possibly with other MV rewritings) will be chosen.
+ */
+public class HiveMaterializedViewBoxing {
+
+  /**
+   * Create Box operators in plan where necessary.
+   */
+  public static RelNode boxPlan(RelNode plan) {
+    return plan.accept(new BoxingRelShuttle());
+  }
+
+  /**
+   * Rule that replaces Box operators by the plan they hold.
+   */
+  public static final HiveMaterializedViewUnboxingRule INSTANCE_UNBOXING =
+      new HiveMaterializedViewUnboxingRule();
+
+  /* Counter for unique identifiers for Box operator. */
+  private static final AtomicInteger UNIQUE_IDENTIFIER = new AtomicInteger(0);
+
+
+  private static final class BoxingRelShuttle extends HiveRelShuttleImpl {
+
+    @Override
+    public RelNode visit(HiveJoin join) {
+      HiveJoin newJoin = (HiveJoin) visitChildren(join);
+      if (newJoin.getJoinType() != JoinRelType.INNER && !newJoin.isSemiJoin()) 
{
+        // Nothing to do
+        return newJoin;
+      }
+      RelMetadataQuery mq = newJoin.getCluster().getMetadataQuery();
+      boolean leftValid = isValidRelNodePlan(newJoin.getLeft(), mq);
+      boolean rightValid = isValidRelNodePlan(newJoin.getRight(), mq);
+      if (leftValid == rightValid) {
+        // Both are valid or invalid, nothing to do here
+        return newJoin;
+      }
+      RelNode leftInput = newJoin.getLeft();
+      RelNode rightInput = newJoin.getRight();
+      if (!leftValid) {
+        leftInput = createBoxOperator(newJoin.getCluster(), leftInput);
+      } else {
+        rightInput = createBoxOperator(newJoin.getCluster(), rightInput);
+      }
+      return newJoin.copy(
+          newJoin.getTraitSet(), ImmutableList.of(leftInput, rightInput));
+    }
+
+    private static RelNode createBoxOperator(RelOptCluster cluster, RelNode 
eqRelNode) {
+      String name = ".$box" + UNIQUE_IDENTIFIER.getAndIncrement();
+      return new Box(cluster, name, eqRelNode);
+    }
+
+    private static boolean isValidRelNodePlan(RelNode rel, RelMetadataQuery 
mq) {
+      final Multimap<Class<? extends RelNode>, RelNode> m =
+          mq.getNodeTypes(rel);
+      for (Entry<Class<? extends RelNode>, Collection<RelNode>> e : 
m.asMap().entrySet()) {
+        Class<? extends RelNode> c = e.getKey();
+        if (!TableScan.class.isAssignableFrom(c)
+            && !Project.class.isAssignableFrom(c)
+            && !Filter.class.isAssignableFrom(c)
+            && !Join.class.isAssignableFrom(c)) {
+          // Skip it
+          return false;
+        }
+        if (Join.class.isAssignableFrom(c)) {
+          for (RelNode n : e.getValue()) {
+            final Join join = (Join) n;
+            if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) 
{
+              // Skip it
+              return false;
+            }
+          }
+        }
+      }
+      return true;
+    }
+
+  }
+
+  protected static final class Box extends TableScan implements HiveRelNode {
+    private final RelNode eqRelNode;
+
+    private Box(RelOptCluster cluster, String name, RelNode eqRelNode) {
+      super(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+          new BoxRelOptTable(null, name, eqRelNode.getRowType()));
+      this.eqRelNode = eqRelNode;
+    }
+
+    @Override
+    public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery 
mq) {
+      return planner.getCostFactory().makeInfiniteCost();
+    }
+
+    @Override
+    public void implement(Implementor implementor) {
+    }
+
+    private static final class BoxRelOptTable extends RelOptAbstractTable {
+      private BoxRelOptTable(RelOptSchema schema, String name, RelDataType 
rowType) {
+        super(schema, name, rowType);
+      }
+    }
+  }
+
+  private static final class HiveMaterializedViewUnboxingRule extends 
RelOptRule {
+
+    private HiveMaterializedViewUnboxingRule() {
+      super(operand(Box.class, any()),
+          HiveRelFactories.HIVE_BUILDER, "HiveMaterializedViewUnboxingRule");
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      Box box = call.rel(0);
+      call.transformTo(box.eqRelNode);
+    }
+  }
+}
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
index bfc157c..63d260b 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
@@ -217,7 +217,7 @@ public class HiveMaterializedViewRule {
       List<RexNode> identityFields = relBuilder.fields(
           ImmutableBitSet.range(0, rel.getRowType().getFieldCount()).asList());
       RelNode newRel = relBuilder
-          .project(identityFields, ImmutableList.<String>of(), true)
+          .project(identityFields, ImmutableList.of(), true)
           .build();
       call.transformTo(fil.copy(fil.getTraitSet(), ImmutableList.of(newRel)));
     }
@@ -253,7 +253,7 @@ public class HiveMaterializedViewRule {
       relBuilder.push(join);
       List<RexNode> identityFields = relBuilder.fields(
           ImmutableBitSet.range(0, 
join.getRowType().getFieldCount()).asList());
-      relBuilder.project(identityFields, ImmutableList.<String>of(), true);
+      relBuilder.project(identityFields, ImmutableList.of(), true);
       call.transformTo(relBuilder.build());
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 088efd4..04da0a6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -164,6 +164,8 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
@@ -264,6 +266,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCProjectPushDow
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCSortPushDownRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCUnionPushDownRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateIncrementalRewritingRule;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewBoxing;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveNoAggregateIncrementalRewritingRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor;
@@ -2251,10 +2254,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
         // Optimize plan
         basePlan = executeProgram(basePlan, program.build(), mdProvider, 
executorProvider, materializations);
       } else {
+        // Pre-processing to being able to trigger additional rewritings
+        basePlan = HiveMaterializedViewBoxing.boxPlan(basePlan);
+
         // If this is not a rebuild, we use Volcano planner as the decision
         // on whether to use MVs or not and which MVs to use should be 
cost-based
         optCluster.invalidateMetadataQuery();
-        
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
+        
RelMetadataQuery.THREAD_PROVIDERS.set(HiveMaterializationRelMetadataProvider.DEFAULT);
 
         // Add materializations to planner
         for (RelOptMaterialization materialization : materializations) {
@@ -2266,6 +2272,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
         for (RelOptRule rule : 
HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES) {
           planner.addRule(rule);
         }
+        // Unboxing rule
+        planner.addRule(HiveMaterializedViewBoxing.INSTANCE_UNBOXING);
         // Partition pruner rule
         planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE);
         planner.addRule(new HivePartitionPruneRule(conf));
@@ -2332,7 +2340,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       if (mvRebuildMode == MaterializationRebuildMode.AGGREGATE_REBUILD) {
         // Make a cost-based decision factoring the configuration property
         optCluster.invalidateMetadataQuery();
-        
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
+        
RelMetadataQuery.THREAD_PROVIDERS.set(HiveMaterializationRelMetadataProvider.DEFAULT);
         RelMetadataQuery mq = RelMetadataQuery.instance();
         RelOptCost costOriginalPlan = 
mq.getCumulativeCost(calcitePreMVRewritingPlan);
         final double factorSelectivity = HiveConf.getFloatVar(
@@ -5668,7 +5676,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
    * and the field trimmer.
    */
   public static void warmup() {
+    JaninoRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
     
HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(HIVE_REL_NODE_CLASSES);
+    HiveTezModelRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
+    
HiveMaterializationRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
     HiveRelFieldTrimmer.initializeFieldTrimmerClass(HIVE_REL_NODE_CLASSES);
   }
 
diff --git a/ql/src/test/queries/clientpositive/perf/mv_query30.q 
b/ql/src/test/queries/clientpositive/perf/mv_query30.q
new file mode 100644
index 0000000..8c5f936
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/mv_query30.q
@@ -0,0 +1,38 @@
+set hive.mapred.mode=nonstrict;
+set hive.materializedview.rewriting.time.window=-1;
+
+create materialized view mv_customer_customer_address_n100 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk;
+
+explain cbo
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+        ,ca_state as ctr_state, 
+       sum(wr_return_amt) as ctr_total_return
+ from web_returns
+     ,date_dim
+     ,customer_address
+ where wr_returned_date_sk = d_date_sk 
+   and d_year =2002
+   and wr_returning_addr_sk = ca_address_sk 
+ group by wr_returning_customer_sk
+         ,ca_state)
+  select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+       
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+       ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+     ,customer_address
+     ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+                         from customer_total_return ctr2 
+                         where ctr1.ctr_state = ctr2.ctr_state)
+       and ca_address_sk = c_current_addr_sk
+       and ca_state = 'IL'
+       and ctr1.ctr_customer_sk = c_customer_sk
+ order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+                  
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+                  ,c_last_review_date,ctr_total_return
+limit 100;
+
+drop materialized view mv_customer_customer_address_n100;
diff --git a/ql/src/test/queries/clientpositive/perf/mv_query45.q 
b/ql/src/test/queries/clientpositive/perf/mv_query45.q
new file mode 100644
index 0000000..338bd88
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/mv_query45.q
@@ -0,0 +1,27 @@
+set hive.mapred.mode=nonstrict;
+set hive.materializedview.rewriting.time.window=-1;
+
+create materialized view mv_customer_customer_address_n101 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk;
+
+explain cbo
+select  ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+       and c_current_addr_sk = ca_address_sk 
+       and ws_item_sk = i_item_sk 
+       and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 
'85392', '85460', '80348', '81792')
+             or 
+             i_item_id in (select i_item_id
+                             from item
+                             where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 
23, 29)
+                             )
+           )
+       and ws_sold_date_sk = d_date_sk
+       and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100;
+
+drop materialized view mv_customer_customer_address_n101;
diff --git a/ql/src/test/queries/clientpositive/perf/mv_query68.q 
b/ql/src/test/queries/clientpositive/perf/mv_query68.q
new file mode 100644
index 0000000..80cf910
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/mv_query68.q
@@ -0,0 +1,49 @@
+set hive.mapred.mode=nonstrict;
+set hive.materializedview.rewriting.time.window=-1;
+
+create materialized view mv_customer_customer_address_n102 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk;
+
+explain cbo
+select  c_last_name
+       ,c_first_name
+       ,ca_city
+       ,bought_city
+       ,ss_ticket_number
+       ,extended_price
+       ,extended_tax
+       ,list_price
+ from (select ss_ticket_number
+             ,ss_customer_sk
+             ,ca_city bought_city
+             ,sum(ss_ext_sales_price) extended_price 
+             ,sum(ss_ext_list_price) list_price
+             ,sum(ss_ext_tax) extended_tax 
+       from store_sales
+           ,date_dim
+           ,store
+           ,household_demographics
+           ,customer_address 
+       where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+         and store_sales.ss_store_sk = store.s_store_sk  
+        and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+        and store_sales.ss_addr_sk = customer_address.ca_address_sk
+        and date_dim.d_dom between 1 and 2 
+        and (household_demographics.hd_dep_count = 2 or
+             household_demographics.hd_vehicle_count= 1)
+        and date_dim.d_year in (1998,1998+1,1998+2)
+        and store.s_city in ('Cedar Grove','Wildwood')
+       group by ss_ticket_number
+               ,ss_customer_sk
+               ,ss_addr_sk,ca_city) dn
+      ,customer
+      ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+   and customer.c_current_addr_sk = current_addr.ca_address_sk
+   and current_addr.ca_city <> bought_city
+ order by c_last_name
+         ,ss_ticket_number
+ limit 100;
+
+drop materialized view mv_customer_customer_address_n102;
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
index d259074..3318019 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_primitive.q.out
@@ -1188,7 +1188,7 @@ insert_num        part    c1      c2      c3      c4      
c5      c6      b
 6      1       hollow          innocent crabs blushing                 
ambition ebony  liquor age      NULL    3841833197.314137090000000      new
 7      1       wig feel        social fork drum                        search 
bump     conclusion      8.31    NULL    new
 8      1       bubble f        drain loyal station                     racket  
antique bu      0.69    22.832613278700000      new
-9      1       12345678        1234567890123456789012345               
123456789012345 1234567890      NULL    1234567890.543210987700000      new
+9      1       12345678        12345678901234567890123456789012        
123456789012345 1234567890      NULL    1234567890.543210987700000      new
 PREHOOK: query: drop table part_change_same_type_different_params_n8
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@part_change_same_type_different_params_n8
diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query30.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query30.q.out
new file mode 100644
index 0000000..19019aa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query30.q.out
@@ -0,0 +1,136 @@
+PREHOOK: query: create materialized view mv_customer_customer_address_n100 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_customer_customer_address_n100
+POSTHOOK: query: create materialized view mv_customer_customer_address_n100 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_customer_customer_address_n100
+PREHOOK: query: explain cbo
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+        ,ca_state as ctr_state, 
+       sum(wr_return_amt) as ctr_total_return
+ from web_returns
+     ,date_dim
+     ,customer_address
+ where wr_returned_date_sk = d_date_sk 
+   and d_year =2002
+   and wr_returning_addr_sk = ca_address_sk 
+ group by wr_returning_customer_sk
+         ,ca_state)
+  select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+       
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+       ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+     ,customer_address
+     ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+                         from customer_total_return ctr2 
+                         where ctr1.ctr_state = ctr2.ctr_state)
+       and ca_address_sk = c_current_addr_sk
+       and ca_state = 'IL'
+       and ctr1.ctr_customer_sk = c_customer_sk
+ order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+                  
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+                  ,c_last_review_date,ctr_total_return
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@mv_customer_customer_address_n100
+PREHOOK: Input: default@web_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+        ,ca_state as ctr_state, 
+       sum(wr_return_amt) as ctr_total_return
+ from web_returns
+     ,date_dim
+     ,customer_address
+ where wr_returned_date_sk = d_date_sk 
+   and d_year =2002
+   and wr_returning_addr_sk = ca_address_sk 
+ group by wr_returning_customer_sk
+         ,ca_state)
+  select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+       
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+       ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+     ,customer_address
+     ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+                         from customer_total_return ctr2 
+                         where ctr1.ctr_state = ctr2.ctr_state)
+       and ca_address_sk = c_current_addr_sk
+       and ca_state = 'IL'
+       and ctr1.ctr_customer_sk = c_customer_sk
+ order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+                  
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+                  ,c_last_review_date,ctr_total_return
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@mv_customer_customer_address_n100
+POSTHOOK: Input: default@web_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], 
sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10], 
sort11=[$11], sort12=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], 
dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], 
dir10=[ASC], dir11=[ASC], dir12=[ASC], fetch=[100])
+  HiveProject(c_customer_id=[$1], c_salutation=[$2], c_first_name=[$3], 
c_last_name=[$4], c_preferred_cust_flag=[$5], c_birth_day=[$6], 
c_birth_month=[$7], c_birth_year=[$8], c_birth_country=[$9], c_login=[$10], 
c_email_address=[$11], c_last_review_date=[$12], $f2=[$15])
+    HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+      HiveProject(c_customer_sk=[CAST($0):INTEGER NOT NULL], 
c_customer_id=[CAST($1):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" NOT NULL], 
c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], 
c_preferred_cust_flag=[$10], c_birth_day=[$11], c_birth_month=[$12], 
c_birth_year=[$13], c_birth_country=[$14], c_login=[$15], 
c_email_address=[$16], c_last_review_date=[$17])
+        HiveFilter(condition=[=($26, _UTF-16LE'IL')])
+          HiveTableScan(table=[[default, mv_customer_customer_address_n100]], 
table:alias=[default.mv_customer_customer_address_n100])
+      HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], 
algorithm=[none], cost=[not available])
+        HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+          HiveFilter(condition=[IS NOT NULL($2)])
+            HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
+              HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
+                HiveJoin(condition=[=($4, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                  HiveProject(ca_address_sk=[$0], ca_state=[$8])
+                    HiveFilter(condition=[IS NOT NULL($8)])
+                      HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address])
+                  HiveJoin(condition=[=($0, $4)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                    HiveProject(wr_returned_date_sk=[$0], 
wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15])
+                      HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT 
NULL($0), IS NOT NULL($10))])
+                        HiveTableScan(table=[[default, web_returns]], 
table:alias=[web_returns])
+                    HiveProject(d_date_sk=[$0])
+                      HiveFilter(condition=[=($6, 2002)])
+                        HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
+        HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 
1))], ctr_state=[$0])
+          HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))])
+            HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)])
+              HiveProject(ca_state=[$0], wr_returning_customer_sk=[$1], 
$f2=[$2])
+                HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
+                  HiveJoin(condition=[=($4, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                    HiveProject(ca_address_sk=[$0], ca_state=[$8])
+                      HiveFilter(condition=[IS NOT NULL($8)])
+                        HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address])
+                    HiveJoin(condition=[=($0, $4)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                      HiveProject(wr_returned_date_sk=[$0], 
wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15])
+                        HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT 
NULL($10))])
+                          HiveTableScan(table=[[default, web_returns]], 
table:alias=[web_returns])
+                      HiveProject(d_date_sk=[$0])
+                        HiveFilter(condition=[=($6, 2002)])
+                          HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
+
+PREHOOK: query: drop materialized view mv_customer_customer_address_n100
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_customer_customer_address_n100
+PREHOOK: Output: default@mv_customer_customer_address_n100
+POSTHOOK: query: drop materialized view mv_customer_customer_address_n100
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_customer_customer_address_n100
+POSTHOOK: Output: default@mv_customer_customer_address_n100
diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query45.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query45.q.out
new file mode 100644
index 0000000..1b066b4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query45.q.out
@@ -0,0 +1,108 @@
+PREHOOK: query: create materialized view mv_customer_customer_address_n101 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_customer_customer_address_n101
+POSTHOOK: query: create materialized view mv_customer_customer_address_n101 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_customer_customer_address_n101
+Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+PREHOOK: query: explain cbo
+select  ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+       and c_current_addr_sk = ca_address_sk 
+       and ws_item_sk = i_item_sk 
+       and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 
'85392', '85460', '80348', '81792')
+             or 
+             i_item_id in (select i_item_id
+                             from item
+                             where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 
23, 29)
+                             )
+           )
+       and ws_sold_date_sk = d_date_sk
+       and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@mv_customer_customer_address_n101
+PREHOOK: Input: default@web_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select  ca_zip, ca_county, sum(ws_sales_price)
+ from web_sales, customer, customer_address, date_dim, item
+ where ws_bill_customer_sk = c_customer_sk
+       and c_current_addr_sk = ca_address_sk 
+       and ws_item_sk = i_item_sk 
+       and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 
'85392', '85460', '80348', '81792')
+             or 
+             i_item_id in (select i_item_id
+                             from item
+                             where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 
23, 29)
+                             )
+           )
+       and ws_sold_date_sk = d_date_sk
+       and d_qoy = 2 and d_year = 2000
+ group by ca_zip, ca_county
+ order by ca_zip, ca_county
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@mv_customer_customer_address_n101
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
+  HiveProject(ca_zip=[$1], ca_county=[$0], $f2=[$2])
+    HiveAggregate(group=[{3, 4}], agg#0=[sum($2)])
+      HiveFilter(condition=[OR(AND(<>($8, 0), IS NOT NULL($10)), IN(substr($4, 
1, 5), _UTF-16LE'85669':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'86197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'88274':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'83405':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'86475':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'85392':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'85460':VARCHAR(214 [...]
+        HiveProject(ws_sold_date_sk=[$6], ws_item_sk=[$7], 
ws_sales_price=[$9], ca_county=[$11], ca_zip=[$12], d_date_sk=[$5], 
i_item_sk=[$0], i_item_id=[$1], c=[$2], i_item_id0=[$3], literalTrue=[$4])
+          HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+            HiveJoin(condition=[=($1, $3)], joinType=[left], algorithm=[none], 
cost=[not available])
+              HiveJoin(condition=[true], joinType=[inner], algorithm=[none], 
cost=[not available])
+                HiveProject(i_item_sk=[$0], i_item_id=[$1])
+                  HiveTableScan(table=[[default, item]], table:alias=[item])
+                HiveProject(c=[$0])
+                  HiveAggregate(group=[{}], c=[COUNT()])
+                    HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19, 
23, 29)])
+                      HiveTableScan(table=[[default, item]], 
table:alias=[item])
+              HiveProject(i_item_id=[$0], literalTrue=[true])
+                HiveAggregate(group=[{1}])
+                  HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19, 23, 
29)])
+                    HiveTableScan(table=[[default, item]], table:alias=[item])
+            HiveProject(d_date_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], 
ws_bill_customer_sk=[$3], ws_sales_price=[$4], c_customer_sk=[$5], 
ca_county=[$6], ca_zip=[$7])
+              HiveJoin(condition=[=($1, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                HiveProject(d_date_sk=[$0])
+                  HiveFilter(condition=[AND(=($6, 2000), =($10, 2))])
+                    HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
+                HiveJoin(condition=[=($2, $4)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                  HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], 
ws_bill_customer_sk=[$4], ws_sales_price=[$21])
+                    HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT 
NULL($0))])
+                      HiveTableScan(table=[[default, web_sales]], 
table:alias=[web_sales])
+                  HiveProject(c_customer_sk=[CAST($0):INTEGER NOT NULL], 
ca_county=[$25], ca_zip=[$27])
+                    HiveTableScan(table=[[default, 
mv_customer_customer_address_n101]], 
table:alias=[default.mv_customer_customer_address_n101])
+
+PREHOOK: query: drop materialized view mv_customer_customer_address_n101
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_customer_customer_address_n101
+PREHOOK: Output: default@mv_customer_customer_address_n101
+POSTHOOK: query: drop materialized view mv_customer_customer_address_n101
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_customer_customer_address_n101
+POSTHOOK: Output: default@mv_customer_customer_address_n101
diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query68.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query68.q.out
new file mode 100644
index 0000000..3069dfe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query68.q.out
@@ -0,0 +1,149 @@
+PREHOOK: query: create materialized view mv_customer_customer_address_n102 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_customer_customer_address_n102
+POSTHOOK: query: create materialized view mv_customer_customer_address_n102 as
+select * from customer c, customer_address ca
+where c.c_current_addr_sk=ca.ca_address_sk
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_customer_customer_address_n102
+PREHOOK: query: explain cbo
+select  c_last_name
+       ,c_first_name
+       ,ca_city
+       ,bought_city
+       ,ss_ticket_number
+       ,extended_price
+       ,extended_tax
+       ,list_price
+ from (select ss_ticket_number
+             ,ss_customer_sk
+             ,ca_city bought_city
+             ,sum(ss_ext_sales_price) extended_price 
+             ,sum(ss_ext_list_price) list_price
+             ,sum(ss_ext_tax) extended_tax 
+       from store_sales
+           ,date_dim
+           ,store
+           ,household_demographics
+           ,customer_address 
+       where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+         and store_sales.ss_store_sk = store.s_store_sk  
+        and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+        and store_sales.ss_addr_sk = customer_address.ca_address_sk
+        and date_dim.d_dom between 1 and 2 
+        and (household_demographics.hd_dep_count = 2 or
+             household_demographics.hd_vehicle_count= 1)
+        and date_dim.d_year in (1998,1998+1,1998+2)
+        and store.s_city in ('Cedar Grove','Wildwood')
+       group by ss_ticket_number
+               ,ss_customer_sk
+               ,ss_addr_sk,ca_city) dn
+      ,customer
+      ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+   and customer.c_current_addr_sk = current_addr.ca_address_sk
+   and current_addr.ca_city <> bought_city
+ order by c_last_name
+         ,ss_ticket_number
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@mv_customer_customer_address_n102
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select  c_last_name
+       ,c_first_name
+       ,ca_city
+       ,bought_city
+       ,ss_ticket_number
+       ,extended_price
+       ,extended_tax
+       ,list_price
+ from (select ss_ticket_number
+             ,ss_customer_sk
+             ,ca_city bought_city
+             ,sum(ss_ext_sales_price) extended_price 
+             ,sum(ss_ext_list_price) list_price
+             ,sum(ss_ext_tax) extended_tax 
+       from store_sales
+           ,date_dim
+           ,store
+           ,household_demographics
+           ,customer_address 
+       where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+         and store_sales.ss_store_sk = store.s_store_sk  
+        and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+        and store_sales.ss_addr_sk = customer_address.ca_address_sk
+        and date_dim.d_dom between 1 and 2 
+        and (household_demographics.hd_dep_count = 2 or
+             household_demographics.hd_vehicle_count= 1)
+        and date_dim.d_year in (1998,1998+1,1998+2)
+        and store.s_city in ('Cedar Grove','Wildwood')
+       group by ss_ticket_number
+               ,ss_customer_sk
+               ,ss_addr_sk,ca_city) dn
+      ,customer
+      ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+   and customer.c_current_addr_sk = current_addr.ca_address_sk
+   and current_addr.ca_city <> bought_city
+ order by c_last_name
+         ,ss_ticket_number
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@mv_customer_customer_address_n102
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100])
+  HiveProject(c_last_name=[$8], c_first_name=[$7], ca_city=[$9], 
bought_city=[$2], ss_ticket_number=[$0], extended_price=[$3], 
extended_tax=[$5], list_price=[$4])
+    HiveJoin(condition=[AND(=($1, $6), <>($2, $9))], joinType=[inner], 
algorithm=[none], cost=[not available])
+      HiveProject(ss_ticket_number=[$3], ss_customer_sk=[$1], 
bought_city=[$0], extended_price=[$4], list_price=[$5], extended_tax=[$6])
+        HiveAggregate(group=[{1, 3, 5, 7}], agg#0=[sum($8)], agg#1=[sum($9)], 
agg#2=[sum($10)])
+          HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+            HiveProject(ca_address_sk=[$0], ca_city=[$6])
+              HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address])
+            HiveJoin(condition=[=($2, $11)], joinType=[inner], 
algorithm=[none], cost=[not available])
+              HiveJoin(condition=[=($4, $10)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                HiveJoin(condition=[=($0, $9)], joinType=[inner], 
algorithm=[none], cost=[not available])
+                  HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], 
ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], 
ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18])
+                    HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT 
NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))])
+                      HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales])
+                  HiveProject(d_date_sk=[$0])
+                    HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 
1998, 1999, 2000))])
+                      HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
+                HiveProject(s_store_sk=[$0])
+                  HiveFilter(condition=[IN($22, _UTF-16LE'Cedar 
Grove':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'Wildwood':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")])
+                    HiveTableScan(table=[[default, store]], 
table:alias=[store])
+              HiveProject(hd_demo_sk=[$0])
+                HiveFilter(condition=[OR(=($4, 1), =($3, 2))])
+                  HiveTableScan(table=[[default, household_demographics]], 
table:alias=[household_demographics])
+      HiveProject(c_customer_sk=[CAST($0):INTEGER NOT NULL], 
c_first_name=[$8], c_last_name=[$9], ca_city=[$24])
+        HiveTableScan(table=[[default, mv_customer_customer_address_n102]], 
table:alias=[default.mv_customer_customer_address_n102])
+
+PREHOOK: query: drop materialized view mv_customer_customer_address_n102
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_customer_customer_address_n102
+PREHOOK: Output: default@mv_customer_customer_address_n102
+POSTHOOK: query: drop materialized view mv_customer_customer_address_n102
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_customer_customer_address_n102
+POSTHOOK: Output: default@mv_customer_customer_address_n102

Reply via email to