This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7b72320  HIVE-25747: Make a cost-based decision when rebuilding 
materialized views (Krisztian Kasa, reviewed by Zoltan Haindrich, Aman Sinha)
7b72320 is described below

commit 7b723200c7721a5b0400d6194f5515d7c5e47e27
Author: Krisztian Kasa <[email protected]>
AuthorDate: Wed Feb 16 09:14:53 2022 +0100

    HIVE-25747: Make a cost-based decision when rebuilding materialized views 
(Krisztian Kasa, reviewed by Zoltan Haindrich, Aman Sinha)
---
 .../AlterMaterializedViewRebuildAnalyzer.java      | 45 ++++++++++-
 .../calcite/HiveTezModelRelMetadataProvider.java   |  1 -
 .../stats/HiveIncrementalRelMdRowCount.java        | 89 ++++++++++++++++++++++
 3 files changed, 130 insertions(+), 5 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
index 682818a..05516f7 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
@@ -46,6 +46,7 @@ import 
org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInBetweenExpandRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.ColumnPropagationException;
@@ -59,6 +60,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializati
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveIncrementalRelMdRowCount;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
@@ -274,7 +276,7 @@ public class AlterMaterializedViewRebuildAnalyzer extends 
CalcitePlanner {
         }
 
         return applyPartitionIncrementalRebuildPlan(
-                basePlan, mdProvider, executorProvider, materialization, 
calcitePreMVRewritingPlan);
+                basePlan, mdProvider, executorProvider, materialization, 
optCluster, calcitePreMVRewritingPlan);
       }
 
       // Now we trigger some needed optimization rules again
@@ -382,7 +384,8 @@ public class AlterMaterializedViewRebuildAnalyzer extends 
CalcitePlanner {
 
     private RelNode applyPartitionIncrementalRebuildPlan(
             RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor 
executorProvider,
-            HiveRelOptMaterialization materialization, RelNode 
calcitePreMVRewritingPlan) {
+            HiveRelOptMaterialization materialization, RelOptCluster 
optCluster,
+            RelNode calcitePreMVRewritingPlan) {
 
       if (materialization.isSourceTablesUpdateDeleteModified()) {
         // TODO: Create rewrite rule to transform the plan to partition based 
incremental rebuild
@@ -399,8 +402,42 @@ public class AlterMaterializedViewRebuildAnalyzer extends 
CalcitePlanner {
         return applyPreJoinOrderingTransforms(basePlan, mdProvider, 
executorProvider);
       }
 
-      return applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
-              HiveInsertOnlyScanWriteIdRule.INSTANCE, 
HiveAggregatePartitionIncrementalRewritingRule.INSTANCE);
+      RelNode incrementalRebuildPlan = applyIncrementalRebuild(basePlan, 
mdProvider, executorProvider,
+              HiveInsertOnlyScanWriteIdRule.INSTANCE,
+              HiveAggregatePartitionIncrementalRewritingRule.INSTANCE);
+
+      // Make a cost-based decision factoring the configuration property
+      RelOptCost costOriginalPlan = calculateCost(
+              optCluster, mdProvider, HiveTezModelRelMetadataProvider.DEFAULT, 
calcitePreMVRewritingPlan);
+
+      RelOptCost costIncrementalRebuildPlan = calculateCost(optCluster, 
mdProvider,
+              
HiveIncrementalRelMdRowCount.createMetadataProvider(materialization), 
incrementalRebuildPlan);
+
+      final double factorSelectivity = HiveConf.getFloatVar(
+              conf, 
HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR);
+      costIncrementalRebuildPlan = 
costIncrementalRebuildPlan.multiplyBy(factorSelectivity);
+
+      if (costOriginalPlan.isLe(costIncrementalRebuildPlan)) {
+        mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD;
+        return calcitePreMVRewritingPlan;
+      }
+
+      return incrementalRebuildPlan;
+    }
+
+    private RelOptCost calculateCost(
+            RelOptCluster optCluster,
+            RelMetadataProvider originalMetadataProvider,
+            JaninoRelMetadataProvider metadataProvider,
+            RelNode plan) {
+      optCluster.invalidateMetadataQuery();
+      RelMetadataQuery.THREAD_PROVIDERS.set(metadataProvider);
+      try {
+        return RelMetadataQuery.instance().getCumulativeCost(plan);
+      } finally {
+        optCluster.invalidateMetadataQuery();
+        
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(originalMetadataProvider));
+      }
     }
 
     private RelNode applyIncrementalRebuild(RelNode basePlan, 
RelMetadataProvider mdProvider,
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
index 6d153f8..5255506 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
@@ -57,5 +57,4 @@ public class HiveTezModelRelMetadataProvider {
                 HiveRelMdCollation.SOURCE,
                 HiveRelMdPredicates.SOURCE,
                 JaninoRelMetadataProvider.DEFAULT)));
-
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
new file mode 100644
index 0000000..bc2427e
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.metastore.api.SourceTable;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.MaterializedViewMetadata;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class HiveIncrementalRelMdRowCount extends HiveRelMdRowCount {
+
+  public static JaninoRelMetadataProvider 
createMetadataProvider(RelOptMaterialization materialization) {
+    return JaninoRelMetadataProvider.of(
+            ChainedRelMetadataProvider.of(
+                    ImmutableList.of(
+                            
HiveIncrementalRelMdRowCount.source(materialization),
+                            HiveTezModelRelMetadataProvider.DEFAULT
+                    )));
+  }
+
+  public static RelMetadataProvider source(RelOptMaterialization 
materialization) {
+    MaterializedViewMetadata mvMetadata = ((RelOptHiveTable) 
materialization.tableRel.getTable())
+            .getHiveTableMD().getMVMetadata();
+    Map<String, SourceTable> sourceTableMap = new 
HashMap<>(mvMetadata.getSourceTables().size());
+    for (SourceTable sourceTable : mvMetadata.getSourceTables()) {
+      Table table = sourceTable.getTable();
+      sourceTableMap.put(
+              TableName.getQualified(table.getCatName(), table.getDbName(), 
table.getTableName()), sourceTable);
+    }
+
+    return ReflectiveRelMetadataProvider
+            .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new 
HiveIncrementalRelMdRowCount(sourceTableMap));
+  }
+
+  private final Map<String, SourceTable> sourceTableMap;
+
+  public HiveIncrementalRelMdRowCount(Map<String, SourceTable> sourceTableMap) 
{
+    this.sourceTableMap = sourceTableMap;
+  }
+
+
+  @Override
+  public Double getRowCount(TableScan rel, RelMetadataQuery mq) {
+    if (!(rel instanceof HiveTableScan)) {
+      return super.getRowCount(rel, mq);
+    }
+
+    HiveTableScan tableScan = (HiveTableScan) rel;
+    RelOptHiveTable relOptHiveTable = (RelOptHiveTable) tableScan.getTable();
+    org.apache.hadoop.hive.ql.metadata.Table table = 
relOptHiveTable.getHiveTableMD();
+    String fullyQualifiedName = TableName.getQualified(table.getCatName(), 
table.getDbName(), table.getTableName());
+    SourceTable sourceTable = sourceTableMap.get(fullyQualifiedName);
+    if (sourceTable == null) {
+      return super.getRowCount(rel, mq);
+    }
+
+    return (double) sourceTable.getInsertedCount();
+  }
+}

Reply via email to