This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7b72320 HIVE-25747: Make a cost base decision when rebuilding
materialized views (Krisztian Kasa, reviewed by Zoltan Haindrich, Aman Sinha)
7b72320 is described below
commit 7b723200c7721a5b0400d6194f5515d7c5e47e27
Author: Krisztian Kasa <[email protected]>
AuthorDate: Wed Feb 16 09:14:53 2022 +0100
HIVE-25747: Make a cost base decision when rebuilding materialized views
(Krisztian Kasa, reviewed by Zoltan Haindrich, Aman Sinha)
---
.../AlterMaterializedViewRebuildAnalyzer.java | 45 ++++++++++-
.../calcite/HiveTezModelRelMetadataProvider.java | 1 -
.../stats/HiveIncrementalRelMdRowCount.java | 89 ++++++++++++++++++++++
3 files changed, 130 insertions(+), 5 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
index 682818a..05516f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInBetweenExpandRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.ColumnPropagationException;
@@ -59,6 +60,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializati
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveIncrementalRelMdRowCount;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
@@ -274,7 +276,7 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
}
return applyPartitionIncrementalRebuildPlan(
- basePlan, mdProvider, executorProvider, materialization, calcitePreMVRewritingPlan);
+ basePlan, mdProvider, executorProvider, materialization, optCluster, calcitePreMVRewritingPlan);
}
// Now we trigger some needed optimization rules again
@@ -382,7 +384,8 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
private RelNode applyPartitionIncrementalRebuildPlan(
RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider,
- HiveRelOptMaterialization materialization, RelNode calcitePreMVRewritingPlan) {
+ HiveRelOptMaterialization materialization, RelOptCluster optCluster,
+ RelNode calcitePreMVRewritingPlan) {
if (materialization.isSourceTablesUpdateDeleteModified()) {
// TODO: Create rewrite rule to transform the plan to partition based incremental rebuild
@@ -399,8 +402,42 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
return applyPreJoinOrderingTransforms(basePlan, mdProvider, executorProvider);
}
- return applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
- HiveInsertOnlyScanWriteIdRule.INSTANCE, HiveAggregatePartitionIncrementalRewritingRule.INSTANCE);
+ RelNode incrementalRebuildPlan = applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
+ HiveInsertOnlyScanWriteIdRule.INSTANCE,
+ HiveAggregatePartitionIncrementalRewritingRule.INSTANCE);
+
+ // Make a cost-based decision factoring the configuration property
+ RelOptCost costOriginalPlan = calculateCost(
+ optCluster, mdProvider, HiveTezModelRelMetadataProvider.DEFAULT, calcitePreMVRewritingPlan);
+
+ RelOptCost costIncrementalRebuildPlan = calculateCost(optCluster, mdProvider,
+ HiveIncrementalRelMdRowCount.createMetadataProvider(materialization), incrementalRebuildPlan);
+
+ final double factorSelectivity = HiveConf.getFloatVar(
+ conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR);
+ costIncrementalRebuildPlan = costIncrementalRebuildPlan.multiplyBy(factorSelectivity);
+
+ if (costOriginalPlan.isLe(costIncrementalRebuildPlan)) {
+ mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD;
+ return calcitePreMVRewritingPlan;
+ }
+
+ return incrementalRebuildPlan;
+ }
+
+ private RelOptCost calculateCost(
+ RelOptCluster optCluster,
+ RelMetadataProvider originalMetadataProvider,
+ JaninoRelMetadataProvider metadataProvider,
+ RelNode plan) {
+ optCluster.invalidateMetadataQuery();
+ RelMetadataQuery.THREAD_PROVIDERS.set(metadataProvider);
+ try {
+ return RelMetadataQuery.instance().getCumulativeCost(plan);
+ } finally {
+ optCluster.invalidateMetadataQuery();
+ RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(originalMetadataProvider));
+ }
}
private RelNode applyIncrementalRebuild(RelNode basePlan, RelMetadataProvider mdProvider,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
index 6d153f8..5255506 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java
@@ -57,5 +57,4 @@ public class HiveTezModelRelMetadataProvider {
HiveRelMdCollation.SOURCE,
HiveRelMdPredicates.SOURCE,
JaninoRelMetadataProvider.DEFAULT)));
-
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
new file mode 100644
index 0000000..bc2427e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.metastore.api.SourceTable;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.MaterializedViewMetadata;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class HiveIncrementalRelMdRowCount extends HiveRelMdRowCount {
+
+ public static JaninoRelMetadataProvider createMetadataProvider(RelOptMaterialization materialization) {
+ return JaninoRelMetadataProvider.of(
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ HiveIncrementalRelMdRowCount.source(materialization),
+ HiveTezModelRelMetadataProvider.DEFAULT
+ )));
+ }
+
+ public static RelMetadataProvider source(RelOptMaterialization materialization) {
+ MaterializedViewMetadata mvMetadata = ((RelOptHiveTable) materialization.tableRel.getTable())
+ .getHiveTableMD().getMVMetadata();
+ Map<String, SourceTable> sourceTableMap = new HashMap<>(mvMetadata.getSourceTables().size());
+ for (SourceTable sourceTable : mvMetadata.getSourceTables()) {
+ Table table = sourceTable.getTable();
+ sourceTableMap.put(
+ TableName.getQualified(table.getCatName(), table.getDbName(), table.getTableName()), sourceTable);
+ }
+
+ return ReflectiveRelMetadataProvider
+ .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveIncrementalRelMdRowCount(sourceTableMap));
+ }
+
+ private final Map<String, SourceTable> sourceTableMap;
+
+ public HiveIncrementalRelMdRowCount(Map<String, SourceTable> sourceTableMap) {
+ this.sourceTableMap = sourceTableMap;
+ }
+
+
+ @Override
+ public Double getRowCount(TableScan rel, RelMetadataQuery mq) {
+ if (!(rel instanceof HiveTableScan)) {
+ return super.getRowCount(rel, mq);
+ }
+
+ HiveTableScan tableScan = (HiveTableScan) rel;
+ RelOptHiveTable relOptHiveTable = (RelOptHiveTable) tableScan.getTable();
+ org.apache.hadoop.hive.ql.metadata.Table table = relOptHiveTable.getHiveTableMD();
+ String fullyQualifiedName = TableName.getQualified(table.getCatName(), table.getDbName(), table.getTableName());
+ SourceTable sourceTable = sourceTableMap.get(fullyQualifiedName);
+ if (sourceTable == null) {
+ return super.getRowCount(rel, mq);
+ }
+
+ return (double) sourceTable.getInsertedCount();
+ }
+}