[
https://issues.apache.org/jira/browse/HIVE-26498?focusedWorklogId=806944&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-806944
]
ASF GitHub Bot logged work on HIVE-26498:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 08/Sep/22 09:27
Start Date: 08/Sep/22 09:27
Worklog Time Spent: 10m
Work Description: lcspinter commented on code in PR #3552:
URL: https://github.com/apache/hive/pull/3552#discussion_r965718266
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -160,11 +181,79 @@ public static Boolean isOutdatedMaterializedView(
return false;
}
+ private static Boolean isOutdatedMaterializedView(
+ MaterializationSnapshot snapshot, Hive db,
+ Set<TableName> tablesUsed, Table materializedViewTable) throws
HiveException {
+ List<String> tablesUsedNames = tablesUsed.stream()
+ .map(tableName -> TableName.getDbTable(tableName.getDb(),
tableName.getTable()))
+ .collect(Collectors.toList());
+
+ Map<String, String> snapshotMap = snapshot.getTableSnapshots();
+ if (snapshotMap == null || snapshotMap.isEmpty()) {
+ LOG.debug("Materialized view " +
materializedViewTable.getFullyQualifiedName() +
+ " ignored for rewriting as we could not obtain current snapshot
ids");
+ return null;
+ }
+
+ Set<String> storedTablesUsed =
materializedViewTable.getMVMetadata().getSourceTableFullNames();
+ for (String fullyQualifiedTableName : tablesUsedNames) {
+ // Note. If the materialized view does not contain a table that is
contained in the query,
+ // we do not need to check whether that specific table is outdated or
not. If a rewriting
+ // is produced in those cases, it is because that additional table is
joined with the
+ // existing tables with an append-columns only join, i.e., PK-FK + not
null.
+ if (!storedTablesUsed.contains(fullyQualifiedTableName)) {
+ continue;
+ }
+
+ Table table = db.getTable(fullyQualifiedTableName);
+ if (table.getStorageHandler() == null) {
+ LOG.debug("Materialized view {} ignored for rewriting as we could not
storage handler of table {}",
+ materializedViewTable.getFullyQualifiedName(),
fullyQualifiedTableName);
+ return null;
+ }
+ String currentTableSnapshot =
table.getStorageHandler().getCurrentSnapshotId(table);
+ if (isBlank(currentTableSnapshot)) {
Review Comment:
The `currentTableSnapshot` will never be empty or null.
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString);
+ }
+ }
+
+ // Snapshot of native ACID tables.
+ private String validTxnList;
+ // Snapshot of non-native ACID and insert-only transactional tables. Key is
the fully qualified name of the table.
+ // Value is the unique id of the snapshot provided by the table's storage
HiveStorageHandler.
+ private Map<String, String> tableSnapshots;
+
+ private MaterializationSnapshot() {
+ }
+
+ public MaterializationSnapshot(String validTxnList) {
+ this.validTxnList = validTxnList;
+ this.tableSnapshots = null;
+ }
+
+ public MaterializationSnapshot(Map<String, String> tableSnapshots) {
+ this.validTxnList = null;
+ this.tableSnapshots = tableSnapshots;
+ }
+
+ /**
+ * Returns the json representation of this object.
+ * @return {@link String} containing a json.
+ */
+ public String asJsonString() {
+ try (Writer out = new StringWriter()) {
+ new ObjectMapper().writeValue(out, this);
+ return out.toString();
+ } catch (IOException e) {
+ throw new UncheckedIOException("Unable to convert " + this + " to json",
e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "MaterializationSnapshot{" +
+ "validTxnList='" + validTxnList + '\'' +
+ ", tableSnapshots=" + tableSnapshots +
+ '}';
+ }
+
+ public String getValidTxnList() {
+ return validTxnList;
+ }
+
+ public Map<String, String> getTableSnapshots() {
Review Comment:
Please add Javadoc for `getTableSnapshots()`.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -403,4 +492,28 @@ private static RelNode
copyNodeScanNewCluster(RelOptCluster optCluster, RelNode
}
return newScan;
}
+
+ public static MaterializationSnapshot getSnapshotOf(DDLOperationContext
context, Set<TableName> tables)
+ throws HiveException {
+ Map<String, String> snapshot = getSnapshotOf(context.getDb(), tables);
+ if (snapshot.isEmpty()) {
+ return new
MaterializationSnapshot(context.getConf().get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
+ }
+
+ return new MaterializationSnapshot(snapshot);
+ }
+
+ private static Map<String, String> getSnapshotOf(Hive db, Set<TableName>
tables) throws HiveException {
+ Map<String, String> snapshot = new HashMap<>(tables.size());
+ for (TableName tableName : tables) {
+ Table table = db.getTable(tableName);
+ if (table.getStorageHandler() != null) {
+ String sh = table.getStorageHandler().getCurrentSnapshotId(table);
+ if (isNotBlank(sh)) {
Review Comment:
`sh` will never be empty or null.
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString);
+ }
+ }
+
+ // Snapshot of native ACID tables.
+ private String validTxnList;
+ // Snapshot of non-native ACID and insert-only transactional tables. Key is
the fully qualified name of the table.
+ // Value is the unique id of the snapshot provided by the table's storage
HiveStorageHandler.
+ private Map<String, String> tableSnapshots;
+
+ private MaterializationSnapshot() {
+ }
+
+ public MaterializationSnapshot(String validTxnList) {
+ this.validTxnList = validTxnList;
+ this.tableSnapshots = null;
+ }
+
+ public MaterializationSnapshot(Map<String, String> tableSnapshots) {
+ this.validTxnList = null;
+ this.tableSnapshots = tableSnapshots;
+ }
+
+ /**
+ * Returns the json representation of this object.
+ * @return {@link String} containing a json.
+ */
+ public String asJsonString() {
+ try (Writer out = new StringWriter()) {
+ new ObjectMapper().writeValue(out, this);
+ return out.toString();
+ } catch (IOException e) {
+ throw new UncheckedIOException("Unable to convert " + this + " to json",
e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "MaterializationSnapshot{" +
+ "validTxnList='" + validTxnList + '\'' +
+ ", tableSnapshots=" + tableSnapshots +
+ '}';
+ }
+
+ public String getValidTxnList() {
Review Comment:
Please add Javadoc for `getValidTxnList()`.
Issue Time Tracking
-------------------
Worklog Id: (was: 806944)
Time Spent: 3h 40m (was: 3.5h)
> Implement MV maintenance with Iceberg sources using full rebuild
> ----------------------------------------------------------------
>
> Key: HIVE-26498
> URL: https://issues.apache.org/jira/browse/HIVE-26498
> Project: Hive
> Issue Type: Sub-task
> Components: Materialized views
> Reporter: Krisztian Kasa
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 3h 40m
> Remaining Estimate: 0h
>
> {code}
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> create external table tbl_ice(a int, b string, c int) stored by iceberg
> stored as orc tblproperties ('format-version'='2');
> insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52),
> (4, 'four', 53), (5, 'five', 54);
> create materialized view mat1 as
> select b, c from tbl_ice where c > 52;
> insert into tbl_ice values (111, 'one', 55), (333, 'two', 56);
> explain cbo
> alter materialized view mat1 rebuild;
> alter materialized view mat1 rebuild;
> {code}
> MV full rebuild plan
> {code}
> CBO PLAN:
> HiveProject(b=[$1], c=[$2])
> HiveFilter(condition=[>($2, 52)])
> HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice])
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)