[
https://issues.apache.org/jira/browse/HIVE-26498?focusedWorklogId=806353&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-806353
]
ASF GitHub Bot logged work on HIVE-26498:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 06/Sep/22 13:04
Start Date: 06/Sep/22 13:04
Worklog Time Spent: 10m
Work Description: lcspinter commented on code in PR #3552:
URL: https://github.com/apache/hive/pull/3552#discussion_r963624074
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -977,4 +977,11 @@ public Configuration get() {
return conf;
}
}
+
+ @Override
+ public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table
hmsTable) {
Review Comment:
Why cannot we use long as return type?
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java:
##########
@@ -489,4 +489,13 @@ default void validateSinkDesc(FileSinkDesc sinkDesc)
throws SemanticException {
*/
default void executeOperation(org.apache.hadoop.hive.ql.metadata.Table
table, AlterTableExecuteSpec executeSpec) {
}
+
+ /**
+ * Query the unique snapshot id of the passed table.
+ * @param table - {@link org.apache.hadoop.hive.ql.metadata.Table} which
snapshot id should be returned.
+ * @return String representation of the snapshotId or null if not supported.
+ */
+ default String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table
table) {
Review Comment:
I would change this to `getCurrentSnapshotId`
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -32,16 +36,19 @@
import static java.util.Collections.unmodifiableSet;
public class MaterializedViewMetadata {
+ private static final Logger LOG =
LoggerFactory.getLogger(MaterializedViewMetadata.class);
+
final CreationMetadata creationMetadata;
MaterializedViewMetadata(CreationMetadata creationMetadata) {
this.creationMetadata = creationMetadata;
}
public MaterializedViewMetadata(
- String catalogName, String dbName, String mvName, Set<SourceTable>
sourceTables, String validTxnList) {
+ String catalogName, String dbName, String mvName, Set<SourceTable>
sourceTables,
+ MaterializationSnapshot snapshot) {
this.creationMetadata = new CreationMetadata(catalogName, dbName, mvName,
toFullTableNames(sourceTables));
- this.creationMetadata.setValidTxnList(validTxnList);
+ this.creationMetadata.setValidTxnList(snapshot.asJsonString());
Review Comment:
This is not a List anymore, but rather an object. Maybe we should consider
renaming it.
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() {
return unmodifiableList(creationMetadata.getSourceTables());
}
- public String getValidTxnList() {
- return creationMetadata.getValidTxnList();
+ public MaterializationSnapshot getSnapshot() {
Review Comment:
Javadoc. Please cover what happens in case of native ACID table and
non-native table.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -98,7 +105,22 @@ public static Table extractTable(RelOptMaterialization
materialization) {
* materialized view definition uses external tables.
*/
public static Boolean isOutdatedMaterializedView(
- String validTxnsList, HiveTxnManager txnMgr,
+ String validTxnsList, HiveTxnManager txnMgr, Hive db,
+ Set<TableName> tablesUsed, Table materializedViewTable) throws
HiveException {
+
+ MaterializedViewMetadata mvMetadata =
materializedViewTable.getMVMetadata();
+ MaterializationSnapshot snapshot = mvMetadata.getSnapshot();
+
+ if (snapshot.getTableSnapshots() != null &&
!snapshot.getTableSnapshots().isEmpty()) {
Review Comment:
snapshot can be null
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -977,4 +977,11 @@ public Configuration get() {
return conf;
}
}
+
+ @Override
+ public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table
hmsTable) {
+ TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
+ Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties());
+ return Long.toString(table.currentSnapshot().sequenceNumber());
Review Comment:
`table.currentSnashot()` can be null, if the table is empty.
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() {
return unmodifiableList(creationMetadata.getSourceTables());
}
- public String getValidTxnList() {
- return creationMetadata.getValidTxnList();
+ public MaterializationSnapshot getSnapshot() {
+ if (creationMetadata.getValidTxnList() == null ||
creationMetadata.getValidTxnList().isEmpty()) {
+ LOG.debug("Could not obtain materialization snapshot of materialized
view {}.{}",
+ creationMetadata.getDbName(), creationMetadata.getDbName());
Review Comment:
`getDbName` is passed twice
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString, null);
+ }
+ }
+
+ // snapshot of native ACID tables
+ private String validTxnList;
+ // snapshot of non-native ACID tables. Key is the fully qualified name of
the table.
Review Comment:
Materialized views are only supported for iceberg V2 tables? If not, we
should change this java doc.
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString, null);
+ }
+ }
+
+ // snapshot of native ACID tables
+ private String validTxnList;
+ // snapshot of non-native ACID tables. Key is the fully qualified name of
the table.
+ // Value is the unique id of the snapshot provided by the table's storage
HiveStorageHandler
+ private Map<String, String> tableSnapshots;
+
+ private MaterializationSnapshot() {
+ }
+
+ public MaterializationSnapshot(String validTxnList, Map<String, String>
tableSnapshots) {
+ this.validTxnList = validTxnList;
+ this.tableSnapshots = tableSnapshots;
+ }
+
+ public String asJsonString() {
Review Comment:
Few lines of javadoc wouldn't hurt
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc.q:
##########
@@ -0,0 +1,35 @@
+
Issue Time Tracking
-------------------
Worklog Id: (was: 806353)
Time Spent: 1h 10m (was: 1h)
> Implement MV maintenance with Iceberg sources using full rebuild
> ----------------------------------------------------------------
>
> Key: HIVE-26498
> URL: https://issues.apache.org/jira/browse/HIVE-26498
> Project: Hive
> Issue Type: Sub-task
> Components: Materialized views
> Reporter: Krisztian Kasa
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1h 10m
> Remaining Estimate: 0h
>
> {code}
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> create external table tbl_ice(a int, b string, c int) stored by iceberg
> stored as orc tblproperties ('format-version'='2');
> insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52),
> (4, 'four', 53), (5, 'five', 54);
> create materialized view mat1 as
> select b, c from tbl_ice where c > 52;
> insert into tbl_ice values (111, 'one', 55), (333, 'two', 56);
> explain cbo
> alter materialized view mat1 rebuild;
> alter materialized view mat1 rebuild;
> {code}
> MV full rebuild plan
> {code}
> CBO PLAN:
> HiveProject(b=[$1], c=[$2])
> HiveFilter(condition=[>($2, 52)])
> HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice])
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)