kasakrisz commented on code in PR #6393:
URL: https://github.com/apache/hive/pull/6393#discussion_r3413525110
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java:
##########
@@ -153,10 +156,23 @@ static Table getTable(Configuration configuration,
org.apache.hadoop.hive.metast
properties.setProperty(Catalogs.NAME,
TableIdentifier.of(hmsTable.getDbName(), hmsTable.getTableName()).toString());
properties.setProperty(Catalogs.LOCATION, hmsTable.getSd().getLocation());
hmsTable.getParameters().computeIfPresent(InputFormatConfig.CATALOG_NAME,
- (k, v) -> {
- properties.setProperty(k, v);
- return v;
- });
+ (k, v) -> {
+ properties.setProperty(k, v);
+ return v;
+ });
+
+ if (
+ "iceberg".equals(
+ HiveConf.getVar(configuration,
HiveConf.ConfVars.HIVE_ICEBERG_MATERIALIZEDVIEW_METADATA_LOCATION)
+ ) &&
+ (
+
TableType.MATERIALIZED_VIEW.name().equalsIgnoreCase(hmsTable.getTableType()) ||
Review Comment:
Can we have managed materialized views with iceberg metadata location?
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java:
##########
@@ -279,4 +302,135 @@ private static Map<String, String>
filterIcebergTableProperties(Properties props
}
return map;
}
+
+ public static MaterializedView createMaterializedView(
+ Configuration conf, Properties props, String viewOriginalText,
String viewExpandedText,
+ CreationMetadata creationMetadata) {
+
+ boolean isExternalMaterializedView = "iceberg".equals(
+ HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_ICEBERG_MATERIALIZEDVIEW_METADATA_LOCATION));
+
+ Schema schema = schema(props);
+ PartitionSpec spec = spec(props, schema);
+ String location = props.getProperty(LOCATION);
+ String storageTableLocation = location +
+ (isExternalMaterializedView ?
MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX : "");
+ String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
+
+ Optional<Catalog> catalog = loadCatalog(conf, catalogName);
+ SortOrder sortOrder = HMSTablePropertyHelper.getSortOrder(props, schema);
+ if (catalog.isEmpty()) {
+ throw new IllegalStateException("Unable to load catalog: " +
catalogName);
+ }
+ String name = props.getProperty(NAME);
+ Preconditions.checkNotNull(name, "Table identifier not set");
+
+ ViewCatalog viewCatalog = (ViewCatalog) catalog.get();
+
+ Map<String, String> map = filterIcebergTableProperties(props);
+ String storageTableIdentifier = name +
+ (isExternalMaterializedView ?
MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX : "");
+ Table storageTable =
catalog.get().buildTable(TableIdentifier.parse(storageTableIdentifier), schema)
+
.withPartitionSpec(spec).withLocation(storageTableLocation).withProperties(map).withSortOrder(sortOrder)
+ .create();
+
+ Map<String, String> viewProperties = Maps.newHashMapWithExpectedSize(2);
+ viewProperties.put(MATERIALIZED_VIEW_PROPERTY_KEY, "true");
+ viewProperties.put(MATERIALIZED_VIEW_STORAGE_TABLE_PROPERTY_KEY,
storageTableIdentifier);
+ viewProperties.put(MATERIALIZED_VIEW_ORIGINAL_TEXT, viewOriginalText);
+
+ long createTime = System.currentTimeMillis();
+
+ List<SourceState> sourceStates = Lists.newArrayList();
+
+ for (var sourceTable : creationMetadata.getSourceTables()) {
+ SourceState.SourceStateType type =
sourceTable.getTable().getViewExpandedText() == null ?
+ SourceState.SourceStateType.TABLE :
+ SourceState.SourceStateType.VIEW;
+
+ String dbName = sourceTable.getTable().getDbName();
+ String sourceTableName = sourceTable.getTable().getTableName();
+ String sourceTableNamespace = "default";
+ String sourceTableCatalog = sourceTable.getTable().isSetCatName() ?
sourceTable.getTable().getCatName() : null;
+ Catalog tableCatalog = loadCatalog(conf,
sourceTableCatalog).orElse(catalog.get());
+ UUID uuid = null;
+ Snapshot snapshot = null;
+ ViewVersion version = null;
+
+ switch (type) {
+ case TABLE -> {
+ Table icebergTable =
tableCatalog.loadTable(TableIdentifier.parse(dbName + "." + sourceTableName));
Review Comment:
How about
```
TableIdentifier.of(dbName, sourceTableName)
```
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native.q:
##########
@@ -0,0 +1,97 @@
+--! qt:dataset:src
+--! qt:dataset:part
+
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+set hive.external.table.purge.default=true;
+
+
+drop materialized view if exists mat_native;
+drop table if exists tbl_ice_native;
+
+create table tbl_ice_native(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+insert into tbl_ice_native values (1, 'one', 50), (2, 'two', 51), (3, 'three',
52), (4, 'four', 53), (5, 'five', 54);
+
+explain
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1') as
+select b, c from tbl_ice_native where c > 52;
+
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native;
+
+
+SHOW TABLES;
+SHOW MATERIALIZED VIEWS;
+
+show create table mat_native;
+describe formatted mat_native;
+
+drop materialized view mat_native;
+
+create materialized view mat_native_orc stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native_orc;
+
+show create table mat_native_orc;
+explain show create table mat_native_orc;
+describe extended mat_native_orc;
+explain describe formatted mat_native_orc;
Review Comment:
What is the goal of this statement ?
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java:
##########
@@ -94,6 +95,8 @@
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import
org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.iceberg.util.Tasks;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
Review Comment:
Are we using these?
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native.q:
##########
@@ -0,0 +1,97 @@
+--! qt:dataset:src
+--! qt:dataset:part
+
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+set hive.external.table.purge.default=true;
+
+
+drop materialized view if exists mat_native;
+drop table if exists tbl_ice_native;
+
+create table tbl_ice_native(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+insert into tbl_ice_native values (1, 'one', 50), (2, 'two', 51), (3, 'three',
52), (4, 'four', 53), (5, 'five', 54);
+
+explain
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1') as
+select b, c from tbl_ice_native where c > 52;
+
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native;
+
+
+SHOW TABLES;
+SHOW MATERIALIZED VIEWS;
+
+show create table mat_native;
+describe formatted mat_native;
+
+drop materialized view mat_native;
+
+create materialized view mat_native_orc stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native_orc;
+
+show create table mat_native_orc;
+explain show create table mat_native_orc;
+describe extended mat_native_orc;
+explain describe formatted mat_native_orc;
+
+describe formatted mat_native_orc;
+
+insert into tbl_ice_native values (6, 'six', 60);
+
+select * from mat_native_orc;
+
+alter materialized view mat_native_orc rebuild;
+
+select * from mat_native_orc;
+
+create materialized view mat_native_orc_2 stored as orc tblproperties
('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+
+-- partitioned materialized view
+
+create materialized view mat_native_orc_partitioned partitioned on (b) stored
by iceberg stored as orc tblproperties ('format-version'='1') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native_orc_partitioned;
+
+describe formatted mat_native_orc_partitioned;
+
+-- multiple tables
Review Comment:
How about
```
--multiple source tables
```
##########
itests/src/test/resources/testconfiguration.properties:
##########
@@ -436,6 +436,9 @@ iceberg.llap.query.files=\
iceberg_merge_files.q,\
llap_iceberg_read_orc.q,\
llap_iceberg_read_parquet.q,\
+ mv_iceberg_native.q,\
+ mv_iceberg_native_outdated_insert.q,\
+ mv_iceberg_native_outdated_sleep.q,\
Review Comment:
Is there any reason for running these tests with llap?
##########
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java:
##########
@@ -2127,6 +2127,9 @@ public static enum ConfVars {
"If this is set to true the URI for auth will have the default
location masked with DEFAULT_TABLE_LOCATION"),
HIVE_ICEBERG_ALLOW_DATAFILES_IN_TABLE_LOCATION_ONLY("hive.iceberg.allow.datafiles.in.table.location.only",
false,
"If this is set to true, then all the data files being read should be
withing the table location"),
+
HIVE_ICEBERG_MATERIALIZEDVIEW_METADATA_LOCATION("hive.iceberg.materializedview.metadata.location",
"metastore",
Review Comment:
I'm thinking about dropping this config since we introduce external
materialized views. We can extend the grammar to allow the `external` keyword
in create materialized view statements.
```
create external materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1') as
select b, c from tbl_ice_native where c > 52;
```
For external MVs, store the MV metadata via the storage handler API, and
keep the existing HMS storage logic for non-external MVs.
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native_outdated_insert.q:
##########
@@ -0,0 +1,52 @@
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+
+
+
+drop table if exists tbl_ice;
+drop table if exists tbl_ice_v2;
+
+create external table tbl_ice(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+create external table tbl_ice_v2(d int, e string, f int) stored by iceberg
stored as orc tblproperties ('format-version'='2');
+
+insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five
v2', 54);
+
+create materialized view mat1
+ stored by iceberg stored as orc
+as
+select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
+join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
+-- group by tbl_ice.b, tbl_ice.c, tbl_ice_v2.e;
Review Comment:
Please remove this comment if not necessary for testing.
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native_outdated_sleep.q:
##########
@@ -0,0 +1,54 @@
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+
+
+
+drop table if exists tbl_ice;
+drop table if exists tbl_ice_v2;
Review Comment:
Should the MVs created in this test be dropped first?
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java:
##########
@@ -204,6 +216,53 @@ public void preCreateTable(CreateTableRequest request) {
setSortOrder(hmsTable, schema, tableProperties);
}
+ private void setTableType(org.apache.hadoop.hive.metastore.api.Table
hmsTable) {
+ String tableTypeValue = hmsTable.getTableType();
+
+ if (tableTypeValue == null) {
+ // Set the table type even for non HiveCatalog based tables
+
hmsTable.getParameters().put(BaseMetastoreTableOperations.TABLE_TYPE_PROP,
+
BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toUpperCase());
+ return;
+ }
+
+ TableType tableType = Enum.valueOf(TableType.class, tableTypeValue);
Review Comment:
Should the exception handled when `tableTypeValue` contains an unknown enum
constant ?
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java:
##########
@@ -390,6 +435,48 @@ private void
doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable
* @param tblParams hms table properties, must be non-null
*/
private void assertNotCrossTableMetadataLocationChange(Map<String, String>
tblParams, EnvironmentContext context) {
+ if (icebergMaterializedView != null) {
+ assertNotCrossTableMetadataLocationChangeForMaterializedView(tblParams,
context);
+ } else {
+ assertNotCrossTableMetadataLocationChangeForTable(tblParams, context);
+ }
+
+ }
+
+ private void assertNotCrossTableMetadataLocationChangeForMaterializedView(
+ Map<String, String> tblParams, EnvironmentContext context) {
+ if
(tblParams.containsKey(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)) {
+ Preconditions.checkArgument(icebergMaterializedView != null,
+ "Cannot perform table migration to Iceberg and setting the snapshot
location in one step. " +
+ "Please migrate the table first");
+ String newMetadataLocation =
tblParams.get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+ FileIO io = ((BaseTable)
icebergMaterializedView.getStorageTable()).operations().io();
+ ViewMetadata newMetadata = ViewMetadataParser.read(io,
newMetadataLocation);
+ ViewMetadata currentMetadata = ((BaseView)
icebergMaterializedView.getView()).operations().current();
+ if (!currentMetadata.uuid().equals(newMetadata.uuid())) {
+ throw new UnsupportedOperationException(
+ String.format("Cannot change iceberg table %s metadata location
pointing to another table's metadata %s",
+ icebergTable.name(), newMetadataLocation)
+ );
+ }
+
+ String currentMetadataLocation = currentMetadata.metadataFileLocation();
+ if (!currentMetadataLocation.equals(newMetadataLocation) &&
+
!context.getProperties().containsKey(MANUAL_ICEBERG_METADATA_LOCATION_CHANGE)) {
+ // If we got here then this is an alter table operation where the
table to be changed had an Iceberg commit
+ // meanwhile. The base metadata locations differ, while we know that
this wasn't an intentional, manual
+ // metadata_location set by a user. To protect the interim commit we
need to refresh the metadata file location.
+ tblParams.put(BaseMetastoreTableOperations.METADATA_LOCATION_PROP,
currentMetadataLocation);
+ LOG.warn("Detected an alter table operation attempting to do
alterations on an Iceberg table with a stale " +
+ "metadata_location. Considered base metadata_location: {},
Actual metadata_location: {}. Will override " +
+ "this request with the refreshed metadata_location in order to
preserve the concurrent commit.",
+ newMetadataLocation, currentMetadataLocation);
+ }
+ }
Review Comment:
Can these lines be extracted? They appear in
`assertNotCrossTableMetadataLocationChangeForTable` too.
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java:
##########
@@ -279,4 +302,135 @@ private static Map<String, String>
filterIcebergTableProperties(Properties props
}
return map;
}
+
+ public static MaterializedView createMaterializedView(
+ Configuration conf, Properties props, String viewOriginalText,
String viewExpandedText,
+ CreationMetadata creationMetadata) {
+
+ boolean isExternalMaterializedView = "iceberg".equals(
+ HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_ICEBERG_MATERIALIZEDVIEW_METADATA_LOCATION));
+
+ Schema schema = schema(props);
+ PartitionSpec spec = spec(props, schema);
+ String location = props.getProperty(LOCATION);
+ String storageTableLocation = location +
+ (isExternalMaterializedView ?
MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX : "");
+ String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
+
+ Optional<Catalog> catalog = loadCatalog(conf, catalogName);
+ SortOrder sortOrder = HMSTablePropertyHelper.getSortOrder(props, schema);
+ if (catalog.isEmpty()) {
+ throw new IllegalStateException("Unable to load catalog: " +
catalogName);
+ }
+ String name = props.getProperty(NAME);
+ Preconditions.checkNotNull(name, "Table identifier not set");
+
+ ViewCatalog viewCatalog = (ViewCatalog) catalog.get();
+
+ Map<String, String> map = filterIcebergTableProperties(props);
+ String storageTableIdentifier = name +
+ (isExternalMaterializedView ?
MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX : "");
+ Table storageTable =
catalog.get().buildTable(TableIdentifier.parse(storageTableIdentifier), schema)
+
.withPartitionSpec(spec).withLocation(storageTableLocation).withProperties(map).withSortOrder(sortOrder)
+ .create();
+
+ Map<String, String> viewProperties = Maps.newHashMapWithExpectedSize(2);
+ viewProperties.put(MATERIALIZED_VIEW_PROPERTY_KEY, "true");
+ viewProperties.put(MATERIALIZED_VIEW_STORAGE_TABLE_PROPERTY_KEY,
storageTableIdentifier);
+ viewProperties.put(MATERIALIZED_VIEW_ORIGINAL_TEXT, viewOriginalText);
+
+ long createTime = System.currentTimeMillis();
+
+ List<SourceState> sourceStates = Lists.newArrayList();
+
+ for (var sourceTable : creationMetadata.getSourceTables()) {
+ SourceState.SourceStateType type =
sourceTable.getTable().getViewExpandedText() == null ?
Review Comment:
Please use `sourceTable.getTable().getTableType()` instead of
`sourceTable.getTable().getViewExpandedText()`
AFAIK, nesting views is not supported in Hive. Could you please add a test
if it doesn't already exist?
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native.q:
##########
@@ -0,0 +1,97 @@
+--! qt:dataset:src
+--! qt:dataset:part
+
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+set hive.external.table.purge.default=true;
+
+
+drop materialized view if exists mat_native;
+drop table if exists tbl_ice_native;
+
+create table tbl_ice_native(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+insert into tbl_ice_native values (1, 'one', 50), (2, 'two', 51), (3, 'three',
52), (4, 'four', 53), (5, 'five', 54);
+
+explain
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1') as
+select b, c from tbl_ice_native where c > 52;
+
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native;
+
+
+SHOW TABLES;
+SHOW MATERIALIZED VIEWS;
+
+show create table mat_native;
+describe formatted mat_native;
+
+drop materialized view mat_native;
+
+create materialized view mat_native_orc stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native_orc;
+
+show create table mat_native_orc;
+explain show create table mat_native_orc;
Review Comment:
What is the goal of this statement ?
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native.q:
##########
@@ -0,0 +1,97 @@
+--! qt:dataset:src
+--! qt:dataset:part
+
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+set hive.external.table.purge.default=true;
+
+
+drop materialized view if exists mat_native;
Review Comment:
Should the rest of the MVs created by this test be dropped?
mat_native_orc, mat_native_orc_partitioned...
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java:
##########
@@ -76,6 +89,16 @@ public final class Catalogs {
ImmutableSet.of(InputFormatConfig.TABLE_SCHEMA,
InputFormatConfig.PARTITION_SPEC, LOCATION, NAME,
InputFormatConfig.CATALOG_NAME);
+ public static final String MATERIALIZED_VIEW_PROPERTY_KEY =
"iceberg.materialized.view";
+ public static final String MATERIALIZED_VIEW_STORAGE_TABLE_PROPERTY_KEY =
+ "iceberg.materialized.view.storage.table";
+ public static final String
MATERIALIZED_VIEW_BASE_SNAPSHOT_PROPERTY_KEY_PREFIX =
+ "iceberg.base.snapshot.";
+ public static final String MATERIALIZED_VIEW_VERSION_PROPERTY_KEY =
+ "iceberg.materialized.view.version";
Review Comment:
These are not used. Should we remove them?
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native.q:
##########
@@ -0,0 +1,97 @@
+--! qt:dataset:src
+--! qt:dataset:part
+
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+set hive.external.table.purge.default=true;
+
+
+drop materialized view if exists mat_native;
+drop table if exists tbl_ice_native;
+
+create table tbl_ice_native(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+insert into tbl_ice_native values (1, 'one', 50), (2, 'two', 51), (3, 'three',
52), (4, 'four', 53), (5, 'five', 54);
+
+explain
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1') as
+select b, c from tbl_ice_native where c > 52;
+
+create materialized view mat_native stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native;
+
+
+SHOW TABLES;
+SHOW MATERIALIZED VIEWS;
+
+show create table mat_native;
+describe formatted mat_native;
+
+drop materialized view mat_native;
+
+create materialized view mat_native_orc stored by iceberg stored as orc
tblproperties ('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
+
+select * from mat_native_orc;
+
+show create table mat_native_orc;
+explain show create table mat_native_orc;
+describe extended mat_native_orc;
+explain describe formatted mat_native_orc;
+
+describe formatted mat_native_orc;
+
+insert into tbl_ice_native values (6, 'six', 60);
+
+select * from mat_native_orc;
+
+alter materialized view mat_native_orc rebuild;
+
+select * from mat_native_orc;
+
+create materialized view mat_native_orc_2 stored as orc tblproperties
('format-version'='1', 'max-staleness-ms'='1000') as
+select b, c from tbl_ice_native where c > 52;
Review Comment:
This MV is created but never used.
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native_outdated_sleep.q:
##########
@@ -0,0 +1,54 @@
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+
+
+
+drop table if exists tbl_ice;
+drop table if exists tbl_ice_v2;
+
+create external table tbl_ice(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+create external table tbl_ice_v2(d int, e string, f int) stored by iceberg
stored as orc tblproperties ('format-version'='2');
+
+insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five
v2', 54);
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52),
(4, 'four', 53), (5, 'five', 54);
+
+create materialized view mat1
+stored by iceberg stored as orc
+ tblproperties ('rewriting.time.window' = '1min')
+as
+select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
+join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
+
+!sleep 61;
+
+-- view should be empty
Review Comment:
MV is not empty, please check the q.out file
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc9.q:
##########
@@ -26,3 +26,4 @@ alter materialized view mv_insert_only rebuild;
select * from mv_acid;
select * from mv_insert_only;
+
Review Comment:
Is this change intentional?
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native_outdated_insert.q:
##########
@@ -0,0 +1,52 @@
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+
+
+
+drop table if exists tbl_ice;
+drop table if exists tbl_ice_v2;
Review Comment:
Should the MVs created in this test be dropped first?
##########
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java:
##########
@@ -292,7 +290,16 @@ public static boolean isExternalTable(Table table) {
return false;
}
- return isExternal(params);
+ String storageHandler = params.get("storage_handler");
+ if (
+ StringUtils.isNotBlank(storageHandler) &&
+ TableType.EXTERNAL_MATERIALIZED_VIEW.equals(table.getTableType()) &&
+
"org.apache.iceberg.mr.hive.HiveIcebergStorageHandler".equals(storageHandler)
+ ) {
Review Comment:
Can this be simplified to
```
if
(TableType.EXTERNAL_MATERIALIZED_VIEW.toString().equals(table.getTableType())
```
?
In the current form it is always false: comparing an enum with a string
```
TableType.EXTERNAL_MATERIALIZED_VIEW.equals(table.getTableType())
```
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java:
##########
@@ -285,12 +285,15 @@ public void checkValidity(Configuration conf) throws
HiveException {
}
}
- if (isView() || isMaterializedView()) {
- assert (getViewOriginalText() != null);
- assert (getViewExpandedText() != null);
- } else {
- assert(getViewOriginalText() == null);
- assert(getViewExpandedText() == null);
+ if (!(DDLUtils.isIcebergTable(this) && isMaterializedView() &&
+ "iceberg".equals(HiveConf.getVar(conf,
ConfVars.HIVE_ICEBERG_MATERIALIZEDVIEW_METADATA_LOCATION)))) {
+ if (isView() || isMaterializedView()) {
+ assert (getViewOriginalText() != null);
+ assert (getViewExpandedText() != null);
+ } else {
+ assert (getViewOriginalText() == null);
+ assert (getViewExpandedText() == null);
+ }
Review Comment:
I think these should be present only in case of Hive views
(`TableType.VIRTUAL_VIEW`) and managed materialized views
(`TableType.MATERIALIZED_VIEW`).
##########
itests/src/test/resources/testconfiguration.properties:
##########
@@ -488,6 +491,9 @@ iceberg.llap.only.query.files=\
iceberg_merge_files.q,\
llap_iceberg_read_orc.q,\
llap_iceberg_read_parquet.q,\
+ mv_iceberg_native.q,\
+ mv_iceberg_native_outdated_insert.q,\
+ mv_iceberg_native_outdated_sleep.q,\
Review Comment:
Is there any reason for running these tests only with llap?
##########
standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/HookEnabledMetaStoreClient.java:
##########
@@ -56,6 +55,9 @@
import static
org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
public class HookEnabledMetaStoreClient extends MetaStoreClientWrapper {
+ public static final String ICEBERG_STORAGE_HANDLER =
"org.apache.iceberg.mr.hive.HiveIcebergStorageHandler";
+ public static final String STORAGE_HANDLER_KEY = "storage_handler";
Review Comment:
These are not referenced, please remove them.
##########
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java:
##########
@@ -17,6 +17,35 @@
*/
package org.apache.hadoop.hive.metastore.utils;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.common.repl.ReplConst;
+import org.apache.hadoop.hive.metastore.ColumnType;
+import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DatabaseType;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.GetPartitionsByNamesRequest;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.PartitionSpec;
+import org.apache.hadoop.hive.metastore.api.PartitionsSpecByExprResult;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nullable;
Review Comment:
Is this change intentional?
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java:
##########
@@ -14081,14 +14083,22 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB
qb, PlannerContext plannerCt
storageFormat.getInputFormat(), storageFormat.getOutputFormat(),
location, storageFormat.getSerde(), storageFormat.getStorageHandler(),
storageFormat.getSerdeProps());
- addDbAndTabToOutputs(new String[] {qualTabName.getDb(),
qualTabName.getTable()}, TableType.MATERIALIZED_VIEW,
+ addDbAndTabToOutputs(new String[] {qualTabName.getDb(),
qualTabName.getTable()}, getViewType(storageFormat.getStorageHandler()),
false, tblProps, storageFormat);
queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW);
qb.setViewDesc(createVwDesc);
return selectStmt;
}
+ private static @NotNull TableType getViewType(String storageHandler) {
+ if
("org.apache.iceberg.mr.hive.HiveIcebergStorageHandler".equals(storageHandler))
{
+ return TableType.EXTERNAL_MATERIALIZED_VIEW;
+ }
+
+ return TableType.MATERIALIZED_VIEW;
+ }
Review Comment:
Hardcoding `HiveIcebergStorageHandler` is not a good approach. I think the
presence of a storage handler should indicate that an external MV needs to be
created. Alternatively, extending the grammar to allow the `EXTERNAL` keyword
in `CREATE MATERIALIZED VIEW` statements would also be a good option.
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateMaterializedViewDesc.java:
##########
@@ -232,6 +233,10 @@ public Table toTable(HiveConf conf) throws HiveException {
HiveStorageHandler storageHandler = tbl.getStorageHandler();
+ if (storageHandler != null &&
storageHandler.getClass().equals("org.apache.iceberg.mr.hive.HiveIcebergStorageHandler")){
Review Comment:
Please remove this if statement as it is always false. Sonar also point it
out that comparing a class with a String can not be true.
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_native_outdated_sleep.q:
##########
@@ -0,0 +1,54 @@
+-- MV metadata is stored in Iceberg
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
+
+set hive.explain.user=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.iceberg.materializedview.metadata.location=iceberg;
+
+
+
+drop table if exists tbl_ice;
+drop table if exists tbl_ice_v2;
+
+create external table tbl_ice(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='1');
+create external table tbl_ice_v2(d int, e string, f int) stored by iceberg
stored as orc tblproperties ('format-version'='2');
+
+insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five
v2', 54);
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52),
(4, 'four', 53), (5, 'five', 54);
+
+create materialized view mat1
+stored by iceberg stored as orc
+ tblproperties ('rewriting.time.window' = '1min')
+as
+select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
+join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
+
+!sleep 61;
+
+-- view should be empty
+select * from mat1;
+
+-- view is up-to-date, use it
+explain cbo
+select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on
tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
+
+
+
+-- view is outdated, cannot be used
Review Comment:
Based on the q.out file the plan of this second exlain cbo statement is
```
CBO PLAN:
HiveProject(tbl_ice.b=[$0], tbl_ice.c=[$1], tbl_ice_v2.e=[$2])
HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
```
so the MV is used.
##########
itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java:
##########
@@ -394,7 +396,6 @@ public void clearTablesCreatedDuringTests() throws
Exception {
conf.set("hive.metastore.filter.hook",
"org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl");
db = Hive.get(conf);
-
Review Comment:
Is this change intentional?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]