This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new b9ada6d8b30 HIVE-28801: Iceberg: Refactor HMS table parameter setting
to be able to reuse (Zoltán Rátkai, reviewed by Denys Kuzmenko, Dmitriy
Fingerman)
b9ada6d8b30 is described below
commit b9ada6d8b30b325c91e6ab88a9deb7bb98b8a738
Author: Zoltan Ratkai <[email protected]>
AuthorDate: Tue Apr 8 16:46:10 2025 +0200
HIVE-28801: Iceberg: Refactor HMS table parameter setting to be able to
reuse (Zoltán Rátkai, reviewed by Denys Kuzmenko, Dmitriy Fingerman)
Closes #5676, Iceberg #12461
---
.../iceberg/hive/HMSTablePropertyHelper.java | 245 +++++++++++++++++++++
.../apache/iceberg/hive/HiveOperationsBase.java | 23 --
.../apache/iceberg/hive/HiveTableOperations.java | 168 +-------------
.../org/apache/iceberg/hive/TestHiveCatalog.java | 20 +-
.../iceberg/mr/hive/HiveIcebergMetaHook.java | 7 +-
.../iceberg/mr/hive/TestConflictingDataFiles.java | 12 +-
6 files changed, 269 insertions(+), 206 deletions(-)
diff --git
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HMSTablePropertyHelper.java
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HMSTablePropertyHelper.java
new file mode 100644
index 00000000000..2eaaaee1272
--- /dev/null
+++
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HMSTablePropertyHelper.java
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hive;
+
+import java.util.Locale;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import
org.apache.hive.iceberg.com.fasterxml.jackson.core.JsonProcessingException;
+import org.apache.iceberg.BaseMetastoreTableOperations;
+import org.apache.iceberg.PartitionSpecParser;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SchemaParser;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotSummary;
+import org.apache.iceberg.SortOrderParser;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.TableProperties;
+import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
+import org.apache.iceberg.relocated.com.google.common.collect.BiMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.util.JsonUtil;
+import org.apache.parquet.hadoop.ParquetOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.iceberg.TableProperties.GC_ENABLED;
+
+public class HMSTablePropertyHelper {
+ private static final Logger LOG =
LoggerFactory.getLogger(HMSTablePropertyHelper.class);
+ public static final String HIVE_ICEBERG_STORAGE_HANDLER =
"org.apache.iceberg.mr.hive.HiveIcebergStorageHandler";
+
+ private static final BiMap<String, String> ICEBERG_TO_HMS_TRANSLATION =
ImmutableBiMap.of(
+ // gc.enabled in Iceberg and external.table.purge in Hive are meant to
do the same things
+ // but with different names
+ GC_ENABLED, "external.table.purge", TableProperties.PARQUET_COMPRESSION,
ParquetOutputFormat.COMPRESSION,
+ TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
ParquetOutputFormat.BLOCK_SIZE);
+
+ private HMSTablePropertyHelper() {
+ }
+
+ /**
+ * Provides key translation where necessary between Iceberg and HMS props.
This translation is needed because some
+ * properties control the same behaviour but are named differently in
Iceberg and Hive. Therefore changes to these
+ * property pairs should be synchronized.
+ *
+ * Example: Deleting data files upon DROP TABLE is enabled using
gc.enabled=true in Iceberg and
+ * external.table.purge=true in Hive. Hive and Iceberg users are unaware of
each other's control flags, therefore
+ * inconsistent behaviour can occur from e.g. a Hive user's point of view if
external.table.purge=true is set on the
+ * HMS table but gc.enabled=false is set on the Iceberg table, resulting in
no data file deletion.
+ *
+ * @param hmsProp The HMS property that should be translated to Iceberg
property
+ * @return Iceberg property equivalent to the hmsProp. If no such
translation exists, the original hmsProp is returned
+ */
+ public static String translateToIcebergProp(String hmsProp) {
+ return ICEBERG_TO_HMS_TRANSLATION.inverse().getOrDefault(hmsProp, hmsProp);
+ }
+ /** Updates the HMS Table properties based on the Iceberg Table metadata. */
+ public static void updateHmsTableForIcebergTable(
+ String newMetadataLocation,
+ Table tbl,
+ TableMetadata metadata,
+ Set<String> obsoleteProps,
+ boolean hiveEngineEnabled,
+ long maxHiveTablePropertySize,
+ String currentLocation) {
+ Map<String, String> parameters =
+ Optional.ofNullable(tbl.getParameters()).orElseGet(Maps::newHashMap);
+ Map<String, String> summary =
+ Optional.ofNullable(metadata.currentSnapshot())
+ .map(Snapshot::summary)
+ .orElseGet(ImmutableMap::of);
+ // push all Iceberg table properties into HMS
+ metadata.properties().entrySet().stream()
+ .filter(entry ->
!entry.getKey().equalsIgnoreCase(HiveCatalog.HMS_TABLE_OWNER))
+ .forEach(
+ entry -> {
+ String key = entry.getKey();
+ // translate key names between Iceberg and HMS where needed
+ String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key,
key);
+ parameters.put(hmsKey, entry.getValue());
+ });
+ setCommonParameters(
+ newMetadataLocation,
+ metadata.uuid(),
+ obsoleteProps,
+ currentLocation,
+ parameters,
+
BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toUpperCase(Locale.ENGLISH),
+ metadata.schema(),
+ maxHiveTablePropertySize);
+ setStorageHandler(parameters, hiveEngineEnabled);
+
+ // Set the basic statistics
+ if (summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP) != null) {
+ parameters.put(StatsSetupConst.NUM_FILES,
summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
+ }
+ if (summary.get(SnapshotSummary.TOTAL_RECORDS_PROP) != null) {
+ parameters.put(StatsSetupConst.ROW_COUNT,
summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
+ }
+ if (summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP) != null) {
+ parameters.put(StatsSetupConst.TOTAL_SIZE,
summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+ }
+
+ setSnapshotStats(metadata, parameters, maxHiveTablePropertySize);
+ setPartitionSpec(metadata, parameters, maxHiveTablePropertySize);
+ setSortOrder(metadata, parameters, maxHiveTablePropertySize);
+
+ tbl.setParameters(parameters);
+ }
+
+ private static void setCommonParameters(
+ String newMetadataLocation,
+ String uuid,
+ Set<String> obsoleteProps,
+ String currentLocation,
+ Map<String, String> parameters,
+ String tableType,
+ Schema schema,
+ long maxHiveTablePropertySize) {
+ if (uuid != null) {
+ parameters.put(TableProperties.UUID, uuid);
+ }
+
+ obsoleteProps.forEach(parameters::remove);
+
+ parameters.put(BaseMetastoreTableOperations.TABLE_TYPE_PROP, tableType);
+ parameters.put(BaseMetastoreTableOperations.METADATA_LOCATION_PROP,
newMetadataLocation);
+
+ if (currentLocation != null && !currentLocation.isEmpty()) {
+
parameters.put(BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP,
currentLocation);
+ }
+
+ setSchema(schema, parameters, maxHiveTablePropertySize);
+ }
+
+ @VisibleForTesting
+ static void setStorageHandler(Map<String, String> parameters, boolean
hiveEngineEnabled) {
+ // If needed set the 'storage_handler' property to enable query from Hive
+ if (hiveEngineEnabled) {
+ parameters.put(hive_metastoreConstants.META_TABLE_STORAGE,
HIVE_ICEBERG_STORAGE_HANDLER);
+ } else {
+ parameters.remove(hive_metastoreConstants.META_TABLE_STORAGE);
+ }
+ }
+
+ @VisibleForTesting
+ static void setSnapshotStats(TableMetadata metadata, Map<String, String>
parameters, long maxHiveTablePropertySize) {
+ parameters.remove(TableProperties.CURRENT_SNAPSHOT_ID);
+ parameters.remove(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP);
+ parameters.remove(TableProperties.CURRENT_SNAPSHOT_SUMMARY);
+
+ Snapshot currentSnapshot = metadata.currentSnapshot();
+ if (exposeInHmsProperties(maxHiveTablePropertySize) && currentSnapshot !=
null) {
+ parameters.put(TableProperties.CURRENT_SNAPSHOT_ID,
String.valueOf(currentSnapshot.snapshotId()));
+ parameters.put(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP,
String.valueOf(currentSnapshot.timestampMillis()));
+ setSnapshotSummary(parameters, currentSnapshot,
maxHiveTablePropertySize);
+ }
+
+ parameters.put(TableProperties.SNAPSHOT_COUNT,
String.valueOf(metadata.snapshots().size()));
+ }
+
+ @VisibleForTesting
+ static void setSnapshotSummary(
+ Map<String, String> parameters,
+ Snapshot currentSnapshot,
+ long maxHiveTablePropertySize) {
+ try {
+ String summary =
JsonUtil.mapper().writeValueAsString(currentSnapshot.summary());
+ if (summary.length() <= maxHiveTablePropertySize) {
+ parameters.put(TableProperties.CURRENT_SNAPSHOT_SUMMARY, summary);
+ } else {
+ LOG.warn("Not exposing the current snapshot({}) summary in HMS since
it exceeds {} characters",
+ currentSnapshot.snapshotId(), maxHiveTablePropertySize);
+ }
+ } catch (JsonProcessingException e) {
+ LOG.warn("Failed to convert current snapshot({}) summary to a json
string", currentSnapshot.snapshotId(), e);
+ }
+ }
+
+ @VisibleForTesting
+ static void setPartitionSpec(TableMetadata metadata, Map<String, String>
parameters, long maxHiveTablePropertySize) {
+ parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC);
+ if (exposeInHmsProperties(maxHiveTablePropertySize) && metadata.spec() !=
null && metadata.spec().isPartitioned()) {
+ String spec = PartitionSpecParser.toJson(metadata.spec());
+ setField(parameters, TableProperties.DEFAULT_PARTITION_SPEC, spec,
maxHiveTablePropertySize);
+ }
+ }
+
+ @VisibleForTesting
+ static void setSortOrder(TableMetadata metadata, Map<String, String>
parameters, long maxHiveTablePropertySize) {
+ parameters.remove(TableProperties.DEFAULT_SORT_ORDER);
+ if (exposeInHmsProperties(maxHiveTablePropertySize) &&
+ metadata.sortOrder() != null &&
+ metadata.sortOrder().isSorted()) {
+ String sortOrder = SortOrderParser.toJson(metadata.sortOrder());
+ setField(parameters, TableProperties.DEFAULT_SORT_ORDER, sortOrder,
maxHiveTablePropertySize);
+ }
+ }
+
+ public static void setSchema(Schema schema, Map<String, String> parameters,
long maxHiveTablePropertySize) {
+ parameters.remove(TableProperties.CURRENT_SCHEMA);
+ if (exposeInHmsProperties(maxHiveTablePropertySize) && schema != null) {
+ String jsonSchema = SchemaParser.toJson(schema);
+ setField(parameters, TableProperties.CURRENT_SCHEMA, jsonSchema,
maxHiveTablePropertySize);
+ }
+ }
+
+ private static void setField(
+ Map<String, String> parameters,
+ String key, String value,
+ long maxHiveTablePropertySize) {
+ if (value.length() <= maxHiveTablePropertySize) {
+ parameters.put(key, value);
+ } else {
+ LOG.warn("Not exposing {} in HMS since it exceeds {} characters", key,
maxHiveTablePropertySize);
+ }
+ }
+
+ private static boolean exposeInHmsProperties(long maxHiveTablePropertySize) {
+ return maxHiveTablePropertySize > 0;
+ }
+}
diff --git
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java
index a24548290e2..e8f9c59d6de 100644
---
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java
+++
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java
@@ -31,9 +31,7 @@
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.ClientPool;
import org.apache.iceberg.Schema;
-import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.TableMetadata;
-import org.apache.iceberg.TableProperties;
import org.apache.iceberg.exceptions.NoSuchIcebergTableException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
@@ -100,27 +98,6 @@ default Map<String, String> hmsEnvContext(String
metadataLocation) {
metadataLocation);
}
- default boolean exposeInHmsProperties() {
- return maxHiveTablePropertySize() > 0;
- }
-
- default void setSchema(Schema schema, Map<String, String> parameters) {
- parameters.remove(TableProperties.CURRENT_SCHEMA);
- if (exposeInHmsProperties() && schema != null) {
- String jsonSchema = SchemaParser.toJson(schema);
- setField(parameters, TableProperties.CURRENT_SCHEMA, jsonSchema);
- }
- }
-
- default void setField(Map<String, String> parameters, String key, String
value) {
- if (value.length() <= maxHiveTablePropertySize()) {
- parameters.put(key, value);
- } else {
- LOG.warn(
- "Not exposing {} in HMS since it exceeds {} characters", key,
maxHiveTablePropertySize());
- }
- }
-
static void validateTableIsIceberg(Table table, String fullName) {
String tableType =
table.getParameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP);
NoSuchIcebergTableException.check(
diff --git
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index b7142a5f491..cb84a6a57cd 100644
---
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -20,10 +20,8 @@
package org.apache.iceberg.hive;
import java.util.Collections;
-import java.util.Locale;
import java.util.Map;
import java.util.Objects;
-import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
@@ -33,15 +31,9 @@
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
-import
org.apache.hive.iceberg.com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.iceberg.BaseMetastoreOperations;
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.ClientPool;
-import org.apache.iceberg.PartitionSpecParser;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.SnapshotSummary;
-import org.apache.iceberg.SortOrderParser;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.exceptions.AlreadyExistsException;
@@ -52,19 +44,11 @@
import org.apache.iceberg.hadoop.ConfigProperties;
import org.apache.iceberg.io.FileIO;
import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
-import org.apache.iceberg.relocated.com.google.common.collect.BiMap;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.relocated.com.google.common.collect.Maps;
-import org.apache.iceberg.util.JsonUtil;
import org.apache.iceberg.util.PropertyUtil;
-import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.iceberg.TableProperties.GC_ENABLED;
-
/**
* TODO we should be able to extract some more commonalities to
BaseMetastoreTableOperations to
* avoid code duplication between this class and Metacat Tables.
@@ -77,31 +61,6 @@ public class HiveTableOperations extends
BaseMetastoreTableOperations
"iceberg.hive.metadata-refresh-max-retries";
private static final int HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES_DEFAULT =
2;
- private static final BiMap<String, String> ICEBERG_TO_HMS_TRANSLATION =
- ImmutableBiMap.of(
- // gc.enabled in Iceberg and external.table.purge in Hive are meant
to do the same things
- // but with different names
- GC_ENABLED, "external.table.purge",
- TableProperties.PARQUET_COMPRESSION, ParquetOutputFormat.COMPRESSION,
- TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
ParquetOutputFormat.BLOCK_SIZE);
-
- /**
- * Provides key translation where necessary between Iceberg and HMS props.
This translation is needed because some
- * properties control the same behaviour but are named differently in
Iceberg and Hive. Therefore changes to these
- * property pairs should be synchronized.
- *
- * Example: Deleting data files upon DROP TABLE is enabled using
gc.enabled=true in Iceberg and
- * external.table.purge=true in Hive. Hive and Iceberg users are unaware of
each other's control flags, therefore
- * inconsistent behaviour can occur from e.g. a Hive user's point of view if
external.table.purge=true is set on the
- * HMS table but gc.enabled=false is set on the Iceberg table, resulting in
no data file deletion.
- *
- * @param hmsProp The HMS property that should be translated to Iceberg
property
- * @return Iceberg property equivalent to the hmsProp. If no such
translation exists, the original hmsProp is returned
- */
- public static String translateToIcebergProp(String hmsProp) {
- return ICEBERG_TO_HMS_TRANSLATION.inverse().getOrDefault(hmsProp, hmsProp);
- }
-
private final String fullName;
private final String catalogName;
private final String database;
@@ -220,13 +179,14 @@ protected void doCommit(TableMetadata base, TableMetadata
metadata) {
.filter(key -> !metadata.properties().containsKey(key))
.collect(Collectors.toSet());
}
-
- Map<String, String> summary =
- Optional.ofNullable(metadata.currentSnapshot())
- .map(Snapshot::summary)
- .orElseGet(ImmutableMap::of);
- setHmsTableParameters(
- newMetadataLocation, tbl, metadata, removedProps, hiveEngineEnabled,
summary);
+ HMSTablePropertyHelper.updateHmsTableForIcebergTable(
+ newMetadataLocation,
+ tbl,
+ metadata,
+ removedProps,
+ hiveEngineEnabled,
+ maxHiveTablePropertySize,
+ currentMetadataLocation());
if (!keepHiveStats) {
StatsSetupConst.setBasicStatsState(tbl.getParameters(),
StatsSetupConst.FALSE);
@@ -306,118 +266,6 @@ protected void doCommit(TableMetadata base, TableMetadata
metadata) {
LOG.info("Committed to table {} with the new metadata location {}",
fullName, newMetadataLocation);
}
- private void setHmsTableParameters(String newMetadataLocation, Table tbl,
TableMetadata metadata,
- Set<String> obsoleteProps, boolean hiveEngineEnabled,
- Map<String, String> summary) {
- Map<String, String> parameters = Optional.ofNullable(tbl.getParameters())
- .orElseGet(Maps::newHashMap);
-
- // push all Iceberg table properties into HMS
- metadata.properties().entrySet().stream()
- .filter(entry ->
!entry.getKey().equalsIgnoreCase(HiveCatalog.HMS_TABLE_OWNER))
- .forEach(
- entry -> {
- String key = entry.getKey();
- // translate key names between Iceberg and HMS where needed
- String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key,
key);
- parameters.put(hmsKey, entry.getValue());
- });
- if (metadata.uuid() != null) {
- parameters.put(TableProperties.UUID, metadata.uuid());
- }
-
- // remove any props from HMS that are no longer present in Iceberg table
props
- obsoleteProps.forEach(parameters::remove);
-
- parameters.put(TABLE_TYPE_PROP,
ICEBERG_TABLE_TYPE_VALUE.toUpperCase(Locale.ENGLISH));
- parameters.put(METADATA_LOCATION_PROP, newMetadataLocation);
-
- if (currentMetadataLocation() != null &&
!currentMetadataLocation().isEmpty()) {
- parameters.put(PREVIOUS_METADATA_LOCATION_PROP,
currentMetadataLocation());
- }
-
- setStorageHandler(parameters, hiveEngineEnabled);
-
- // Set the basic statistics
- if (summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP) != null) {
- parameters.put(StatsSetupConst.NUM_FILES,
summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
- }
- if (summary.get(SnapshotSummary.TOTAL_RECORDS_PROP) != null) {
- parameters.put(StatsSetupConst.ROW_COUNT,
summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
- }
- if (summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP) != null) {
- parameters.put(StatsSetupConst.TOTAL_SIZE,
summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
- }
-
- setSnapshotStats(metadata, parameters);
- setSchema(metadata.schema(), parameters);
- setPartitionSpec(metadata, parameters);
- setSortOrder(metadata, parameters);
-
- tbl.setParameters(parameters);
- }
-
- private static void setStorageHandler(Map<String, String> parameters,
boolean hiveEngineEnabled) {
- // If needed set the 'storage_handler' property to enable query from Hive
- if (hiveEngineEnabled) {
- parameters.put(hive_metastoreConstants.META_TABLE_STORAGE,
HiveOperationsBase.HIVE_ICEBERG_STORAGE_HANDLER);
- } else {
- parameters.remove(hive_metastoreConstants.META_TABLE_STORAGE);
- }
- }
-
- @VisibleForTesting
- void setSnapshotStats(TableMetadata metadata, Map<String, String>
parameters) {
- parameters.remove(TableProperties.CURRENT_SNAPSHOT_ID);
- parameters.remove(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP);
- parameters.remove(TableProperties.CURRENT_SNAPSHOT_SUMMARY);
-
- Snapshot currentSnapshot = metadata.currentSnapshot();
- if (exposeInHmsProperties() && currentSnapshot != null) {
- parameters.put(
- TableProperties.CURRENT_SNAPSHOT_ID,
String.valueOf(currentSnapshot.snapshotId()));
- parameters.put(
- TableProperties.CURRENT_SNAPSHOT_TIMESTAMP,
String.valueOf(currentSnapshot.timestampMillis()));
- setSnapshotSummary(parameters, currentSnapshot);
- }
-
- parameters.put(TableProperties.SNAPSHOT_COUNT,
String.valueOf(metadata.snapshots().size()));
- }
-
- @VisibleForTesting
- void setSnapshotSummary(Map<String, String> parameters, Snapshot
currentSnapshot) {
- try {
- String summary =
JsonUtil.mapper().writeValueAsString(currentSnapshot.summary());
- if (summary.length() <= maxHiveTablePropertySize) {
- parameters.put(TableProperties.CURRENT_SNAPSHOT_SUMMARY, summary);
- } else {
- LOG.warn("Not exposing the current snapshot({}) summary in HMS since
it exceeds {} characters",
- currentSnapshot.snapshotId(), maxHiveTablePropertySize);
- }
- } catch (JsonProcessingException e) {
- LOG.warn("Failed to convert current snapshot({}) summary to a json
string",
- currentSnapshot.snapshotId(), e);
- }
- }
-
- @VisibleForTesting
- void setPartitionSpec(TableMetadata metadata, Map<String, String>
parameters) {
- parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC);
- if (exposeInHmsProperties() && metadata.spec() != null &&
metadata.spec().isPartitioned()) {
- String spec = PartitionSpecParser.toJson(metadata.spec());
- setField(parameters, TableProperties.DEFAULT_PARTITION_SPEC, spec);
- }
- }
-
- @VisibleForTesting
- void setSortOrder(TableMetadata metadata, Map<String, String> parameters) {
- parameters.remove(TableProperties.DEFAULT_SORT_ORDER);
- if (exposeInHmsProperties() && metadata.sortOrder() != null &&
metadata.sortOrder().isSorted()) {
- String sortOrder = SortOrderParser.toJson(metadata.sortOrder());
- setField(parameters, TableProperties.DEFAULT_SORT_ORDER, sortOrder);
- }
- }
-
@Override
public long maxHiveTablePropertySize() {
return maxHiveTablePropertySize;
diff --git
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
index eb73915f5c7..4b59b46c3bf 100644
---
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
+++
b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
@@ -1041,9 +1041,6 @@ public void testSnapshotStatsTableProperties() throws
Exception {
@Test
public void testSetSnapshotSummary() throws Exception {
- Configuration conf = new Configuration();
- conf.set("iceberg.hive.table-property-max-size", "4000");
- HiveTableOperations ops = new HiveTableOperations(conf, null, null,
catalog.name(), DB_NAME, "tbl");
Snapshot snapshot = mock(Snapshot.class);
Map<String, String> summary = Maps.newHashMap();
when(snapshot.summary()).thenReturn(summary);
@@ -1054,7 +1051,8 @@ public void testSetSnapshotSummary() throws Exception {
}
assertThat(JsonUtil.mapper().writeValueAsString(summary).length()).isLessThan(4000);
Map<String, String> parameters = Maps.newHashMap();
- ops.setSnapshotSummary(parameters, snapshot);
+ final long maxHiveTablePropertySize = 4000;
+ HMSTablePropertyHelper.setSnapshotSummary(parameters, snapshot,
maxHiveTablePropertySize);
assertThat(parameters).as("The snapshot summary must be in
parameters").hasSize(1);
// create a snapshot summary whose json string size exceeds the limit
@@ -1065,7 +1063,7 @@ public void testSetSnapshotSummary() throws Exception {
// the limit has been updated to 4000 instead of the default value(32672)
assertThat(summarySize).isGreaterThan(4000).isLessThan(32672);
parameters.remove(CURRENT_SNAPSHOT_SUMMARY);
- ops.setSnapshotSummary(parameters, snapshot);
+ HMSTablePropertyHelper.setSnapshotSummary(parameters, snapshot,
maxHiveTablePropertySize);
assertThat(parameters)
.as("The snapshot summary must not be in parameters due to the size
limit")
.isEmpty();
@@ -1073,9 +1071,7 @@ public void testSetSnapshotSummary() throws Exception {
@Test
public void testNotExposeTableProperties() {
- Configuration conf = new Configuration();
- conf.set("iceberg.hive.table-property-max-size", "0");
- HiveTableOperations ops = new HiveTableOperations(conf, null, null,
catalog.name(), DB_NAME, "tbl");
+ final long maxHiveTablePropertySize = 0;
TableMetadata metadata = mock(TableMetadata.class);
Map<String, String> parameters = Maps.newHashMap();
parameters.put(CURRENT_SNAPSHOT_SUMMARY, "summary");
@@ -1085,19 +1081,19 @@ public void testNotExposeTableProperties() {
parameters.put(DEFAULT_PARTITION_SPEC, "partitionSpec");
parameters.put(DEFAULT_SORT_ORDER, "sortOrder");
- ops.setSnapshotStats(metadata, parameters);
+ HMSTablePropertyHelper.setSnapshotStats(metadata, parameters,
maxHiveTablePropertySize);
assertThat(parameters)
.doesNotContainKey(CURRENT_SNAPSHOT_SUMMARY)
.doesNotContainKey(CURRENT_SNAPSHOT_ID)
.doesNotContainKey(CURRENT_SNAPSHOT_TIMESTAMP);
- ops.setSchema(metadata.schema(), parameters);
+ HMSTablePropertyHelper.setSchema(metadata.schema(), parameters,
maxHiveTablePropertySize);
assertThat(parameters).doesNotContainKey(CURRENT_SCHEMA);
- ops.setPartitionSpec(metadata, parameters);
+ HMSTablePropertyHelper.setPartitionSpec(metadata, parameters,
maxHiveTablePropertySize);
assertThat(parameters).doesNotContainKey(DEFAULT_PARTITION_SPEC);
- ops.setSortOrder(metadata, parameters);
+ HMSTablePropertyHelper.setSortOrder(metadata, parameters,
maxHiveTablePropertySize);
assertThat(parameters).doesNotContainKey(DEFAULT_SORT_ORDER);
}
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 6bb5f27e76e..65368460c09 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -118,6 +118,7 @@
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.expressions.UnboundTerm;
import org.apache.iceberg.hive.CachedClientPool;
+import org.apache.iceberg.hive.HMSTablePropertyHelper;
import org.apache.iceberg.hive.HiveLock;
import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.hive.HiveTableOperations;
@@ -845,7 +846,7 @@ private static Properties
getCatalogProperties(org.apache.hadoop.hive.metastore.
hmsTable.getParameters().entrySet().stream().filter(e -> e.getKey() !=
null && e.getValue() != null).forEach(e -> {
// translate key names between HMS and Iceberg where needed
- String icebergKey =
HiveTableOperations.translateToIcebergProp(e.getKey());
+ String icebergKey =
HMSTablePropertyHelper.translateToIcebergProp(e.getKey());
properties.put(icebergKey, e.getValue());
});
@@ -862,7 +863,7 @@ private static Properties
getCatalogProperties(org.apache.hadoop.hive.metastore.
if (serdeInfo != null) {
serdeInfo.getParameters().entrySet().stream()
.filter(e -> e.getKey() != null && e.getValue() != null).forEach(e
-> {
- String icebergKey =
HiveTableOperations.translateToIcebergProp(e.getKey());
+ String icebergKey =
HMSTablePropertyHelper.translateToIcebergProp(e.getKey());
properties.put(icebergKey, e.getValue());
});
}
@@ -1130,7 +1131,7 @@ public void
postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
// Check if META_TABLE_STORAGE is not present or is not an instance of
ICEBERG_STORAGE_HANDLER
if (storageHandler == null ||
!isHiveIcebergStorageHandler(storageHandler)) {
hmsTable.getParameters()
- .put(hive_metastoreConstants.META_TABLE_STORAGE,
HiveTableOperations.HIVE_ICEBERG_STORAGE_HANDLER);
+ .put(hive_metastoreConstants.META_TABLE_STORAGE,
HMSTablePropertyHelper.HIVE_ICEBERG_STORAGE_HANDLER);
}
} catch (NoSuchTableException | NotFoundException ex) {
// If the table doesn't exist, ignore throwing exception from here
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java
index c98f0cb9b34..fb5b44eb062 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java
@@ -20,10 +20,8 @@
package org.apache.iceberg.mr.hive;
-import java.lang.reflect.Method;
import java.util.Collections;
import java.util.List;
-import java.util.Map;
import java.util.concurrent.Executors;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.iceberg.FileFormat;
@@ -47,8 +45,6 @@
import static
org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerTestUtils.init;
import static org.apache.iceberg.types.Types.NestedField.required;
-import static org.mockito.ArgumentMatchers.anyMap;
-import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.CALLS_REAL_METHODS;
import static org.mockito.Mockito.mockStatic;
@@ -62,12 +58,12 @@ public void setUpTables() throws NoSuchMethodException {
PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name")
.bucket("customer_id", 16).build();
- Method method =
HiveTableOperations.class.getDeclaredMethod("setStorageHandler", Map.class,
Boolean.TYPE);
- method.setAccessible(true);
+// Method method =
HiveTableOperations.class.getDeclaredMethod("setStorageHandler", Map.class,
Boolean.TYPE);
+// method.setAccessible(true);
try (MockedStatic<HiveTableOperations> tableOps =
mockStatic(HiveTableOperations.class, CALLS_REAL_METHODS)) {
- tableOps.when(() -> method.invoke(null, anyMap(), eq(true)))
- .thenAnswer(invocation -> null);
+// tableOps.when(() -> method.invoke(null, anyMap(), eq(true)))
+// .thenAnswer(invocation -> null);
// create and insert an initial batch of records
testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec,
fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2,
Collections.emptyMap(),