This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d2ae8a3330b HIVE-28299: Iceberg: Optimize show partitions through
column projection (Butao Zhang, reviewed by Denys Kuzmenko)
d2ae8a3330b is described below
commit d2ae8a3330bc1b2fa9a4c9b7b33726ab846c865a
Author: Butao Zhang <[email protected]>
AuthorDate: Thu Jun 20 20:36:12 2024 +0800
HIVE-28299: Iceberg: Optimize show partitions through column projection
(Butao Zhang, reviewed by Denys Kuzmenko)
Closes #5276
---
common/src/java/org/apache/hadoop/hive/conf/Constants.java | 5 +----
.../java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 4 +++-
.../src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java | 2 +-
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
index dca36c204d4..efbee20c558 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
@@ -104,13 +104,10 @@ public class Constants {
public static final String HTTP_HEADER_REQUEST_TRACK = "X-Request-ID";
public static final String TIME_POSTFIX_REQUEST_TRACK = "_TIME";
-
-  public static final String ICEBERG_PARTITION_TABLE_SCHEMA = "partition,spec_id,record_count,file_count," +
-      "position_delete_record_count,position_delete_file_count,equality_delete_record_count," +
-      "equality_delete_file_count,last_updated_at,total_data_file_size_in_bytes,last_updated_snapshot_id";
public static final String DELIMITED_JSON_SERDE =
"org.apache.hadoop.hive.serde2.DelimitedJSONSerDe";
public static final String CLUSTER_ID_ENV_VAR_NAME = "HIVE_CLUSTER_ID";
public static final String CLUSTER_ID_CLI_OPT_NAME = "hive.cluster.id";
public static final String CLUSTER_ID_HIVE_CONF_PROP = "hive.cluster.id";
+ public static final String ICEBERG_PARTITION_COLUMNS = "partition,spec_id";
}
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 7ccb4bfb3bf..9ed58c3f059 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -1804,7 +1805,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
fetcher.initialize(job, HiveTableUtil.getSerializationProps());
     org.apache.hadoop.hive.ql.metadata.Table metaDataPartTable = context.getDb().getTable(hmstbl.getDbName(), hmstbl.getTableName(), "partitions", true);
-    Deserializer currSerDe = metaDataPartTable.getDeserializer();
+    Deserializer currSerDe = HiveMetaStoreUtils.getDeserializer(job, metaDataPartTable.getTTable(),
+        metaDataPartTable.getMetaTable(), false);
ObjectMapper mapper = new ObjectMapper();
Table tbl = getTable(hmstbl);
while (reader.next(key, value)) {
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
index 510f562922b..439639167f8 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
@@ -302,7 +302,7 @@ public class HiveTableUtil {
static JobConf getPartJobConf(Configuration confs,
org.apache.hadoop.hive.ql.metadata.Table tbl) {
JobConf job = new JobConf(confs);
-    job.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, Constants.ICEBERG_PARTITION_TABLE_SCHEMA);
+    job.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, Constants.ICEBERG_PARTITION_COLUMNS);
job.set(InputFormatConfig.TABLE_LOCATION, tbl.getPath().toString());
     job.set(InputFormatConfig.TABLE_IDENTIFIER, tbl.getFullyQualifiedName() + ".partitions");
     HiveConf.setVar(job, HiveConf.ConfVars.HIVE_FETCH_OUTPUT_SERDE, Constants.DELIMITED_JSON_SERDE);