This is an automated email from the ASF dual-hosted git repository.
mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3f2067f HIVE-25581: Iceberg storage handler should set common
projection pruning config (Marton Bod, reviewed by Peter Vary and Adam Szita)
3f2067f is described below
commit 3f2067f02c86460030948d930379c168c40eca87
Author: Marton Bod <[email protected]>
AuthorDate: Sat Oct 2 19:29:44 2021 +0200
HIVE-25581: Iceberg storage handler should set common projection pruning
config (Marton Bod, reviewed by Peter Vary and Adam Szita)
Iceberg queries always need "tez.mrreader.config.update.properties" to be
set in order for projection pruning to work. Currently it is only set as part
of the TestHiveShell setup for unit tests. We should ensure that it is set by
the Iceberg storage handler by default for all Iceberg queries.
---
data/conf/iceberg/llap/tez-site.xml | 4 ----
data/conf/iceberg/tez/tez-site.xml | 4 ----
.../java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 5 +++++
.../src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java | 3 ---
4 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/data/conf/iceberg/llap/tez-site.xml
b/data/conf/iceberg/llap/tez-site.xml
index 779886f..7ad5ad4 100644
--- a/data/conf/iceberg/llap/tez-site.xml
+++ b/data/conf/iceberg/llap/tez-site.xml
@@ -8,10 +8,6 @@
<value>org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled</value>
</property>
<property>
- <name>tez.mrreader.config.update.properties</name>
- <value>hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids</value>
- </property>
- <property>
<name>tez.am.resource.memory.mb</name>
<value>256</value>
</property>
diff --git a/data/conf/iceberg/tez/tez-site.xml
b/data/conf/iceberg/tez/tez-site.xml
index 779886f..7ad5ad4 100644
--- a/data/conf/iceberg/tez/tez-site.xml
+++ b/data/conf/iceberg/tez/tez-site.xml
@@ -8,10 +8,6 @@
<value>org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled</value>
</property>
<property>
- <name>tez.mrreader.config.update.properties</name>
- <value>hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids</value>
- </property>
- <property>
<name>tez.am.resource.memory.mb</name>
<value>256</value>
</property>
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 5df64bf..7a357af 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -164,6 +164,7 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
+ setCommonJobConf(jobConf);
if (tableDesc != null && tableDesc.getProperties() != null &&
tableDesc.getProperties().get(WRITE_KEY) != null) {
String tableName = tableDesc.getTableName();
@@ -359,6 +360,10 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
return IcebergMetadataTables.isValidMetaTable(metaTableName);
}
+ private void setCommonJobConf(JobConf jobConf) {
+ jobConf.set("tez.mrreader.config.update.properties",
"hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids");
+ }
+
public boolean
addDynamicSplitPruningEdge(org.apache.hadoop.hive.ql.metadata.Table table,
ExprNodeDesc
syntheticFilterPredicate) {
try {
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
index 3d39889..8538cb1 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
@@ -214,9 +214,6 @@ public class TestHiveShell {
// set to true so that the Tez session will create an empty jar for
localization
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_IN_TEST_IDE, true);
- // enables vectorization on Tez
- hiveConf.set("tez.mrreader.config.update.properties",
"hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids");
-
// set lifecycle hooks
hiveConf.setVar(HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS,
HiveIcebergQueryLifeTimeHook.class.getName());