This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3ca97725854 HIVE-27869: Iceberg: Select from HadoopTable fails at
HiveIcebergStorageHandler#canProvideColStats (Butao Zhang, reviewed by Denys
Kuzmenko)
3ca97725854 is described below
commit 3ca97725854df10e7cd67514a3219e996453eab1
Author: Butao Zhang <[email protected]>
AuthorDate: Wed Nov 15 23:22:56 2023 +0800
HIVE-27869: Iceberg: Select from HadoopTable fails at
HiveIcebergStorageHandler#canProvideColStats (Butao Zhang, reviewed by Denys
Kuzmenko)
Closes #4871
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 6 +++---
.../iceberg/mr/hive/TestHiveIcebergStatistics.java | 23 ++++++++++++++++++++++
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 538c7316779..537ffd28d41 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -210,7 +210,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
private static final int PART_IDX = 0;
public static final String COPY_ON_WRITE = "copy-on-write";
public static final String MERGE_ON_READ = "merge-on-read";
- public static final String STATS = "/stats/";
+ public static final String STATS = "/stats/snap-";
/**
* Function template for producing a custom sort expression function:
@@ -538,7 +538,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
try {
FileSystem fs = statsPath.getFileSystem(conf);
return fs.exists(statsPath);
- } catch (IOException e) {
+ } catch (Exception e) {
LOG.warn("Exception when trying to find Iceberg column stats for
table:{} , snapshot:{} , " +
"statsPath: {} , stack trace: {}", table.name(),
table.currentSnapshot(), statsPath, e);
}
@@ -593,7 +593,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
}
private Path getColStatsPath(Table table, long snapshotId) {
- return new Path(table.location() + STATS + table.name() + snapshotId);
+ return new Path(table.location() + STATS + snapshotId);
}
private boolean removeColStatsIfExists(Table tbl) throws IOException {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java
index e09cc732e3b..d31ebcf80d1 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java
@@ -21,6 +21,7 @@ package org.apache.iceberg.mr.hive;
import java.io.IOException;
import java.util.List;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.iceberg.PartitionSpec;
@@ -37,6 +38,8 @@ import org.junit.Assert;
import org.junit.Assume;
import org.junit.Test;
+import static org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.STATS;
+
/**
* Tests verifying correct statistics generation behaviour on Iceberg tables
triggered by: ANALYZE queries, inserts,
* CTAS, etc...
@@ -260,6 +263,26 @@ public class TestHiveIcebergStatistics extends HiveIcebergStorageHandlerWithEngi
checkColStatMinMaxDistinctValue(identifier.name(), "customer_id", 0, 5, 6,
0);
}
+ @Test
+ public void testIcebergColStatsPath() throws IOException {
+ TableIdentifier identifier = TableIdentifier.of("default", "customers");
+
+ shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname,
true);
+ Table table = testTables.createTable(shell, identifier.name(),
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ PartitionSpec.unpartitioned(), fileFormat, ImmutableList.of());
+
+ String insert =
testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
identifier, false);
+ shell.executeStatement(insert);
+
+ table.refresh();
+ Path tblColPath = new Path(table.location() + STATS +
table.currentSnapshot().snapshotId());
+ // Check that if colPath is created correctly
+
Assert.assertTrue(tblColPath.getFileSystem(shell.getHiveConf()).exists(tblColPath));
+ List<Object[]> result = shell.executeStatement("SELECT * FROM customers");
+
HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
+
HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
result));
+ }
+
private void checkColStat(String tableName, String colName, boolean
accurate) {
List<Object[]> rows = shell.executeStatement("DESCRIBE " + tableName + " "
+ colName);