This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7b600fe HIVE-25563: Iceberg table operations hang a long time if
metadata is missing/corrupted (Adam Szita, reviewed by Marton Bod)
7b600fe is described below
commit 7b600fe38f03b9790b193171a65e57f6a6970820
Author: Adam Szita <[email protected]>
AuthorDate: Mon Oct 4 10:21:08 2021 +0200
HIVE-25563: Iceberg table operations hang a long time if metadata is
missing/corrupted (Adam Szita, reviewed by Marton Bod)
---
.../src/java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++++
.../org/apache/iceberg/hive/HiveTableOperations.java | 4 +++-
.../org/apache/iceberg/hive/HiveTableBaseTest.java | 2 +-
.../java/org/apache/iceberg/hive/HiveTableTest.java | 18 ++++++++++++++++++
4 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 903a803..cf96fff 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -5593,6 +5593,10 @@ public class HiveConf extends Configuration {
HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS("hive.server2.iceberg.metadata.generator.threads",
10,
"Number of threads used to scan partition directories for data files
and update/generate iceberg metadata"),
+
HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES("hive.iceberg.metadata.refresh.max.retries",
2,
+ "Max retry count for trying to access the metadata location in order
to refresh metadata during " +
+ "Iceberg table load."),
+
/* BLOBSTORE section */
HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes",
"s3,s3a,s3n",
diff --git
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index d7533e0..386d9ff 100644
---
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
@@ -180,7 +181,8 @@ public class HiveTableOperations extends
BaseMetastoreTableOperations {
throw new RuntimeException("Interrupted during refresh", e);
}
- refreshFromMetadataLocation(metadataLocation);
+ refreshFromMetadataLocation(metadataLocation, HiveConf.getIntVar(conf,
+ HiveConf.ConfVars.HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES));
}
@SuppressWarnings("checkstyle:CyclomaticComplexity")
diff --git
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java
b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java
index 76f2192..b584b9e 100644
---
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java
+++
b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java
@@ -79,7 +79,7 @@ public class HiveTableBaseTest extends HiveMetastoreTest {
return getTableLocationPath(tableName).toString();
}
- private static String metadataLocation(String tableName) {
+ protected static String metadataLocation(String tableName) {
return Paths.get(getTableBasePath(tableName), "metadata").toString();
}
diff --git
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
index fa67e5d..6804fc8 100644
---
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
+++
b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
@@ -26,6 +26,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -48,6 +49,7 @@ import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.CommitFailedException;
+import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.hadoop.ConfigProperties;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
@@ -434,6 +436,22 @@ public class HiveTableTest extends HiveTableBaseTest {
assertHiveEnabled(hmsTable, false);
}
+ @Test(timeout = 60000, expected = NotFoundException.class)
+ public void testMissingMetadataWontCauseHang() throws Exception {
+ catalog.loadTable(TABLE_IDENTIFIER);
+ HiveConf.setIntVar(catalog.getConf(),
HiveConf.ConfVars.HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES, 3);
+ // Hide the metadata directory: the reload below must throw NotFoundException within the timeout.
+ File realLocation = new File(metadataLocation(TABLE_NAME));
+ File fakeLocation = new File(metadataLocation(TABLE_NAME) + "_dummy");
+ Assert.assertTrue(realLocation.renameTo(fakeLocation));
+ // Always move the directory back from its temporary name so the table metadata is restored.
+ try {
+ catalog.loadTable(TABLE_IDENTIFIER);
+ } finally {
+ fakeLocation.renameTo(realLocation);
+ }
+ }
+
private void assertHiveEnabled(org.apache.hadoop.hive.metastore.api.Table
hmsTable, boolean expected) {
if (expected) {
Assert.assertEquals("org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",