This is an automated email from the ASF dual-hosted git repository.
szehon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 81bf8d3076 Core: Avoid creating new metadata file on registerTable
(#6591)
81bf8d3076 is described below
commit 81bf8d30766b1b129b87abde15239645cb127046
Author: Vikash Kumar <[email protected]>
AuthorDate: Thu Jan 26 00:41:09 2023 +0530
Core: Avoid creating new metadata file on registerTable (#6591)
---
.../iceberg/aws/dynamodb/TestDynamoDbCatalog.java | 6 +++-
.../iceberg/aws/glue/TestGlueCatalogTable.java | 6 +++-
.../aws/dynamodb/DynamoDbTableOperations.java | 3 +-
.../iceberg/aws/glue/GlueTableOperations.java | 3 +-
.../iceberg/BaseMetastoreTableOperations.java | 6 ++++
.../apache/iceberg/jdbc/JdbcTableOperations.java | 3 +-
.../org/apache/iceberg/jdbc/TestJdbcCatalog.java | 3 ++
.../iceberg/dell/ecs/EcsTableOperations.java | 3 +-
.../apache/iceberg/dell/ecs/TestEcsCatalog.java | 6 +++-
.../apache/iceberg/hive/HiveTableOperations.java | 8 ++---
.../org/apache/iceberg/hive/TestHiveCatalog.java | 34 ++++++++++++++++++++++
.../iceberg/nessie/NessieTableOperations.java | 6 ++--
12 files changed, 71 insertions(+), 16 deletions(-)
diff --git
a/aws/src/integration/java/org/apache/iceberg/aws/dynamodb/TestDynamoDbCatalog.java
b/aws/src/integration/java/org/apache/iceberg/aws/dynamodb/TestDynamoDbCatalog.java
index 1c21f0ba31..f6fbfa55cf 100644
---
a/aws/src/integration/java/org/apache/iceberg/aws/dynamodb/TestDynamoDbCatalog.java
+++
b/aws/src/integration/java/org/apache/iceberg/aws/dynamodb/TestDynamoDbCatalog.java
@@ -369,7 +369,11 @@ public class TestDynamoDbCatalog {
Assertions.assertThat(catalog.dropTable(identifier, false)).isTrue();
TableOperations ops = ((HasTableOperations) registeringTable).operations();
String metadataLocation = ((DynamoDbTableOperations)
ops).currentMetadataLocation();
- Assertions.assertThat(catalog.registerTable(identifier,
metadataLocation)).isNotNull();
+ Table registeredTable = catalog.registerTable(identifier,
metadataLocation);
+ Assertions.assertThat(registeredTable).isNotNull();
+ String expectedMetadataLocation =
+ ((HasTableOperations)
registeredTable).operations().current().metadataFileLocation();
+
Assertions.assertThat(metadataLocation).isEqualTo(expectedMetadataLocation);
Assertions.assertThat(catalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(catalog.dropTable(identifier, true)).isTrue();
Assertions.assertThat(catalog.dropNamespace(namespace)).isTrue();
diff --git
a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java
b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java
index 4eebaf2e7c..427952fa16 100644
---
a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java
+++
b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java
@@ -560,7 +560,11 @@ public class TestGlueCatalogTable extends GlueTestBase {
Table table = glueCatalog.loadTable(identifier);
String metadataLocation = ((BaseTable)
table).operations().current().metadataFileLocation();
Assertions.assertThat(glueCatalog.dropTable(identifier, false)).isTrue();
- Assertions.assertThat(glueCatalog.registerTable(identifier,
metadataLocation)).isNotNull();
+ Table registeredTable = glueCatalog.registerTable(identifier,
metadataLocation);
+ Assertions.assertThat(registeredTable).isNotNull();
+ String expectedMetadataLocation =
+ ((BaseTable) table).operations().current().metadataFileLocation();
+
Assertions.assertThat(metadataLocation).isEqualTo(expectedMetadataLocation);
Assertions.assertThat(glueCatalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(glueCatalog.dropTable(identifier, true)).isTrue();
Assertions.assertThat(glueCatalog.dropNamespace(Namespace.of(namespace))).isTrue();
diff --git
a/aws/src/main/java/org/apache/iceberg/aws/dynamodb/DynamoDbTableOperations.java
b/aws/src/main/java/org/apache/iceberg/aws/dynamodb/DynamoDbTableOperations.java
index 0fa4f8f0b1..e38aa0a867 100644
---
a/aws/src/main/java/org/apache/iceberg/aws/dynamodb/DynamoDbTableOperations.java
+++
b/aws/src/main/java/org/apache/iceberg/aws/dynamodb/DynamoDbTableOperations.java
@@ -102,7 +102,8 @@ class DynamoDbTableOperations extends
BaseMetastoreTableOperations {
@Override
protected void doCommit(TableMetadata base, TableMetadata metadata) {
- String newMetadataLocation = writeNewMetadata(metadata, currentVersion() +
1);
+ boolean newTable = base == null;
+ String newMetadataLocation = writeNewMetadataIfRequired(newTable,
metadata);
CommitStatus commitStatus = CommitStatus.FAILURE;
Map<String, AttributeValue> tableKey =
DynamoDbCatalog.tablePrimaryKey(tableIdentifier);
try {
diff --git
a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java
b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java
index ad34ce24c5..84887cf253 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java
@@ -148,7 +148,8 @@ class GlueTableOperations extends
BaseMetastoreTableOperations {
try {
glueTempTableCreated = createGlueTempTableIfNecessary(base,
metadata.location());
- newMetadataLocation = writeNewMetadata(metadata, currentVersion() + 1);
+ boolean newTable = base == null;
+ newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata);
lock(newMetadataLocation);
Table glueTable = getGlueTable();
checkMetadataLocation(glueTable, base);
diff --git
a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
index 1cbc6608e5..2fccef5a0a 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
@@ -154,6 +154,12 @@ public abstract class BaseMetastoreTableOperations
implements TableOperations {
this.shouldRefresh = false;
}
+ protected String writeNewMetadataIfRequired(boolean newTable, TableMetadata
metadata) {
+ return newTable && metadata.metadataFileLocation() != null
+ ? metadata.metadataFileLocation()
+ : writeNewMetadata(metadata, currentVersion() + 1);
+ }
+
protected String writeNewMetadata(TableMetadata metadata, int newVersion) {
String newTableMetadataFilePath = newTableMetadataFilePath(metadata,
newVersion);
OutputFile newMetadataLocation =
io().newOutputFile(newTableMetadataFilePath);
diff --git
a/core/src/main/java/org/apache/iceberg/jdbc/JdbcTableOperations.java
b/core/src/main/java/org/apache/iceberg/jdbc/JdbcTableOperations.java
index 8e07aa594a..cdc6be6c70 100644
--- a/core/src/main/java/org/apache/iceberg/jdbc/JdbcTableOperations.java
+++ b/core/src/main/java/org/apache/iceberg/jdbc/JdbcTableOperations.java
@@ -102,7 +102,8 @@ class JdbcTableOperations extends
BaseMetastoreTableOperations {
@Override
public void doCommit(TableMetadata base, TableMetadata metadata) {
- String newMetadataLocation = writeNewMetadata(metadata, currentVersion() +
1);
+ boolean newTable = base == null;
+ String newMetadataLocation = writeNewMetadataIfRequired(newTable,
metadata);
try {
Map<String, String> table = getTable();
diff --git a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java
b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java
index 472824fa5e..268391bf43 100644
--- a/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java
+++ b/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java
@@ -782,6 +782,9 @@ public class TestJdbcCatalog extends
CatalogTests<JdbcCatalog> {
Table registeredTable = catalog.registerTable(identifier,
metadataLocation);
Assertions.assertThat(registeredTable).isNotNull();
TestHelpers.assertSerializedAndLoadedMetadata(registeringTable,
registeredTable);
+ String expectedMetadataLocation =
+ ((HasTableOperations)
registeredTable).operations().current().metadataFileLocation();
+
Assertions.assertThat(metadataLocation).isEqualTo(expectedMetadataLocation);
Assertions.assertThat(catalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(catalog.dropTable(identifier)).isTrue();
}
diff --git
a/dell/src/main/java/org/apache/iceberg/dell/ecs/EcsTableOperations.java
b/dell/src/main/java/org/apache/iceberg/dell/ecs/EcsTableOperations.java
index 9f2c24ac3e..d7467eff45 100644
--- a/dell/src/main/java/org/apache/iceberg/dell/ecs/EcsTableOperations.java
+++ b/dell/src/main/java/org/apache/iceberg/dell/ecs/EcsTableOperations.java
@@ -87,7 +87,8 @@ public class EcsTableOperations extends
BaseMetastoreTableOperations {
@Override
protected void doCommit(TableMetadata base, TableMetadata metadata) {
- String newMetadataLocation = writeNewMetadata(metadata, currentVersion() +
1);
+ boolean newTable = base == null;
+ String newMetadataLocation = writeNewMetadataIfRequired(newTable,
metadata);
if (base == null) {
// create a new table, the metadataKey should be absent
if (!catalog.putNewProperties(tableObject,
buildProperties(newMetadataLocation))) {
diff --git a/dell/src/test/java/org/apache/iceberg/dell/ecs/TestEcsCatalog.java
b/dell/src/test/java/org/apache/iceberg/dell/ecs/TestEcsCatalog.java
index 8c667f6898..1b26d23175 100644
--- a/dell/src/test/java/org/apache/iceberg/dell/ecs/TestEcsCatalog.java
+++ b/dell/src/test/java/org/apache/iceberg/dell/ecs/TestEcsCatalog.java
@@ -190,7 +190,11 @@ public class TestEcsCatalog {
ecsCatalog.dropTable(identifier, false);
TableOperations ops = ((HasTableOperations) registeringTable).operations();
String metadataLocation = ((EcsTableOperations)
ops).currentMetadataLocation();
- Assertions.assertThat(ecsCatalog.registerTable(identifier,
metadataLocation)).isNotNull();
+ Table registeredTable = ecsCatalog.registerTable(identifier,
metadataLocation);
+ Assertions.assertThat(registeredTable).isNotNull();
+ String expectedMetadataLocation =
+ ((HasTableOperations)
registeredTable).operations().current().metadataFileLocation();
+
Assertions.assertThat(metadataLocation).isEqualTo(expectedMetadataLocation);
Assertions.assertThat(ecsCatalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(ecsCatalog.dropTable(identifier, true)).isTrue();
}
diff --git
a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index 13f6df8630..1781640ca0 100644
---
a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++
b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -268,10 +268,8 @@ public class HiveTableOperations extends
BaseMetastoreTableOperations {
@SuppressWarnings("checkstyle:CyclomaticComplexity")
@Override
protected void doCommit(TableMetadata base, TableMetadata metadata) {
- String newMetadataLocation =
- base == null && metadata.metadataFileLocation() != null
- ? metadata.metadataFileLocation()
- : writeNewMetadata(metadata, currentVersion() + 1);
+ boolean newTable = base == null;
+ String newMetadataLocation = writeNewMetadataIfRequired(newTable,
metadata);
boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf);
boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS,
false);
@@ -296,7 +294,7 @@ public class HiveTableOperations extends
BaseMetastoreTableOperations {
if (tbl != null) {
// If we try to create the table but the metadata location is already
set, then we had a
// concurrent commit
- if (base == null
+ if (newTable
&&
tbl.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)
!= null) {
throw new AlreadyExistsException("Table already exists: %s.%s",
database, tableName);
diff --git
a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
index 21acb9b4a0..b06c718e3c 100644
--- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
+++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java
@@ -28,6 +28,7 @@ import static
org.apache.iceberg.TableProperties.DEFAULT_PARTITION_SPEC;
import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER;
import static org.apache.iceberg.expressions.Expressions.bucket;
import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -51,6 +52,7 @@ import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
@@ -62,6 +64,7 @@ import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.UpdateSchema;
import org.apache.iceberg.catalog.Catalog;
@@ -1203,4 +1206,35 @@ public class TestHiveCatalog extends HiveMetastoreTest {
Assert.assertEquals("s3://bucket/database.db", database.getLocationUri());
}
+
+ @Test
+ public void testRegisterTable() {
+ TableIdentifier identifier = TableIdentifier.of(DB_NAME, "t1");
+ catalog.createTable(identifier, getTestSchema());
+ Table registeringTable = catalog.loadTable(identifier);
+ catalog.dropTable(identifier, false);
+ TableOperations ops = ((HasTableOperations) registeringTable).operations();
+ String metadataLocation = ((HiveTableOperations)
ops).currentMetadataLocation();
+ Table registeredTable = catalog.registerTable(identifier,
metadataLocation);
+ assertThat(registeredTable).isNotNull();
+ TestHelpers.assertSerializedAndLoadedMetadata(registeringTable,
registeredTable);
+ String expectedMetadataLocation =
+ ((HasTableOperations)
registeredTable).operations().current().metadataFileLocation();
+ assertThat(metadataLocation).isEqualTo(expectedMetadataLocation);
+ assertThat(catalog.loadTable(identifier)).isNotNull();
+ assertThat(catalog.dropTable(identifier)).isTrue();
+ }
+
+ @Test
+ public void testRegisterExistingTable() {
+ TableIdentifier identifier = TableIdentifier.of(DB_NAME, "t1");
+ catalog.createTable(identifier, getTestSchema());
+ Table registeringTable = catalog.loadTable(identifier);
+ TableOperations ops = ((HasTableOperations) registeringTable).operations();
+ String metadataLocation = ((HiveTableOperations)
ops).currentMetadataLocation();
+ assertThatThrownBy(() -> catalog.registerTable(identifier,
metadataLocation))
+ .isInstanceOf(AlreadyExistsException.class)
+ .hasMessage("Table already exists: hivedb.t1");
+ assertThat(catalog.dropTable(identifier, true)).isTrue();
+ }
}
diff --git
a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
index 799960c2b6..4cc3547b66 100644
--- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
+++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
@@ -143,10 +143,8 @@ public class NessieTableOperations extends
BaseMetastoreTableOperations {
@Override
protected void doCommit(TableMetadata base, TableMetadata metadata) {
- String newMetadataLocation =
- (base == null) && (metadata.metadataFileLocation() != null)
- ? metadata.metadataFileLocation()
- : writeNewMetadata(metadata, currentVersion() + 1);
+ boolean newTable = base == null;
+ String newMetadataLocation = writeNewMetadataIfRequired(newTable,
metadata);
String refName = client.refName();
boolean delete = true;