amogh-jahagirdar commented on code in PR #6742:
URL: https://github.com/apache/iceberg/pull/6742#discussion_r1107995932
##########
aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java:
##########
@@ -269,14 +269,6 @@ public void testRenameTableFailsToDeleteOldTable() {
.databaseName(namespace)
.tableInput(TableInput.builder().name(tableName).parameters(Maps.newHashMap()).build())
.build());
- AssertHelpers.assertThrows(
- "should fail to rename",
- ValidationException.class,
- "Input Glue table is not an iceberg table",
- () ->
- glueCatalog.renameTable(
- TableIdentifier.of(namespace, tableName),
- TableIdentifier.of(namespace, newTableName)));
Review Comment:
Why is this assertion removed? It looks like it's for rename
##########
aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java:
##########
@@ -563,25 +555,88 @@ public void testRegisterTable() {
Table registeredTable = glueCatalog.registerTable(identifier,
metadataLocation);
Assertions.assertThat(registeredTable).isNotNull();
String expectedMetadataLocation =
- ((BaseTable) table).operations().current().metadataFileLocation();
+ ((BaseTable)
registeredTable).operations().current().metadataFileLocation();
Assertions.assertThat(metadataLocation).isEqualTo(expectedMetadataLocation);
+
+ GetTableResponse response =
+
glue.getTable(GetTableRequest.builder().databaseName(namespace).name(tableName).build());
+ String actualMetadataLocationGlue =
+
response.table().parameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+
+ Assert.assertEquals(
+ "Glue Catalog Register Table should not submit a new commit",
+ expectedMetadataLocation,
+ actualMetadataLocationGlue);
+
Assertions.assertThat(glueCatalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(glueCatalog.dropTable(identifier, true)).isTrue();
Assertions.assertThat(glueCatalog.dropNamespace(Namespace.of(namespace))).isTrue();
}
+ @Test
+ public void testRegisterTableNamespaceNotFound() {
+ String namespace = createNamespace();
+ String tableName = getRandomName();
+ createTable(namespace, tableName);
+ Table table =
+
glueCatalogWithForceRegisterTable.loadTable(TableIdentifier.of(namespace,
tableName));
+ String metadataLocation = ((BaseTable)
table).operations().current().metadataFileLocation();
+ AssertHelpers.assertThrows(
+ "Should fail to register to an unknown namespace",
+ NoSuchNamespaceException.class,
+ "not found in Glue",
+ () ->
+ glueCatalogWithForceRegisterTable.registerTable(
+ TableIdentifier.of(getRandomName(), getRandomName()),
metadataLocation));
+ }
+
@Test
public void testRegisterTableAlreadyExists() {
String namespace = createNamespace();
String tableName = getRandomName();
createTable(namespace, tableName);
TableIdentifier identifier = TableIdentifier.of(namespace, tableName);
- Table table = glueCatalog.loadTable(identifier);
+ Table table = glueCatalogWithForceRegisterTable.loadTable(identifier);
String metadataLocation = ((BaseTable)
table).operations().current().metadataFileLocation();
- Assertions.assertThatThrownBy(() -> glueCatalog.registerTable(identifier,
metadataLocation))
- .isInstanceOf(AlreadyExistsException.class);
- Assertions.assertThat(glueCatalog.dropTable(identifier, true)).isTrue();
-
Assertions.assertThat(glueCatalog.dropNamespace(Namespace.of(namespace))).isTrue();
+
Assertions.assertThat(glueCatalogWithForceRegisterTable.dropTable(identifier,
false)).isTrue();
+ Table registeredTable =
+ glueCatalogWithForceRegisterTable.registerTable(identifier,
metadataLocation);
+ Assertions.assertThat(registeredTable).isNotNull();
+
+ GetTableResponse response =
+
glue.getTable(GetTableRequest.builder().databaseName(namespace).name(tableName).build());
+ Assert.assertEquals(
+ "external table type is set after register",
+ "EXTERNAL_TABLE",
+ response.table().tableType());
+ String actualMetadataLocation =
+
response.table().parameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+ Assert.assertEquals(
+ "metadata location should be updated with registerTable call",
+ metadataLocation,
+ actualMetadataLocation);
+
+ // commit new transaction, should create a new metadata file
+ DataFile dataFile =
+ DataFiles.builder(partitionSpec)
+ .withPath("/path/to/data-a.parquet")
+ .withFileSizeInBytes(10)
+ .withRecordCount(1)
+ .build();
+ table.newAppend().appendFile(dataFile).commit();
+
+ metadataLocation = ((BaseTable)
table).operations().current().metadataFileLocation();
+ // update metadata location
+ glueCatalogWithForceRegisterTable.registerTable(identifier,
metadataLocation);
+ response =
+
glue.getTable(GetTableRequest.builder().databaseName(namespace).name(tableName).build());
+ String updatedMetadataLocation =
+
response.table().parameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+ Assert.assertEquals(
+ "metadata location should be updated with registerTable call",
+ metadataLocation,
+ updatedMetadataLocation);
+ Assert.assertEquals("Table Version should be updated", "2",
response.table().versionId());
Review Comment:
I think we'll want separate tests for `testRegisterWhenTableExists`, one is
when force registration is enabled and one when not. we still want to validate
the case when force is false works as expected but this change seems to be
removing the validating the previous case. Let me know if I missed something
when reading the code!
##########
aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java:
##########
@@ -437,6 +441,69 @@ public void renameTable(TableIdentifier from,
TableIdentifier to) {
LOG.info("Successfully renamed table from {} to {}", from, to);
}
+ @Override
+ public org.apache.iceberg.Table registerTable(
+ TableIdentifier identifier, String metadataFileLocation) {
+ Preconditions.checkArgument(
+ isValidIdentifier(identifier), "Table identifier to register is
invalid: " + identifier);
+ Preconditions.checkArgument(
+ metadataFileLocation != null && !metadataFileLocation.isEmpty(),
+ "Cannot register an empty metadata file location as a table");
+
+ // keep the original behavior when force-register-table flag is off
+ if (!awsProperties.glueCatalogForceRegisterTable()) {
+ return super.registerTable(identifier, metadataFileLocation);
+ }
+
+ TableOperations ops = newTableOps(identifier);
+ InputFile metadataFile = ops.io().newInputFile(metadataFileLocation);
+ TableMetadata metadata = TableMetadataParser.read(ops.io(), metadataFile);
+
+ Map<String, String> tableParameters =
+ ImmutableMap.of(
+ BaseMetastoreTableOperations.TABLE_TYPE_PROP,
+
BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH),
+ BaseMetastoreTableOperations.METADATA_LOCATION_PROP,
+ metadataFileLocation);
+
+ String databaseName =
+ IcebergToGlueConverter.getDatabaseName(
+ identifier, awsProperties.glueCatalogSkipNameValidation());
+ String tableName =
+ IcebergToGlueConverter.getTableName(
+ identifier, awsProperties.glueCatalogSkipNameValidation());
+
+ TableInput tableInput =
+ TableInput.builder()
+ .applyMutation(
+ builder ->
IcebergToGlueConverter.setTableInputInformation(builder, metadata))
+ .name(tableName)
+ .tableType(GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE)
+ .parameters(tableParameters)
+ .build();
+
+ try {
+ glue.createTable(
+
CreateTableRequest.builder().databaseName(databaseName).tableInput(tableInput).build());
+ } catch (software.amazon.awssdk.services.glue.model.AlreadyExistsException
e) {
+ GetTableResponse response =
+ glue.getTable(
+
GetTableRequest.builder().databaseName(databaseName).name(tableName).build());
+ String versionId = response.table().versionId();
+ glue.updateTable(
+ UpdateTableRequest.builder()
+ .databaseName(databaseName)
+ .tableInput(tableInput)
+ .versionId(versionId)
+ .build());
+ } catch (EntityNotFoundException e) {
+ throw new NoSuchNamespaceException(
+ e, "Namespace %s is not found in Glue", identifier.namespace());
Review Comment:
The exception handling seems off, can't EntityNotFoundException also be
thrown when the table is not found?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]