This is an automated email from the ASF dual-hosted git repository.
russellspitzer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new febba74700 Nessie: Nessie Catalog to return a unique path for
different tables with same name (#4826)
febba74700 is described below
commit febba7470004805cb9bebdc0de5d683b8fd9429a
Author: Ajantha Bhat <[email protected]>
AuthorDate: Wed Nov 9 22:03:33 2022 +0530
Nessie: Nessie Catalog to return a unique path for different tables with
same name (#4826)
---
.../org/apache/iceberg/nessie/NessieCatalog.java | 11 +++-
.../iceberg/nessie/TestBranchVisibility.java | 28 ++++++++++
.../org/apache/iceberg/nessie/TestNessieTable.java | 62 +++++++++-------------
3 files changed, 62 insertions(+), 39 deletions(-)
diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java
b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java
index b48509becc..4c6b4035ed 100644
--- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java
+++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.UUID;
import java.util.function.Function;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
@@ -199,10 +200,16 @@ public class NessieCatalog extends BaseMetastoreCatalog
@Override
protected String defaultWarehouseLocation(TableIdentifier table) {
+ String location;
if (table.hasNamespace()) {
- return SLASH.join(warehouseLocation, table.namespace().toString(),
table.name());
+ location = SLASH.join(warehouseLocation, table.namespace().toString(),
table.name());
+ } else {
+ location = SLASH.join(warehouseLocation, table.name());
}
- return SLASH.join(warehouseLocation, table.name());
+ // Different tables with same table name can exist across references in
Nessie.
+ // To avoid sharing same table path between two tables with same name, use
uuid in the table
+ // path.
+ return location + "_" + UUID.randomUUID();
}
@Override
diff --git
a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
index 7db904ef92..f2412ec64d 100644
--- a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
+++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java
@@ -484,4 +484,32 @@ public class TestBranchVisibility extends BaseTestIceberg {
nessieCatalog.createTable(identifier2, schema);
Assertions.assertThat(nessieCatalog.listTables(namespace)).hasSize(2);
}
+
+ @Test
+ public void testDifferentTableSameName() throws NessieConflictException,
NessieNotFoundException {
+ String branch1 = "branch1";
+ String branch2 = "branch2";
+ createBranch(branch1, null);
+ createBranch(branch2, null);
+ Schema schema1 =
+ new Schema(Types.StructType.of(required(1, "id",
Types.LongType.get())).fields());
+ Schema schema2 =
+ new Schema(
+ Types.StructType.of(
+ required(1, "file_count", Types.IntegerType.get()),
+ required(2, "record_count", Types.LongType.get()))
+ .fields());
+
+ TableIdentifier identifier = TableIdentifier.of("db", "table1");
+
+ NessieCatalog nessieCatalog = initCatalog(branch1);
+ Table table1 = nessieCatalog.createTable(identifier, schema1);
+
Assertions.assertThat(table1.schema().asStruct()).isEqualTo(schema1.asStruct());
+
+ nessieCatalog = initCatalog(branch2);
+ Table table2 = nessieCatalog.createTable(identifier, schema2);
+
Assertions.assertThat(table2.schema().asStruct()).isEqualTo(schema2.asStruct());
+
+ Assertions.assertThat(table1.location()).isNotEqualTo(table2.location());
+ }
}
diff --git
a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
index c919431a33..272cee2284 100644
--- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
+++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
@@ -25,14 +25,13 @@ import static
org.apache.iceberg.types.Types.NestedField.required;
import java.io.File;
import java.io.IOException;
import java.net.URI;
-import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecordBuilder;
-import org.apache.hadoop.fs.Path;
+import org.apache.commons.io.FileUtils;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.ManifestFile;
@@ -82,7 +81,7 @@ public class TestNessieTable extends BaseTestIceberg {
optional(2, "data", Types.LongType.get()))
.fields());
- private Path tableLocation;
+ private String tableLocation;
public TestNessieTable() {
super(BRANCH);
@@ -92,7 +91,8 @@ public class TestNessieTable extends BaseTestIceberg {
@BeforeEach
public void beforeEach(@NessieUri URI uri) throws IOException {
super.beforeEach(uri);
- this.tableLocation = new Path(catalog.createTable(TABLE_IDENTIFIER,
schema).location());
+ this.tableLocation =
+ catalog.createTable(TABLE_IDENTIFIER,
schema).location().replaceFirst("file:", "");
}
@Override
@@ -100,7 +100,7 @@ public class TestNessieTable extends BaseTestIceberg {
public void afterEach() throws Exception {
// drop the table data
if (tableLocation != null) {
- tableLocation.getFileSystem(hadoopConfig).delete(tableLocation, true);
+ FileUtils.deleteDirectory(new File(tableLocation));
catalog.dropTable(TABLE_IDENTIFIER, false);
}
@@ -200,12 +200,12 @@ public class TestNessieTable extends BaseTestIceberg {
icebergTable.updateSchema().addColumn("mother",
Types.LongType.get()).commit();
getTable(KEY); // sanity, check table exists
// check parameters are in expected state
- String expected = (temp.toUri() + DB_NAME + "/" +
tableName).replace("///", "/");
- Assertions.assertThat(getTableLocation(tableName)).isEqualTo(expected);
+ String expected = temp.toUri() + DB_NAME + "/" + tableName;
+ Assertions.assertThat(getTableBasePath(tableName)).isEqualTo(expected);
// Only 1 snapshotFile Should exist and no manifests should exist
-
Assertions.assertThat(metadataVersionFiles(tableName)).isNotNull().hasSize(2);
- Assertions.assertThat(manifestFiles(tableName)).isNotNull().isEmpty();
+
Assertions.assertThat(metadataVersionFiles(tableLocation)).isNotNull().hasSize(2);
+ Assertions.assertThat(manifestFiles(tableLocation)).isNotNull().isEmpty();
verifyCommitMetadata();
}
@@ -428,7 +428,7 @@ public class TestNessieTable extends BaseTestIceberg {
@Test
public void testRegisterTableWithGivenBranch() {
- List<String> metadataVersionFiles = metadataVersionFiles(TABLE_NAME);
+ List<String> metadataVersionFiles = metadataVersionFiles(tableLocation);
Assertions.assertThat(1).isEqualTo(metadataVersionFiles.size());
ImmutableTableReference tableReference =
ImmutableTableReference.builder().reference("main").name(TABLE_NAME).build();
@@ -439,7 +439,7 @@ public class TestNessieTable extends BaseTestIceberg {
@Test
public void testRegisterTableFailureScenarios()
throws NessieConflictException, NessieNotFoundException {
- List<String> metadataVersionFiles = metadataVersionFiles(TABLE_NAME);
+ List<String> metadataVersionFiles = metadataVersionFiles(tableLocation);
Assertions.assertThat(1).isEqualTo(metadataVersionFiles.size());
// Case 1: Branch does not exist
ImmutableTableReference defaultTableReference =
@@ -485,7 +485,7 @@ public class TestNessieTable extends BaseTestIceberg {
@Test
public void testRegisterTableWithDefaultBranch() {
- List<String> metadataVersionFiles = metadataVersionFiles(TABLE_NAME);
+ List<String> metadataVersionFiles = metadataVersionFiles(tableLocation);
Assertions.assertThat(1).isEqualTo(metadataVersionFiles.size());
Assertions.assertThat(catalog.dropTable(TABLE_IDENTIFIER, false)).isTrue();
validateRegister(TABLE_IDENTIFIER, metadataVersionFiles.get(0));
@@ -493,7 +493,7 @@ public class TestNessieTable extends BaseTestIceberg {
@Test
public void testRegisterTableMoreThanOneBranch() {
- List<String> metadataVersionFiles = metadataVersionFiles(TABLE_NAME);
+ List<String> metadataVersionFiles = metadataVersionFiles(tableLocation);
Assertions.assertThat(1).isEqualTo(metadataVersionFiles.size());
ImmutableTableReference tableReference =
ImmutableTableReference.builder().reference("main").name(TABLE_NAME).build();
@@ -512,8 +512,8 @@ public class TestNessieTable extends BaseTestIceberg {
icebergTable = catalog.loadTable(TABLE_IDENTIFIER);
// Only 2 snapshotFile Should exist and no manifests should exist
-
Assertions.assertThat(metadataVersionFiles(TABLE_NAME)).isNotNull().hasSize(2);
- Assertions.assertThat(manifestFiles(TABLE_NAME)).isNotNull().isEmpty();
+
Assertions.assertThat(metadataVersionFiles(tableLocation)).isNotNull().hasSize(2);
+ Assertions.assertThat(manifestFiles(tableLocation)).isNotNull().isEmpty();
Assertions.assertThat(altered.asStruct()).isEqualTo(icebergTable.schema().asStruct());
}
@@ -567,40 +567,28 @@ public class TestNessieTable extends BaseTestIceberg {
}
private String getTableBasePath(String tableName) {
- String databasePath = temp.toString() + "/" + DB_NAME;
- return Paths.get(databasePath, tableName).toAbsolutePath().toString();
- }
-
- protected Path getTableLocationPath(String tableName) {
- return new Path("file", null,
Paths.get(getTableBasePath(tableName)).toString());
- }
-
- protected String getTableLocation(String tableName) {
- return getTableLocationPath(tableName).toString();
- }
-
- private String metadataLocation(String tableName) {
- return Paths.get(getTableBasePath(tableName), "metadata").toString();
+ return temp.toUri() + DB_NAME + "/" + tableName;
}
@SuppressWarnings(
"RegexpSinglelineJava") // respecting this rule requires a lot more
lines of code
- private List<String> metadataFiles(String tableName) {
- return Arrays.stream(Objects.requireNonNull(new
File(metadataLocation(tableName)).listFiles()))
+ private List<String> metadataFiles(String tablePath) {
+ return Arrays.stream(
+ Objects.requireNonNull(new File((tablePath + "/" +
"metadata")).listFiles()))
.map(File::getAbsolutePath)
.collect(Collectors.toList());
}
- protected List<String> metadataVersionFiles(String tableName) {
- return filterByExtension(tableName,
getFileExtension(TableMetadataParser.Codec.NONE));
+ protected List<String> metadataVersionFiles(String tablePath) {
+ return filterByExtension(tablePath,
getFileExtension(TableMetadataParser.Codec.NONE));
}
- protected List<String> manifestFiles(String tableName) {
- return filterByExtension(tableName, ".avro");
+ protected List<String> manifestFiles(String tablePath) {
+ return filterByExtension(tablePath, ".avro");
}
- private List<String> filterByExtension(String tableName, String extension) {
- return metadataFiles(tableName).stream()
+ private List<String> filterByExtension(String tablePath, String extension) {
+ return metadataFiles(tablePath).stream()
.filter(f -> f.endsWith(extension))
.collect(Collectors.toList());
}