dimas-b commented on code in PR #4237:
URL: https://github.com/apache/polaris/pull/4237#discussion_r3326040087
##########
runtime/service/src/test/java/org/apache/polaris/service/catalog/generic/AbstractPolarisGenericTableCatalogTest.java:
##########
@@ -511,4 +546,85 @@ public void testDropViaIcebergView() {
Assertions.assertThat(genericTableCatalog.loadGenericTable(TableIdentifier.of("ns",
"t1")))
.isNotNull();
}
+
+ @Test
+ public void testCreateGenericTableOverlapsExistingGenericTable() {
+ Namespace namespace = Namespace.of("ns");
+ icebergCatalog.createNamespace(namespace);
+ genericTableCatalog.createGenericTable(
+ TableIdentifier.of("ns", "t1"),
+ "format",
+ "s3://my-bucket/path/to/data/ns/t1",
+ "doc",
+ Map.of());
+
+ Assertions.assertThatThrownBy(
+ () ->
+ genericTableCatalog.createGenericTable(
+ TableIdentifier.of("ns", "t2"),
+ "format",
+ "s3://my-bucket/path/to/data/ns/t1/sub",
+ "doc",
+ Map.of()))
+ .isInstanceOf(ForbiddenException.class)
+ .hasMessageContaining("conflicts with");
+ }
+
+ @Test
+ public void testCreateGenericTableOverlapsExistingIcebergTable() {
+ Namespace namespace = Namespace.of("ns");
+ icebergCatalog.createNamespace(namespace);
+ icebergCatalog.createTable(TableIdentifier.of("ns", "t1"), SCHEMA);
+
+ Assertions.assertThatThrownBy(
+ () ->
+ genericTableCatalog.createGenericTable(
+ TableIdentifier.of("ns", "t2"),
+ "format",
+ "s3://my-bucket/path/to/data/ns/t1/sub",
Review Comment:
What about the reverse order (first create a Generic table, then an Iceberg
table with and overlapping location)?
##########
plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java:
##########
@@ -252,4 +269,81 @@ public void testDataframeSaveOperations() {
sql("DROP TABLE %s", deltatb);
}
+
+ @Test
+ public void testCreateTableWithInvalidLocationFails() {
+ String deltatb = getTableNameWithRandomSuffix();
+ String invalidLocation =
+ new File(System.getProperty("java.io.tmpdir"), "invalid_" +
deltatb).getAbsolutePath();
+ assertThatThrownBy(
+ () ->
+ sql("CREATE TABLE %s (id INT) USING DELTA LOCATION '%s'",
deltatb, invalidLocation))
+ .hasMessageContaining("is not in the list of allowed locations");
+ }
+
+ @Test
+ public void testCreateTableWithOverlappingLocationFails() {
+ String deltatb1 = getTableNameWithRandomSuffix();
+ String location1 = getTableLocation(deltatb1);
+ sql("CREATE TABLE %s (id INT, name STRING) USING DELTA LOCATION '%s'",
deltatb1, location1);
+ sql("INSERT INTO %s VALUES (1, 'anna')", deltatb1);
+
+ String deltatb2 = getTableNameWithRandomSuffix();
+ String overlappingLocation = location1 + "/child";
+ assertThatThrownBy(
+ () ->
+ sql(
+ "CREATE TABLE %s (id INT) USING DELTA LOCATION '%s'",
+ deltatb2, overlappingLocation))
+ .hasMessageContaining("conflicts with");
+
+ sql("DROP TABLE %s", deltatb1);
+ }
+
+ @Test
+ public void testMixedTableAndViews() {
+ String icebergTable = "icebergtb";
+ sql("CREATE TABLE %s (col1 int, col2 String)", icebergTable);
+ sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b')", icebergTable);
+
+ String viewName = "icebergview";
+ sql("CREATE VIEW %s AS SELECT col1 + 2 AS col1, col2 FROM %s", viewName,
icebergTable);
+
+ String deltaTable = "deltatb";
+ sql(
+ "CREATE TABLE %s (col1 int, col2 int) using delta location '%s'",
+ deltaTable, getTableLocation(deltaTable));
+ sql("INSERT INTO %s VALUES (1, 3), (2, 5), (11, 20)", deltaTable);
+
+ List<Object[]> joinResult =
+ sql(
+ "SELECT icebergtb.col1 as id, icebergtb.col2 as str_col,
deltatb.col2 as int_col from icebergtb inner join deltatb on icebergtb.col1 =
deltatb.col1 order by id");
+ assertThat(joinResult.get(0)).isEqualTo(new Object[] {1, "a", 3});
Review Comment:
Why does it matter for a "location validation" PR? 🤔
##########
runtime/service/src/test/java/org/apache/polaris/service/catalog/iceberg/IcebergOverlappingTableTest.java:
##########
@@ -293,21 +293,44 @@ void testTableLocationRestrictions(
services, String.format("%s/%s/%s/table_100", baseLocation,
catalog, namespace)))
.isEqualTo(Response.Status.OK.getStatusCode());
- // Repeat location
+ // Repeat location (iceberg at existing iceberg location)
assertThat(
createTable(
services, String.format("%s/%s/%s/table_100", baseLocation,
catalog, namespace)))
.isEqualTo(expectedStatusForOverlaps);
+ // Iceberg at existing generic table location
assertThat(
createTable(
services, String.format("%s/%s/%s/generic_1", baseLocation,
catalog, namespace)))
.isEqualTo(expectedStatusForOverlaps);
- // Parent of existing location
+ // Generic table at existing iceberg table location
+ assertThat(
+ createGenericTable(
Review Comment:
Feedback from artificial helpers:
> P3 The new generic-table overlap test helper builds an invalid API
request: it sets name and base-location but omits required format
(runtime/service/
src/test/java/org/apache/polaris/service/catalog/iceberg/IcebergOverlappingTableTest.java:94).
The OpenAPI schema requires both name and format (spec/
polaris-catalog-apis/generic-tables-api.yaml:210), so these tests are
not exercising a valid generic-table create request and could fail for the wrong
reason if request validation is tightened.
##########
plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java:
##########
@@ -252,4 +269,81 @@ public void testDataframeSaveOperations() {
sql("DROP TABLE %s", deltatb);
}
+
+ @Test
+ public void testCreateTableWithInvalidLocationFails() {
+ String deltatb = getTableNameWithRandomSuffix();
+ String invalidLocation =
+ new File(System.getProperty("java.io.tmpdir"), "invalid_" +
deltatb).getAbsolutePath();
Review Comment:
`System.getProperty("java.io.tmpdir")` looks a bit strange... why not use
the `@TempDir` annotation supported by JUnit?
##########
runtime/service/src/main/java/org/apache/polaris/service/catalog/generic/PolarisGenericTableCatalog.java:
##########
@@ -89,6 +101,43 @@ public GenericTableEntity createGenericTable(
"Failed to fetch resolved parent for TableIdentifier '%s'",
tableIdentifier));
}
+ if (baseLocation != null && !baseLocation.isEmpty()) {
+ LOGGER.debug(
+ "Validating location and overlap for generic table '{}' at '{}'",
+ tableIdentifier,
+ baseLocation);
+ CatalogUtils.validateLocationForTableLike(
+ resolvedEntityView, callContext.getRealmConfig(), tableIdentifier,
baseLocation);
+
+ CatalogEntity catalogEntity =
resolvedEntityView.getResolvedCatalogEntity();
+ if (catalogEntity == null) {
+ throw new IllegalStateException(
+ String.format("Failed to resolve catalog entity for table '%s'",
tableIdentifier));
+ }
+ RealmConfig realmConfig = callContext.getRealmConfig();
+ if (!realmConfig.getConfig(
+ FeatureConfiguration.ALLOW_TABLE_LOCATION_OVERLAP, catalogEntity)) {
+ LOGGER.debug("Validating no overlap with sibling tables or
namespaces");
+ PolarisEntity lastParent = resolvedParent.getRawLeafEntity();
+ GenericTableEntity virtualEntity =
+ new GenericTableEntity.Builder(tableIdentifier, "")
+ .setCatalogId(catalogId)
+ .setParentId(lastParent.getId())
+ .setBaseLocation(baseLocation)
+ .build();
+ CatalogUtils.validateNoLocationOverlap(
Review Comment:
Feedback from artificial helpers:
> P1 Generic-table overlap checks are bypassed with
OPTIMIZED_SIBLING_CHECK=true. The new generic path calls
CatalogUtils.validateNoLocationOverlap
(runtime/service/src/main/java/org/apache/polaris/service/catalog/generic/PolarisGenericTableCatalog.java:128),
and CatalogUtils returns immediately when
the optimized check reports no conflict
(runtime/service/src/main/java/org/apache/polaris/service/catalog/common/CatalogUtils.java:168).
But generic
table locations are stored in internal properties
(polaris-core/src/main/java/org/apache/polaris/core/entity/table/GenericTableEntity.java:70),
while
optimized location indexing/read paths only include Iceberg tables/views
and namespaces (persistence/relational-jdbc/src/main/java/org/apache/polaris/
persistence/relational/jdbc/models/ModelEntity.java:389,
polaris-core/src/main/java/org/apache/polaris/core/persistence/transactional/
TreeMapTransactionalPersistenceImpl.java:663). With optimized checking
enabled, an existing generic table is invisible, so overlapping generic/generic
or
Iceberg/generic locations can be accepted despite
ALLOW_TABLE_LOCATION_OVERLAP=false.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]