This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 83d12ef28b [#9277] improve(lance): Improve Lance table creation to
support creation modes (#9279)
83d12ef28b is described below
commit 83d12ef28b11103dd9c43de19a3f837a783994d5
Author: Jerry Shao <[email protected]>
AuthorDate: Mon Dec 1 10:27:32 2025 +0800
[#9277] improve(lance): Improve Lance table creation to support creation
modes (#9279)
### What changes were proposed in this pull request?
This PR improves Lance table creation to support three creation modes
(CREATE, EXIST_OK, OVERWRITE) by passing the mode as a property rather
than using exception-based flow control.
**Key changes:**
- Add `LANCE_CREATION_MODE` property to support CREATE, EXIST_OK, and
OVERWRITE modes
- Refactor `LanceTableOperations.createTable()` to handle all three
creation modes upfront
- Add `CreationMode` enum (CREATE, EXIST_OK, OVERWRITE) to
`LanceTableOperations`
- Extract `createTableInternal()` method to handle actual table creation
logic
- Simplify `GravitinoLanceTableOperations` to pass mode via property
instead of exception handling
- Centralize Lance-related constants in `LanceConstants` class
- Automatically mark registered tables as external to preserve physical
datasets
- Add comprehensive unit tests and integration tests
### Why are the changes needed?
Fix: #9277
The current implementation uses exception handling (catch
TableAlreadyExistsException) to handle different creation modes, which
requires multiple round-trip requests and complicates the code. This
change:
1. Eliminates multiple round-trip requests for EXIST_OK and OVERWRITE
modes
2. Improves performance by handling modes server-side in a single
request
3. Simplifies codebase by removing exception-based flow control
4. Makes the API more consistent and easier to maintain
### Does this PR introduce _any_ user-facing change?
Yes, users can now specify the `lance.creation-mode` property when
creating or registering Lance tables:
- `CREATE` (default): Fails if table already exists
- `EXIST_OK`: Returns existing table if found, creates if not
- `OVERWRITE`: Drops/purges existing table and creates new one
### How was this patch tested?
- Added unit tests in `TestLanceTableOperations` for mode validation and
error handling
- Added integration tests in `CatalogGenericCatalogLanceIT` for:
- createTable with CREATE, EXIST_OK, and OVERWRITE modes
- registerTable with CREATE, EXIST_OK, and OVERWRITE modes
- Existing tests in `LanceRESTServiceIT` already cover end-to-end
scenarios
- All tests passing
---
.../lakehouse/lance/LanceTableDelegator.java | 26 +-
.../lakehouse/lance/LanceTableOperations.java | 125 +++++--
.../lakehouse/lance/TestLanceTableOperations.java | 92 +++++
.../test/CatalogGenericCatalogLanceIT.java | 407 +++++++++++++++++++++
.../gravitino/GravitinoLanceTableOperations.java | 86 ++---
.../lance/common/utils/LanceConstants.java | 7 +
.../lance/service/LanceExceptionMapper.java | 5 +
.../lance/service/rest/LanceTableOperations.java | 3 +-
.../lance/integration/test/LanceRESTServiceIT.java | 1 -
.../service/rest/TestLanceNamespaceOperations.java | 6 +-
10 files changed, 651 insertions(+), 107 deletions(-)
diff --git
a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java
b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java
index d23322eb17..6fe7e64fae 100644
---
a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java
+++
b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java
@@ -18,6 +18,11 @@
*/
package org.apache.gravitino.catalog.lakehouse.lance;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_STORAGE_OPTIONS_PREFIX;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_FORMAT;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_REGISTER;
+
import com.google.common.collect.ImmutableList;
import java.util.List;
import org.apache.gravitino.EntityStore;
@@ -29,12 +34,6 @@ import org.apache.gravitino.storage.IdGenerator;
public class LanceTableDelegator implements LakehouseTableDelegator {
- public static final String LANCE_TABLE_FORMAT = "lance";
-
- public static final String PROPERTY_LANCE_TABLE_REGISTER = "lance.register";
-
- public static final String PROPERTY_LANCE_STORAGE_OPTIONS_PREFIX =
"lance.storage.";
-
@Override
public String tableFormat() {
return LANCE_TABLE_FORMAT;
@@ -44,20 +43,29 @@ public class LanceTableDelegator implements
LakehouseTableDelegator {
public List<PropertyEntry<?>> tablePropertyEntries() {
return ImmutableList.of(
PropertyEntry.stringOptionalPropertyPrefixEntry(
- PROPERTY_LANCE_STORAGE_OPTIONS_PREFIX,
+ LANCE_STORAGE_OPTIONS_PREFIX,
"The storage options passed to Lance table.",
false /* immutable */,
null /* default value*/,
false /* hidden */,
false /* reserved */),
PropertyEntry.booleanPropertyEntry(
- PROPERTY_LANCE_TABLE_REGISTER,
+ LANCE_TABLE_REGISTER,
"Whether this is a table registration operation.",
false,
true /* immutable */,
false /* defaultValue */,
false /* hidden */,
- false));
+ false),
+ PropertyEntry.enumPropertyEntry(
+ LANCE_CREATION_MODE,
+ "Creation mode for Lance table: CREATE, EXIST_OK, or OVERWRITE",
+ false /* required */,
+ true /* immutable */,
+ LanceTableOperations.CreationMode.class,
+ LanceTableOperations.CreationMode.CREATE,
+ false /* hidden */,
+ false /* reserved */));
}
@Override
diff --git
a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java
b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java
index 2eed5288be..529c798c1e 100644
---
a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java
+++
b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java
@@ -18,6 +18,9 @@
*/
package org.apache.gravitino.catalog.lakehouse.lance;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_REGISTER;
+
import com.google.common.base.Preconditions;
import com.lancedb.lance.Dataset;
import com.lancedb.lance.WriteParams;
@@ -58,6 +61,12 @@ import org.slf4j.LoggerFactory;
public class LanceTableOperations extends ManagedTableOperations {
private static final Logger LOG =
LoggerFactory.getLogger(LanceTableOperations.class);
+ public enum CreationMode {
+ CREATE,
+ EXIST_OK,
+ OVERWRITE
+ }
+
private final EntityStore store;
private final ManagedSchemaOperations schemaOps;
@@ -100,45 +109,51 @@ public class LanceTableOperations extends
ManagedTableOperations {
String location = properties.get(Table.PROPERTY_LOCATION);
Preconditions.checkArgument(
StringUtils.isNotBlank(location), "Table location must be specified");
- Map<String, String> storageProps =
LancePropertiesUtils.getLanceStorageOptions(properties);
+
+ // Extract creation mode from properties
+ CreationMode mode =
+ Optional.ofNullable(properties.get(LANCE_CREATION_MODE))
+ .map(CreationMode::valueOf)
+ .orElse(CreationMode.CREATE);
boolean register =
-
Optional.ofNullable(properties.get(LanceTableDelegator.PROPERTY_LANCE_TABLE_REGISTER))
+ Optional.ofNullable(properties.get(LANCE_TABLE_REGISTER))
.map(Boolean::parseBoolean)
.orElse(false);
- if (register) {
- // If this is a registration operation, just create the table metadata
without creating a new
- // dataset
- return super.createTable(
- ident, columns, comment, properties, partitions, distribution,
sortOrders, indexes);
+
+ // Handle EXIST_OK mode - check if table exists first
+ if (mode == CreationMode.EXIST_OK) {
+ Preconditions.checkArgument(
+ !register, "EXIST_OK mode is not supported for register operation");
+
+ try {
+ return super.loadTable(ident);
+ } catch (NoSuchTableException e) {
+ // Table doesn't exist, proceed with creation
+ }
}
- try (Dataset ignored =
- Dataset.create(
- new RootAllocator(),
- location,
- convertColumnsToArrowSchema(columns),
- new
WriteParams.Builder().withStorageOptions(storageProps).build())) {
- // Only create the table metadata in Gravitino after the Lance dataset
is successfully
- // created.
- return super.createTable(
- ident, columns, comment, properties, partitions, distribution,
sortOrders, indexes);
- } catch (NoSuchSchemaException e) {
- throw e;
- } catch (TableAlreadyExistsException e) {
- // If the table metadata already exists, but the underlying lance table
was just created
- // successfully, we need to clean up the created lance table to avoid
orphaned datasets.
- Dataset.drop(location,
LancePropertiesUtils.getLanceStorageOptions(properties));
- throw e;
- } catch (IllegalArgumentException e) {
- if (e.getMessage().contains("Dataset already exists")) {
- throw new TableAlreadyExistsException(
- e, "Lance dataset already exists at location %s", location);
+ // Handle OVERWRITE mode - drop existing table if present
+ if (mode == CreationMode.OVERWRITE) {
+ if (register) {
+ dropTable(ident);
+ } else {
+ purgeTable(ident);
}
- throw e;
- } catch (Exception e) {
- throw new RuntimeException("Failed to create Lance dataset at location "
+ location, e);
}
+
+ // Now create the table (for all modes after handling above)
+ return createTableInternal(
+ ident,
+ columns,
+ comment,
+ properties,
+ partitions,
+ distribution,
+ sortOrders,
+ indexes,
+ register,
+ location);
}
@Override
@@ -229,6 +244,54 @@ public class LanceTableOperations extends
ManagedTableOperations {
}
}
+ // Package-private for testing
+ Table createTableInternal(
+ NameIdentifier ident,
+ Column[] columns,
+ String comment,
+ Map<String, String> properties,
+ Transform[] partitions,
+ Distribution distribution,
+ SortOrder[] sortOrders,
+ Index[] indexes,
+ boolean register,
+ String location)
+ throws NoSuchSchemaException, TableAlreadyExistsException {
+
+ if (register) {
+ return super.createTable(
+ ident, columns, comment, properties, partitions, distribution,
sortOrders, indexes);
+ }
+
+ Map<String, String> storageProps =
LancePropertiesUtils.getLanceStorageOptions(properties);
+ try (Dataset ignored =
+ Dataset.create(
+ new RootAllocator(),
+ location,
+ convertColumnsToArrowSchema(columns),
+ new
WriteParams.Builder().withStorageOptions(storageProps).build())) {
+ // Only create the table metadata in Gravitino after the Lance dataset
is successfully
+ // created.
+ return super.createTable(
+ ident, columns, comment, properties, partitions, distribution,
sortOrders, indexes);
+ } catch (NoSuchSchemaException e) {
+ throw e;
+ } catch (TableAlreadyExistsException e) {
+ // If the table metadata already exists, but the underlying lance table
was just created
+ // successfully, we need to clean up the created lance table to avoid
orphaned datasets.
+ Dataset.drop(location,
LancePropertiesUtils.getLanceStorageOptions(properties));
+ throw e;
+ } catch (IllegalArgumentException e) {
+ if (e.getMessage().contains("Dataset already exists")) {
+ throw new TableAlreadyExistsException(
+ e, "Lance dataset already exists at location %s", location);
+ }
+ throw e;
+ } catch (Exception e) {
+ throw new RuntimeException("Failed to create Lance dataset at location "
+ location, e);
+ }
+ }
+
private org.apache.arrow.vector.types.pojo.Schema
convertColumnsToArrowSchema(Column[] columns) {
List<Field> fields =
Arrays.stream(columns)
diff --git
a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceTableOperations.java
b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceTableOperations.java
new file mode 100644
index 0000000000..e87b85fa6e
--- /dev/null
+++
b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceTableOperations.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.lakehouse.lance;
+
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+
+import com.google.common.collect.Maps;
+import java.util.Map;
+import org.apache.gravitino.EntityStore;
+import org.apache.gravitino.NameIdentifier;
+import org.apache.gravitino.catalog.ManagedSchemaOperations;
+import org.apache.gravitino.rel.Column;
+import org.apache.gravitino.rel.Table;
+import org.apache.gravitino.rel.expressions.sorts.SortOrder;
+import org.apache.gravitino.rel.expressions.transforms.Transform;
+import org.apache.gravitino.rel.indexes.Index;
+import org.apache.gravitino.rel.types.Types;
+import org.apache.gravitino.storage.IdGenerator;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class TestLanceTableOperations {
+
+ @TempDir private java.nio.file.Path tempDir;
+
+ private LanceTableOperations lanceTableOps;
+ private EntityStore store;
+ private ManagedSchemaOperations schemaOps;
+ private IdGenerator idGenerator;
+
+ @BeforeEach
+ public void setUp() {
+ store = mock(EntityStore.class);
+ schemaOps = mock(ManagedSchemaOperations.class);
+ idGenerator = mock(IdGenerator.class);
+ lanceTableOps = spy(new LanceTableOperations(store, schemaOps,
idGenerator));
+ }
+
+ @Test
+ public void testCreationModeEnum() {
+ // Test that CreationMode enum has expected values
+ Assertions.assertEquals(3,
LanceTableOperations.CreationMode.values().length);
+
Assertions.assertNotNull(LanceTableOperations.CreationMode.valueOf("CREATE"));
+
Assertions.assertNotNull(LanceTableOperations.CreationMode.valueOf("EXIST_OK"));
+
Assertions.assertNotNull(LanceTableOperations.CreationMode.valueOf("OVERWRITE"));
+ }
+
+ @Test
+ public void testCreateTableWithInvalidMode() {
+ // Arrange
+ NameIdentifier ident = NameIdentifier.of("catalog", "schema", "table");
+ Column[] columns = new Column[] {Column.of("id", Types.IntegerType.get(),
"id column")};
+ String location = tempDir.resolve("table6").toString();
+ Map<String, String> properties = Maps.newHashMap();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(LANCE_CREATION_MODE, "INVALID_MODE");
+
+ // Act & Assert
+ Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ lanceTableOps.createTable(
+ ident,
+ columns,
+ null,
+ properties,
+ new Transform[0],
+ null,
+ new SortOrder[0],
+ new Index[0]));
+ }
+}
diff --git
a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java
b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java
index 01b0ce2aed..1988a2d56d 100644
---
a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java
+++
b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java
@@ -18,6 +18,10 @@
*/
package org.apache.gravitino.catalog.lakehouse.lance.integration.test;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_FORMAT;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_REGISTER;
+
import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
@@ -490,4 +494,407 @@ public class CatalogGenericCatalogLanceIT extends BaseIT {
return properties;
}
+
+ @Test
+ public void testCreateTableWithExistOkMode() {
+ // Create initial table
+ Column[] columns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment"),
+ Column.of(LANCE_COL_NAME3, Types.LongType.get(), "col_3_comment")
+ };
+
+ NameIdentifier tableIdentifier = NameIdentifier.of(schemaName, tableName);
+ String location = String.format("%s/%s/%s", tempDirectory, schemaName,
tableName);
+
+ Map<String, String> properties = createProperties();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ properties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Table createdTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ properties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+ Assertions.assertNotNull(createdTable);
+
+ // Try to create the same table again with EXIST_OK mode
+ Map<String, String> existOkProperties = createProperties();
+ existOkProperties.put(Table.PROPERTY_LOCATION, location);
+ existOkProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ existOkProperties.put(LANCE_CREATION_MODE, "EXIST_OK");
+ existOkProperties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Table existingTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ existOkProperties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+
+ // Should return the existing table without error
+ Assertions.assertNotNull(existingTable);
+ Assertions.assertEquals(createdTable.name(), existingTable.name());
+
+ // Verify the table exists on disk
+ File tableDir = new File(location);
+ Assertions.assertTrue(tableDir.exists());
+ }
+
+ @Test
+ public void testCreateTableWithOverwriteMode() {
+ // Create initial table
+ Column[] columns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment")
+ };
+
+ NameIdentifier tableIdentifier =
+ NameIdentifier.of(schemaName,
GravitinoITUtils.genRandomName(TABLE_PREFIX));
+ String location = String.format("%s/%s/%s", tempDirectory, schemaName,
tableIdentifier.name());
+
+ Map<String, String> properties = createProperties();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ properties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Table createdTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ properties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+ Assertions.assertNotNull(createdTable);
+
+ // Create the table again with OVERWRITE mode and different columns
+ Column[] newColumns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment"),
+ Column.of(LANCE_COL_NAME3, Types.LongType.get(), "col_3_comment")
+ };
+
+ Map<String, String> overwriteProperties = createProperties();
+ overwriteProperties.put(Table.PROPERTY_LOCATION, location);
+ overwriteProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ overwriteProperties.put(LANCE_CREATION_MODE, "OVERWRITE");
+ overwriteProperties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Table overwrittenTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ newColumns,
+ TABLE_COMMENT,
+ overwriteProperties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+
+ // Should create a new table
+ Assertions.assertNotNull(overwrittenTable);
+ Assertions.assertEquals(3, overwrittenTable.columns().length);
+
+ // Verify the table exists on disk
+ File tableDir = new File(location);
+ Assertions.assertTrue(tableDir.exists());
+ }
+
+ @Test
+ public void testCreateTableWithCreateModeFailsWhenExists() {
+ // Create initial table
+ Column[] columns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment")
+ };
+
+ NameIdentifier tableIdentifier =
+ NameIdentifier.of(schemaName,
GravitinoITUtils.genRandomName(TABLE_PREFIX));
+ String location = String.format("%s/%s/%s", tempDirectory, schemaName,
tableIdentifier.name());
+
+ Map<String, String> properties = createProperties();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ properties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Table createdTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ properties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+ Assertions.assertNotNull(createdTable);
+
+ // Try to create the same table again with CREATE mode (default) - should
fail
+ Map<String, String> createProperties = createProperties();
+ createProperties.put(Table.PROPERTY_LOCATION, location);
+ createProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ createProperties.put(LANCE_CREATION_MODE, "CREATE");
+ createProperties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Assertions.assertThrows(
+ Exception.class,
+ () ->
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ createProperties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]));
+ }
+
+ @Test
+ public void testRegisterTableWithExistOkMode() throws IOException {
+ // First, create a physical Lance dataset
+ Column[] columns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment")
+ };
+
+ NameIdentifier tableIdentifier =
+ NameIdentifier.of(schemaName,
GravitinoITUtils.genRandomName(TABLE_PREFIX));
+ String location = String.format("%s/%s/%s", tempDirectory, schemaName,
tableIdentifier.name());
+
+ // Create a physical Lance dataset using Lance SDK directly
+ org.apache.arrow.vector.types.pojo.Schema arrowSchema =
+ new org.apache.arrow.vector.types.pojo.Schema(
+ Arrays.asList(
+ Field.nullable("lance_col_name1", new ArrowType.Int(32, true)),
+ Field.nullable("lance_col_name2", new ArrowType.Utf8())));
+
+ try (RootAllocator allocator = new RootAllocator();
+ Dataset dataset =
+ Dataset.create(allocator, location, arrowSchema, new
WriteParams.Builder().build())) {
+ // Dataset created successfully
+ }
+
+ // Register the table in Gravitino
+ Map<String, String> properties = createProperties();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ properties.put(LANCE_TABLE_REGISTER, "true");
+ properties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ Table registeredTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ properties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+
+ Assertions.assertNotNull(registeredTable);
+ Assertions.assertEquals(tableIdentifier.name(), registeredTable.name());
+
+ Map<String, String> existOkProperties = createProperties();
+ existOkProperties.put(Table.PROPERTY_LOCATION, location);
+ existOkProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ existOkProperties.put(LANCE_TABLE_REGISTER, "true");
+ existOkProperties.put(Table.PROPERTY_EXTERNAL, "true");
+ existOkProperties.put(LANCE_CREATION_MODE, "EXIST_OK");
+
+ // Throw an exception for registering the table with EXIST_OK mode, since
register operation
+ // doesn't support EXIST_OK mode currently.
+ Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ existOkProperties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]));
+ }
+
+ @Test
+ public void testRegisterTableWithOverwriteMode() throws IOException {
+ // First, create a physical Lance dataset
+ Column[] columns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment")
+ };
+
+ NameIdentifier tableIdentifier =
+ NameIdentifier.of(schemaName,
GravitinoITUtils.genRandomName(TABLE_PREFIX));
+ String location = String.format("%s/%s/%s", tempDirectory, schemaName,
tableIdentifier.name());
+
+ // Create a physical Lance dataset
+ org.apache.arrow.vector.types.pojo.Schema arrowSchema =
+ new org.apache.arrow.vector.types.pojo.Schema(
+ Arrays.asList(
+ Field.nullable("lance_col_name1", new ArrowType.Int(32, true)),
+ Field.nullable("lance_col_name2", new ArrowType.Utf8())));
+
+ try (RootAllocator allocator = new RootAllocator();
+ Dataset dataset =
+ Dataset.create(allocator, location, arrowSchema, new
WriteParams.Builder().build())) {
+ // Dataset created
+ }
+
+ // Register the table in Gravitino
+ Map<String, String> properties = createProperties();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ properties.put(Table.PROPERTY_EXTERNAL, "true");
+ properties.put(LANCE_TABLE_REGISTER, "true");
+
+ Table registeredTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ properties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+
+ Assertions.assertNotNull(registeredTable);
+
+ // Register again with OVERWRITE mode - should replace metadata
+ Column[] newColumns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment"),
+ Column.of(LANCE_COL_NAME3, Types.LongType.get(), "col_3_comment")
+ };
+
+ Map<String, String> overwriteProperties = createProperties();
+ overwriteProperties.put(Table.PROPERTY_LOCATION, location);
+ overwriteProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ overwriteProperties.put(Table.PROPERTY_EXTERNAL, "true");
+ overwriteProperties.put(LANCE_TABLE_REGISTER, "true");
+ overwriteProperties.put(LANCE_CREATION_MODE, "OVERWRITE");
+
+ Table overwrittenTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ newColumns,
+ "Updated comment",
+ overwriteProperties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+
+ Assertions.assertNotNull(overwrittenTable);
+ Assertions.assertEquals(3, overwrittenTable.columns().length);
+ Assertions.assertEquals("Updated comment", overwrittenTable.comment());
+
+ // Verify physical dataset still exists
+ File tableDir = new File(location);
+ Assertions.assertTrue(tableDir.exists());
+ }
+
+ @Test
+ public void testRegisterTableWithCreateModeFailsWhenExists() throws
IOException {
+ // First, create a physical Lance dataset
+ Column[] columns =
+ new Column[] {
+ Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"),
+ Column.of(LANCE_COL_NAME2, Types.StringType.get(), "col_2_comment")
+ };
+
+ NameIdentifier tableIdentifier =
+ NameIdentifier.of(schemaName,
GravitinoITUtils.genRandomName(TABLE_PREFIX));
+ String location = String.format("%s/%s/%s", tempDirectory, schemaName,
tableIdentifier.name());
+
+ // Create a physical Lance dataset
+ org.apache.arrow.vector.types.pojo.Schema arrowSchema =
+ new org.apache.arrow.vector.types.pojo.Schema(
+ Arrays.asList(
+ Field.nullable("lance_col_name1", new ArrowType.Int(32, true)),
+ Field.nullable("lance_col_name2", new ArrowType.Utf8())));
+
+ try (RootAllocator allocator = new RootAllocator();
+ Dataset dataset =
+ Dataset.create(allocator, location, arrowSchema, new
WriteParams.Builder().build())) {
+ // Dataset created
+ }
+
+ // Register the table in Gravitino
+ Map<String, String> properties = createProperties();
+ properties.put(Table.PROPERTY_LOCATION, location);
+ properties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ properties.put(Table.PROPERTY_EXTERNAL, "true");
+ properties.put(LANCE_TABLE_REGISTER, "true");
+
+ Table registeredTable =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ properties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]);
+
+ Assertions.assertNotNull(registeredTable);
+
+ // Try to register again with CREATE mode - should fail
+ Map<String, String> createProperties = createProperties();
+ createProperties.put(Table.PROPERTY_LOCATION, location);
+ createProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ createProperties.put(Table.PROPERTY_EXTERNAL, "true");
+ createProperties.put(LANCE_TABLE_REGISTER, "true");
+ createProperties.put(LANCE_CREATION_MODE, "CREATE");
+
+ Assertions.assertThrows(
+ Exception.class,
+ () ->
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier,
+ columns,
+ TABLE_COMMENT,
+ createProperties,
+ Transforms.EMPTY_TRANSFORM,
+ Distributions.NONE,
+ new SortOrder[0]));
+ }
}
diff --git
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java
index 68add7a863..ae0f567215 100644
---
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java
+++
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java
@@ -20,7 +20,9 @@
package org.apache.gravitino.lance.common.ops.gravitino;
import static
org.apache.gravitino.lance.common.ops.gravitino.LanceDataTypeConverter.CONVERTER;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE;
import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_LOCATION;
+import static
org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_FORMAT;
import static org.apache.gravitino.rel.Column.DEFAULT_VALUE_NOT_SET;
import com.google.common.base.Preconditions;
@@ -51,17 +53,13 @@ import org.apache.arrow.vector.types.pojo.Field;
import org.apache.gravitino.Catalog;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.exceptions.NoSuchTableException;
-import org.apache.gravitino.exceptions.TableAlreadyExistsException;
import org.apache.gravitino.lance.common.ops.LanceTableOperations;
import org.apache.gravitino.lance.common.utils.ArrowUtils;
import org.apache.gravitino.lance.common.utils.LancePropertiesUtils;
import org.apache.gravitino.rel.Column;
import org.apache.gravitino.rel.Table;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
public class GravitinoLanceTableOperations implements LanceTableOperations {
- private static final Logger LOG =
LoggerFactory.getLogger(GravitinoLanceTableOperations.class);
private final GravitinoLanceNamespaceWrapper namespaceWrapper;
@@ -128,43 +126,18 @@ public class GravitinoLanceTableOperations implements
LanceTableOperations {
createTableProperties.put(LANCE_LOCATION, tableLocation);
}
// The format is defined in GenericLakehouseCatalog
- createTableProperties.put(Table.PROPERTY_TABLE_FORMAT, "lance");
+ createTableProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
createTableProperties.put(Table.PROPERTY_EXTERNAL, "true");
- Table t;
- try {
- t =
- catalog
- .asTableCatalog()
- .createTable(
- tableIdentifier, columns.toArray(new Column[0]), null,
createTableProperties);
- } catch (TableAlreadyExistsException exception) {
- if (mode == CreateTableRequest.ModeEnum.CREATE) {
- throw LanceNamespaceException.conflict(
- "Table already exists: " + tableId,
- TableAlreadyExistsException.class.getSimpleName(),
- tableId,
- CommonUtil.formatCurrentStackTrace());
- } else if (mode == CreateTableRequest.ModeEnum.OVERWRITE) {
- LOG.info("Overwriting existing table: {}", tableId);
- catalog.asTableCatalog().purgeTable(tableIdentifier);
-
- t =
- catalog
- .asTableCatalog()
- .createTable(
- tableIdentifier, columns.toArray(new Column[0]), null,
createTableProperties);
- } else { // EXIST_OK
- CreateTableResponse response = new CreateTableResponse();
- Table existingTable =
catalog.asTableCatalog().loadTable(tableIdentifier);
- response.setProperties(existingTable.properties());
- response.setLocation(existingTable.properties().get(LANCE_LOCATION));
- response.setVersion(null);
- response.setStorageOptions(
-
LancePropertiesUtils.getLanceStorageOptions(existingTable.properties()));
- return response;
- }
- }
+ // Pass creation mode as property to delegate handling to
LanceTableOperations
+ createTableProperties.put(LANCE_CREATION_MODE, mode.name());
+
+ // Single call - mode is handled server-side
+ Table t =
+ catalog
+ .asTableCatalog()
+ .createTable(
+ tableIdentifier, columns.toArray(new Column[0]), null,
createTableProperties);
CreateTableResponse response = new CreateTableResponse();
response.setProperties(t.properties());
@@ -206,30 +179,17 @@ public class GravitinoLanceTableOperations implements
LanceTableOperations {
NameIdentifier.of(nsId.levelAtListPos(1), nsId.levelAtListPos(2));
Map<String, String> copiedTableProperties =
Maps.newHashMap(tableProperties);
- copiedTableProperties.put("format", "lance");
- Table t = null;
- try {
- t =
- catalog
- .asTableCatalog()
- .createTable(tableIdentifier, new Column[] {}, null,
copiedTableProperties);
- } catch (TableAlreadyExistsException exception) {
- if (mode == RegisterTableRequest.ModeEnum.CREATE) {
- throw LanceNamespaceException.conflict(
- "Table already exists: " + tableId,
- TableAlreadyExistsException.class.getSimpleName(),
- tableId,
- CommonUtil.formatCurrentStackTrace());
- } else if (mode == RegisterTableRequest.ModeEnum.OVERWRITE) {
- LOG.info("Overwriting existing table: {}", tableId);
- catalog.asTableCatalog().dropTable(tableIdentifier);
-
- t =
- catalog
- .asTableCatalog()
- .createTable(tableIdentifier, new Column[] {}, null,
copiedTableProperties);
- }
- }
+ copiedTableProperties.put(Table.PROPERTY_TABLE_FORMAT, LANCE_TABLE_FORMAT);
+ copiedTableProperties.put(Table.PROPERTY_EXTERNAL, "true");
+
+ // Pass creation mode as property to delegate handling to
LanceTableOperations
+ copiedTableProperties.put(LANCE_CREATION_MODE, mode.name());
+
+ // Single call - mode is handled server-side
+ Table t =
+ catalog
+ .asTableCatalog()
+ .createTable(tableIdentifier, new Column[] {}, null,
copiedTableProperties);
RegisterTableResponse response = new RegisterTableResponse();
response.setProperties(t.properties());
diff --git
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java
index c34a7be58a..e3be5d0528 100644
---
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java
+++
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java
@@ -29,6 +29,13 @@ public class LanceConstants {
// Key for table location in table properties map
public static final String LANCE_LOCATION = "location";
+ // Key for creation mode in table properties
+ public static final String LANCE_CREATION_MODE = "lance.creation-mode";
+
// Prefix for storage options in LanceConfig
public static final String LANCE_STORAGE_OPTIONS_PREFIX = "lance.storage.";
+
+ public static final String LANCE_TABLE_REGISTER = "lance.register";
+
+ public static final String LANCE_TABLE_FORMAT = "lance";
}
diff --git
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java
index 2465e6ed63..59bf7e6642 100644
---
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java
+++
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java
@@ -27,6 +27,7 @@ import javax.ws.rs.core.Response;
import javax.ws.rs.ext.ExceptionMapper;
import javax.ws.rs.ext.Provider;
import org.apache.gravitino.exceptions.NotFoundException;
+import org.apache.gravitino.exceptions.TableAlreadyExistsException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -58,6 +59,10 @@ public class LanceExceptionMapper implements
ExceptionMapper<Exception> {
return LanceNamespaceException.badRequest(
ex.getMessage(), ex.getClass().getSimpleName(), instance,
getStackTrace(ex));
+ } else if (ex instanceof TableAlreadyExistsException) {
+ return LanceNamespaceException.conflict(
+ ex.getMessage(), ex.getClass().getSimpleName(), instance,
getStackTrace(ex));
+
} else if (ex instanceof UnsupportedOperationException) {
return LanceNamespaceException.unsupportedOperation(
ex.getMessage(), ex.getClass().getSimpleName(), instance,
getStackTrace(ex));
diff --git
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
index ab46353c83..e01eca59d9 100644
---
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
+++
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
@@ -56,6 +56,7 @@ import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import org.apache.gravitino.lance.common.ops.NamespaceWrapper;
+import org.apache.gravitino.lance.common.utils.LanceConstants;
import org.apache.gravitino.lance.common.utils.SerializationUtils;
import org.apache.gravitino.lance.service.LanceExceptionMapper;
import org.apache.gravitino.metrics.MetricNames;
@@ -165,7 +166,7 @@ public class LanceTableOperations {
? Maps.newHashMap()
: Maps.newHashMap(registerTableRequest.getProperties());
props.put(LANCE_LOCATION, registerTableRequest.getLocation());
- props.put("register", "true");
+ props.put(LanceConstants.LANCE_TABLE_REGISTER, "true");
ModeEnum mode = registerTableRequest.getMode();
RegisterTableResponse response =
diff --git
a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java
b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java
index 028d98215e..9dab836811 100644
---
a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java
+++
b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java
@@ -442,7 +442,6 @@ public class LanceRESTServiceIT extends BaseIT {
() -> {
ns.createEmptyTable(request);
});
- Assertions.assertTrue(exception.getMessage().contains("Table already
exists"));
Assertions.assertEquals(409, exception.getCode());
// Try to create a table with wrong location should fail
diff --git
a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java
b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java
index 02d5a6e812..4f5f05c8d9 100644
---
a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java
+++
b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java
@@ -57,6 +57,7 @@ import javax.ws.rs.core.Response;
import org.apache.gravitino.exceptions.NoSuchCatalogException;
import org.apache.gravitino.lance.common.ops.LanceTableOperations;
import org.apache.gravitino.lance.common.ops.NamespaceWrapper;
+import org.apache.gravitino.lance.common.utils.LanceConstants;
import org.apache.gravitino.rest.RESTUtils;
import org.glassfish.jersey.internal.inject.AbstractBinder;
import org.glassfish.jersey.server.ResourceConfig;
@@ -516,7 +517,8 @@ public class TestLanceNamespaceOperations extends
JerseyTest {
// Test that the "register" property is set to "true"
RegisterTableResponse registerTableResponse = new RegisterTableResponse();
registerTableResponse.setLocation("/path/to/registered_table");
- registerTableResponse.setProperties(ImmutableMap.of("key", "value",
"register", "true"));
+ registerTableResponse.setProperties(
+ ImmutableMap.of("key", "value", LanceConstants.LANCE_TABLE_REGISTER,
"true"));
when(tableOps.registerTable(any(), any(), any(),
any())).thenReturn(registerTableResponse);
RegisterTableRequest tableRequest = new RegisterTableRequest();
@@ -541,7 +543,7 @@ public class TestLanceNamespaceOperations extends
JerseyTest {
Mockito.argThat(
props ->
props != null
- && "true".equals(props.get("register"))
+ &&
"true".equals(props.get(LanceConstants.LANCE_TABLE_REGISTER))
&&
"/path/to/registered_table".equals(props.get("location"))
&& "custom-value".equals(props.get("custom-key"))));
}