This is an automated email from the ASF dual-hosted git repository.
yuxia pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git
The following commit(s) were added to refs/heads/main by this push:
new 3f8797cf4 [lake/paimon] Tolerate Paimon lake table existent with
different insignificant options (#1995)
3f8797cf4 is described below
commit 3f8797cf4ea59aab8f417869c6e8b5c2595bf5f3
Author: Liebing <[email protected]>
AuthorDate: Wed Nov 19 09:54:16 2025 +0800
[lake/paimon] Tolerate Paimon lake table existent with different
insignificant options (#1995)
---
.../fluss/lake/paimon/PaimonLakeCatalog.java | 43 +-------
.../lake/paimon/utils/PaimonTableValidation.java | 112 +++++++++++++++++++++
.../lake/paimon/LakeEnabledTableCreateITCase.java | 49 +++++++++
3 files changed, 164 insertions(+), 40 deletions(-)
diff --git
a/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/PaimonLakeCatalog.java
b/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/PaimonLakeCatalog.java
index 712c2af38..0332c4447 100644
---
a/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/PaimonLakeCatalog.java
+++
b/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/PaimonLakeCatalog.java
@@ -18,7 +18,6 @@
package org.apache.fluss.lake.paimon;
import org.apache.fluss.annotation.VisibleForTesting;
-import org.apache.fluss.config.ConfigOptions;
import org.apache.fluss.config.Configuration;
import org.apache.fluss.exception.TableAlreadyExistException;
import org.apache.fluss.exception.TableNotExistException;
@@ -28,7 +27,6 @@ import org.apache.fluss.metadata.TableDescriptor;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.utils.IOUtils;
-import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.CatalogFactory;
@@ -43,12 +41,12 @@ import org.apache.paimon.types.DataTypes;
import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Map;
-import static
org.apache.fluss.lake.paimon.utils.PaimonConversions.FLUSS_CONF_PREFIX;
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimon;
import static
org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimonSchema;
import static
org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimonSchemaChanges;
+import static
org.apache.fluss.lake.paimon.utils.PaimonTableValidation.checkTableIsEmpty;
+import static
org.apache.fluss.lake.paimon.utils.PaimonTableValidation.validatePaimonSchemaCompatible;
import static org.apache.fluss.metadata.TableDescriptor.BUCKET_COLUMN_NAME;
import static org.apache.fluss.metadata.TableDescriptor.OFFSET_COLUMN_NAME;
import static org.apache.fluss.metadata.TableDescriptor.TIMESTAMP_COLUMN_NAME;
@@ -129,7 +127,7 @@ public class PaimonLakeCatalog implements LakeCatalog {
try {
Table table = paimonCatalog.getTable(tablePath);
FileStoreTable fileStoreTable = (FileStoreTable) table;
- validatePaimonSchemaCapability(
+ validatePaimonSchemaCompatible(
tablePath, fileStoreTable.schema().toSchema(), schema);
// if creating a new fluss table, we should ensure the lake
table is empty
if (isCreatingFlussTable) {
@@ -166,41 +164,6 @@ public class PaimonLakeCatalog implements LakeCatalog {
}
}
- private void validatePaimonSchemaCapability(
- Identifier tablePath, Schema existingSchema, Schema newSchema) {
- // Adjust options for comparison
- Map<String, String> existingOptions = existingSchema.options();
- Map<String, String> newOptions = newSchema.options();
- // `path` will be set automatically by Paimon, so we need to remove it
in existing options
- existingOptions.remove(CoreOptions.PATH.key());
- // when enable datalake with an existing table,
`table.datalake.enabled` will be `false`
- // in existing options, but `true` in new options.
- String datalakeConfigKey = FLUSS_CONF_PREFIX +
ConfigOptions.TABLE_DATALAKE_ENABLED.key();
- if
(Boolean.FALSE.toString().equalsIgnoreCase(existingOptions.get(datalakeConfigKey)))
{
- existingOptions.remove(datalakeConfigKey);
- newOptions.remove(datalakeConfigKey);
- }
-
- if (!existingSchema.equals(newSchema)) {
- throw new TableAlreadyExistException(
- String.format(
- "The table %s already exists in Paimon catalog,
but the table schema is not compatible. "
- + "Existing schema: %s, new schema: %s. "
- + "Please first drop the table in Paimon
catalog or use a new table name.",
- tablePath.getEscapedFullName(), existingSchema,
newSchema));
- }
- }
-
- private void checkTableIsEmpty(Identifier tablePath, FileStoreTable table)
{
- if (table.latestSnapshot().isPresent()) {
- throw new TableAlreadyExistException(
- String.format(
- "The table %s already exists in Paimon catalog,
and the table is not empty. "
- + "Please first drop the table in Paimon
catalog or use a new table name.",
- tablePath.getEscapedFullName()));
- }
- }
-
@Override
public void close() {
IOUtils.closeQuietly(paimonCatalog, "paimon catalog");
diff --git
a/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/utils/PaimonTableValidation.java
b/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/utils/PaimonTableValidation.java
new file mode 100644
index 000000000..0144e3725
--- /dev/null
+++
b/fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/utils/PaimonTableValidation.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.lake.paimon.utils;
+
+import org.apache.fluss.exception.TableAlreadyExistException;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.options.ConfigOption;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.table.FileStoreTable;
+
+import java.lang.reflect.Field;
+import java.util.HashMap;
+import java.util.Map;
+
+import static
org.apache.fluss.lake.paimon.utils.PaimonConversions.FLUSS_CONF_PREFIX;
+
+/** Utils to verify whether the existing Paimon table is compatible with the
table to be created. */
+public class PaimonTableValidation {
+
+ private static final Map<String, ConfigOption<?>> PAIMON_CONFIGS =
extractPaimonConfigs();
+
+ public static void validatePaimonSchemaCompatible(
+ Identifier tablePath, Schema existingSchema, Schema newSchema) {
+ // Adjust options for comparison
+ Map<String, String> existingOptions = existingSchema.options();
+ Map<String, String> newOptions = newSchema.options();
+
+ // when enable datalake with an existing table,
`table.datalake.enabled` will be `false`
+ // in existing options, but `true` in new options.
+ String datalakeConfigKey =
+ FLUSS_CONF_PREFIX
+ +
org.apache.fluss.config.ConfigOptions.TABLE_DATALAKE_ENABLED.key();
+ if
(Boolean.FALSE.toString().equalsIgnoreCase(existingOptions.get(datalakeConfigKey)))
{
+ existingOptions.remove(datalakeConfigKey);
+ newOptions.remove(datalakeConfigKey);
+ }
+
+ // remove changeable options
+ removeChangeableOptions(existingOptions);
+ removeChangeableOptions(newOptions);
+
+ // ignore the existing options that are not in new options
+ existingOptions.entrySet().removeIf(entry ->
!newOptions.containsKey(entry.getKey()));
+
+ if (!existingSchema.equals(newSchema)) {
+ throw new TableAlreadyExistException(
+ String.format(
+ "The table %s already exists in Paimon catalog,
but the table schema is not compatible. "
+ + "Existing schema: %s, new schema: %s. "
+ + "Please first drop the table in Paimon
catalog or use a new table name.",
+ tablePath.getEscapedFullName(), existingSchema,
newSchema));
+ }
+ }
+
+ private static void removeChangeableOptions(Map<String, String> options) {
+ options.entrySet()
+ .removeIf(
+ entry ->
+ // currently we take all Paimon options and
Fluss option as
+ // unchangeable.
+ !PAIMON_CONFIGS.containsKey(entry.getKey())
+ &&
!entry.getKey().startsWith(FLUSS_CONF_PREFIX));
+ }
+
+ public static void checkTableIsEmpty(Identifier tablePath, FileStoreTable
table) {
+ if (table.latestSnapshot().isPresent()) {
+ throw new TableAlreadyExistException(
+ String.format(
+ "The table %s already exists in Paimon catalog,
and the table is not empty. "
+ + "Please first drop the table in Paimon
catalog or use a new table name.",
+ tablePath.getEscapedFullName()));
+ }
+ }
+
+ private static Map<String, ConfigOption<?>> extractPaimonConfigs() {
+ Map<String, ConfigOption<?>> options = new HashMap<>();
+
+ Field[] fields = CoreOptions.class.getFields();
+ for (Field field : fields) {
+ if (!ConfigOption.class.isAssignableFrom(field.getType())) {
+ continue;
+ }
+
+ try {
+ ConfigOption<?> configOption = (ConfigOption<?>)
field.get(null);
+ options.put(configOption.key(), configOption);
+ } catch (IllegalAccessException e) {
+ throw new RuntimeException(
+ "Unable to extract ConfigOption fields from
CoreOptions class.", e);
+ }
+ }
+
+ return options;
+ }
+}
diff --git
a/fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/LakeEnabledTableCreateITCase.java
b/fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/LakeEnabledTableCreateITCase.java
index 3e47bdc46..9e28ce7d4 100644
---
a/fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/LakeEnabledTableCreateITCase.java
+++
b/fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/LakeEnabledTableCreateITCase.java
@@ -43,6 +43,7 @@ import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.SchemaChange;
import org.apache.paimon.table.Table;
import org.apache.paimon.table.sink.BatchTableCommit;
import org.apache.paimon.table.sink.BatchTableWrite;
@@ -476,6 +477,54 @@ class LakeEnabledTableCreateITCase {
+ "Existing schema: UpdateSchema{fields=[`c1`
STRING, `c2` INT, `__bucket` INT, `__offset` BIGINT, `__timestamp` TIMESTAMP(6)
WITH LOCAL TIME ZONE], partitionKeys=[], primaryKeys=[], options={bucket=-1,
fluss.table.replication.factor=1, fluss.table.datalake.enabled=true,
fluss.table.datalake.format=paimon, partition.legacy-name=false,
file.format=parquet, fluss.k1=v1}, comment=null}, "
+ "new schema: UpdateSchema{fields=[`c1`
STRING, `c2` INT, `c3` STRING, `__bucket` INT, `__offset` BIGINT, `__timestamp`
TIMESTAMP(6) WITH LOCAL TIME ZONE], partitionKeys=[], primaryKeys=[],
options={bucket=-1, fluss.table.replication.factor=1,
fluss.table.datalake.enabled=true, fluss.table.datalake.format=paimon,
partition.legacy-name=false, file.format=parquet, fluss.k1=v1}, comment=null}. "
+ "Please first drop the table in Paimon
catalog or use a new table name.");
+
+ // add an insignificant option to Paimon table will be ok
+ Identifier paimonTablePath =
+ Identifier.create(tablePath.getDatabaseName(),
tablePath.getTableName());
+ SchemaChange schemaChange1 = SchemaChange.setOption("any.k1",
"any.v1");
+ paimonCatalog.alterTable(paimonTablePath,
Collections.singletonList(schemaChange1), false);
+ admin.createTable(tablePath, td, false).get();
+ admin.dropTable(tablePath, false).get();
+
+ // alter a Fluss option to Paimon table will throw exception
+ SchemaChange schemaChange2 = SchemaChange.setOption("fluss.k1", "v2");
+ paimonCatalog.alterTable(paimonTablePath,
Collections.singletonList(schemaChange2), false);
+ TableDescriptor finalTd = td;
+ assertThatThrownBy(() -> admin.createTable(tablePath, finalTd,
false).get())
+ .cause()
+ .isInstanceOf(LakeTableAlreadyExistException.class)
+ .hasMessageContaining(
+ "The table `fluss`.`log_table_with_exist_lake_table`
already exists in Paimon catalog, "
+ + "but the table schema is not compatible.");
+
+ // reset fluss.k1 in Paimon
+ SchemaChange schemaChange3 = SchemaChange.setOption("fluss.k1", "v1");
+ paimonCatalog.alterTable(paimonTablePath,
Collections.singletonList(schemaChange3), false);
+
+ // add a new Paimon option (not specified in the Fluss table) to
Paimon table will be ok
+ SchemaChange schemaChange4 =
+
SchemaChange.setOption(CoreOptions.SNAPSHOT_NUM_RETAINED_MIN.key(), "2");
+ paimonCatalog.alterTable(paimonTablePath,
Collections.singletonList(schemaChange4), false);
+ admin.createTable(tablePath, finalTd, false).get();
+ admin.dropTable(tablePath, false).get();
+
+ // try to create a Fluss table specify a different value of exist
Paimon option will throw
+ // exception
+ customProperties.put("paimon.snapshot.num-retained.min", "3");
+ TableDescriptor td1 =
+ createTableDescriptor(
+ 2,
+ BUCKET_NUM,
+ Collections.emptyList(),
+ Collections.emptyList(),
+ customProperties,
+ false);
+ assertThatThrownBy(() -> admin.createTable(tablePath, td1,
false).get())
+ .cause()
+ .isInstanceOf(LakeTableAlreadyExistException.class)
+ .hasMessageContaining(
+ "The table `fluss`.`log_table_with_exist_lake_table`
already exists in Paimon catalog, "
+ + "but the table schema is not compatible.");
}
@Test