This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 347ec25fd0 [core] Validate file index related options (#7846)
347ec25fd0 is described below

commit 347ec25fd05f961e62638dda16f14c4a509476c9
Author: tsreaper <[email protected]>
AuthorDate: Thu May 14 11:55:51 2026 +0800

    [core] Validate file index related options (#7846)
    
    Add schema-level validation for the file-index.<index-type>.columns
    options (bloom-filter / bitmap / bsi / range-bitmap).
    
    Currently, if a user configures a column name that does not exist in the
    table schema (or uses the nested col[k] syntax on a non-map column or on
    a map with a non-string key), the table can still be created and a write
    job can be started successfully, but the job will fail much later — when
    MergeTreeWriter.flushWriteBuffer triggers DataFileIndexWriter.<init>
    during checkpoint, throwing IllegalArgumentException: "xxx does not exist
    in column fields" (or "is not map type" / "Only support map data type
    with key field of CHAR、VARCHAR、STRING."). The same misconfiguration also
    blocks compaction.
    
    This PR performs these three checks up-front in SchemaValidation, so the
    error is surfaced at create-table / alter-table time:
    
    1. Every column listed in any file-index.<type>.columns must exist in
    the schema.
    2. If nested syntax col[k] is used, col must be of MAP type.
    3. A MAP referenced via nested syntax must have a CHAR / VARCHAR key type
    (a STRING key passes this check as well; this matches
    MapFileIndexMaintainer's runtime requirement).
---
 .../org/apache/paimon/schema/SchemaValidation.java | 47 ++++++++++++++
 .../apache/paimon/schema/SchemaValidationTest.java | 75 ++++++++++++++++++++++
 .../procedure/RewriteFileIndexProcedureITCase.java |  2 +-
 3 files changed, 123 insertions(+), 1 deletion(-)

diff --git 
a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java 
b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
index b5285bb606..13ee86cdd0 100644
--- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
+++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
@@ -23,6 +23,7 @@ import org.apache.paimon.CoreOptions.ChangelogProducer;
 import org.apache.paimon.CoreOptions.MergeEngine;
 import org.apache.paimon.TableType;
 import org.apache.paimon.factories.FactoryUtil;
+import org.apache.paimon.fileindex.FileIndexOptions;
 import org.apache.paimon.format.FileFormat;
 import org.apache.paimon.mergetree.compact.aggregate.FieldAggregator;
 import 
org.apache.paimon.mergetree.compact.aggregate.factory.FieldAggregatorFactory;
@@ -273,6 +274,8 @@ public class SchemaValidation {
 
         validateMergeFunctionFactory(schema);
 
+        validateFileIndex(schema);
+
         validateRowTracking(schema, options);
 
         validateIncrementalClustering(schema, options);
@@ -531,6 +534,50 @@ public class SchemaValidation {
         createMergeFunctionFactory(schema);
     }
 
+    private static void validateFileIndex(TableSchema schema) {
+        CoreOptions options = new CoreOptions(schema.options());
+        FileIndexOptions fileIndexOptions = options.indexColumnsOptions();
+        if (fileIndexOptions.isEmpty()) {
+            return;
+        }
+
+        Map<String, DataField> fieldMap = new HashMap<>();
+        for (DataField field : schema.fields()) {
+            fieldMap.put(field.name(), field);
+        }
+
+        for (Map.Entry<FileIndexOptions.Column, Map<String, Options>> entry :
+                fileIndexOptions.entrySet()) {
+            FileIndexOptions.Column column = entry.getKey();
+            String columnName = column.getColumnName();
+            checkArgument(
+                    fieldMap.containsKey(columnName),
+                    "Column '%s' specified in 
'file-index.<index-type>.columns' "
+                            + "does not exist in table schema. Existing 
columns: %s.",
+                    columnName,
+                    schema.fieldNames());
+
+            DataField field = fieldMap.get(columnName);
+            if (column.isNestedColumn()) {
+                checkArgument(
+                        field.type().getTypeRoot() == DataTypeRoot.MAP,
+                        "Column '%s' is configured as nested column in "
+                                + "'file-index.<index-type>.columns' but is 
not a map type. "
+                                + "Only map type supports nested column.",
+                        columnName);
+                DataType keyType = ((MapType) field.type()).getKeyType();
+                DataTypeRoot keyRoot = keyType.getTypeRoot();
+                checkArgument(
+                        keyRoot == DataTypeRoot.CHAR || keyRoot == 
DataTypeRoot.VARCHAR,
+                        "Column '%s' is configured as nested column in "
+                                + "'file-index.<index-type>.columns', but its 
map key type is %s. "
+                                + "Only CHAR/VARCHAR/STRING is supported.",
+                        columnName,
+                        keyType);
+            }
+        }
+    }
+
     private static void validateForDeletionVectors(CoreOptions options) {
         checkArgument(
                 options.changelogProducer() == ChangelogProducer.NONE
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java 
b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
index d2bad5248c..cec075724f 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
@@ -369,4 +369,79 @@ class SchemaValidationTest {
                                                 "")))
                 .hasMessageContaining("primary-key");
     }
+
+    @Test
+    public void testFileIndexColumns() {
+        List<String> keys =
+                Arrays.asList(
+                        "file-index.bloom-filter.columns",
+                        "file-index.bitmap.columns",
+                        "file-index.bsi.columns",
+                        "file-index.range-bitmap.columns");
+
+        for (String key : keys) {
+            // valid: all referenced columns exist
+            Map<String, String> okOptions = new HashMap<>();
+            okOptions.put(key, "f0,f3");
+            assertThatCode(() -> validateTableSchemaExec(okOptions))
+                    .as("valid key=%s", key)
+                    .doesNotThrowAnyException();
+
+            // invalid: references a non-existent column
+            Map<String, String> badOptions = new HashMap<>();
+            badOptions.put(key, "f0,not_exist");
+            assertThatThrownBy(() -> validateTableSchemaExec(badOptions))
+                    .isInstanceOf(IllegalArgumentException.class)
+                    .hasMessageContaining(
+                            "Column 'not_exist' specified in 
'file-index.<index-type>.columns' does not exist in table schema.");
+        }
+    }
+
+    @Test
+    public void testFileIndexNestedColumn() {
+        List<String> keys =
+                Arrays.asList(
+                        "file-index.bloom-filter.columns",
+                        "file-index.bitmap.columns",
+                        "file-index.bsi.columns",
+                        "file-index.range-bitmap.columns");
+
+        for (String key : keys) {
+            // valid: nested syntax on a map column with string key
+            Map<String, String> okOptions = new HashMap<>();
+            okOptions.put(key, "m[k]");
+            assertThatCode(() -> validateTableSchemaWithMapField(okOptions))
+                    .doesNotThrowAnyException();
+
+            // invalid: nested syntax on a non-map column
+            Map<String, String> nonMapOptions = new HashMap<>();
+            nonMapOptions.put(key, "f3[k]");
+            assertThatThrownBy(() -> 
validateTableSchemaWithMapField(nonMapOptions))
+                    .isInstanceOf(IllegalArgumentException.class)
+                    .hasMessageContaining(
+                            "Column 'f3' is configured as nested column in 
'file-index.<index-type>.columns' but is not a map type.");
+
+            // invalid: nested syntax on a map column with non-string key
+            Map<String, String> nonStringKeyOptions = new HashMap<>();
+            nonStringKeyOptions.put(key, "mi[k]");
+            assertThatThrownBy(() -> 
validateTableSchemaWithMapField(nonStringKeyOptions))
+                    .isInstanceOf(IllegalArgumentException.class)
+                    .hasMessageContaining(
+                            "Column 'mi' is configured as nested column in 
'file-index.<index-type>.columns', but its map key type is INT. Only 
CHAR/VARCHAR/STRING is supported.");
+        }
+    }
+
+    private void validateTableSchemaWithMapField(Map<String, String> options) {
+        List<DataField> fields =
+                Arrays.asList(
+                        new DataField(0, "f0", DataTypes.INT()),
+                        new DataField(1, "f1", DataTypes.INT()),
+                        new DataField(2, "f2", DataTypes.INT()),
+                        new DataField(3, "f3", DataTypes.STRING()),
+                        new DataField(4, "m", 
DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
+                        new DataField(5, "mi", DataTypes.MAP(DataTypes.INT(), 
DataTypes.INT())));
+        options.put(BUCKET.key(), String.valueOf(-1));
+        validateTableSchema(
+                new TableSchema(1, fields, 10, emptyList(), 
singletonList("f1"), options, ""));
+    }
 }
diff --git 
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
 
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
index 23102f2a3d..60aa33977c 100644
--- 
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
+++ 
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
@@ -138,7 +138,7 @@ public class RewriteFileIndexProcedureITCase extends 
CatalogITCaseBase {
         Assertions.assertThat(count.get()).isEqualTo(6);
 
         tEnv.getConfig().set(TableConfigOptions.TABLE_DML_SYNC, true);
-        sql("ALTER TABLE T SET 
('file-index.bloom-filter.columns'='order_id,v')");
+        sql("ALTER TABLE T SET ('file-index.bloom-filter.columns'='k,v')");
         if (isNamedArgument) {
             sql("CALL sys.rewrite_file_index(`table` => 'default.T', 
partitions => 'dt=20221208')");
         } else {

Reply via email to