This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e1cee60d0 [core] Validate file index data type compatibility at table 
creation time (#8168)
1e1cee60d0 is described below

commit 1e1cee60d086c8aeaa12fe1b63ca2f2a17b975c3
Author: Zouxxyy <[email protected]>
AuthorDate: Mon Jun 8 19:12:02 2026 +0800

    [core] Validate file index data type compatibility at table creation time 
(#8168)
---
 docs/docs/concepts/spec/fileindex.mdx              |  3 +
 .../paimon/fileindex/FileIndexerFactory.java       |  6 ++
 .../paimon/fileindex/FileIndexerFactoryUtils.java  |  2 +-
 .../fileindex/bitmap/BitmapFileIndexFactory.java   |  5 ++
 .../bloomfilter/BloomFilterFileIndexFactory.java   |  5 ++
 .../bsi/BitSliceIndexBitmapFileIndexFactory.java   |  5 ++
 .../rangebitmap/RangeBitmapFileIndexFactory.java   |  6 ++
 .../org/apache/paimon/schema/SchemaValidation.java | 18 +++++
 .../apache/paimon/schema/SchemaValidationTest.java | 85 +++++++++++++++++++++-
 9 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/docs/docs/concepts/spec/fileindex.mdx 
b/docs/docs/concepts/spec/fileindex.mdx
index e2f0899f7d..71f3a61a1b 100644
--- a/docs/docs/concepts/spec/fileindex.mdx
+++ b/docs/docs/concepts/spec/fileindex.mdx
@@ -98,6 +98,9 @@ Content of bloom filter index is simple:
 This class use (64-bits) long hash. Store the num hash function (one integer) 
and bit set bytes only. Hash bytes type 
 (like varchar, binary, etc.) using xx hash, hash numeric type by [specified 
number 
hash](http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm).
 
+BloomFilter only supports the following data types: TinyIntType, SmallIntType, 
IntType, BigIntType, FloatType, DoubleType,
+DateType, TimeType, TimestampType, LocalZonedTimestampType, CharType, 
VarCharType, BinaryType, VarBinaryType.
+
 ## Index: Bitmap
 
 * `file-index.bitmap.columns`: specify the columns that need bitmap index.
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
index 85dcc97f46..19d9ef26f7 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
@@ -27,4 +27,10 @@ public interface FileIndexerFactory {
     String identifier();
 
     FileIndexer create(DataType type, Options options);
+
+    /**
+     * Validate whether the given data type is supported by this index. Throws 
{@link
+     * UnsupportedOperationException} if not supported.
+     */
+    default void validate(DataType dataType) {}
 }
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
index 269ba77266..29f922d077 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
@@ -46,7 +46,7 @@ public class FileIndexerFactoryUtils {
         }
     }
 
-    static FileIndexerFactory load(String type) {
+    public static FileIndexerFactory load(String type) {
         FileIndexerFactory fileIndexerFactory = factories.get(type);
         if (fileIndexerFactory == null) {
             throw new RuntimeException("Can't find file index for type: " + 
type);
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
index 5b2ea144ac..3fcb8ff4fa 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
@@ -37,4 +37,9 @@ public class BitmapFileIndexFactory implements 
FileIndexerFactory {
     public FileIndexer create(DataType dataType, Options options) {
         return new BitmapFileIndex(dataType, options);
     }
+
+    @Override
+    public void validate(DataType dataType) {
+        BitmapFileIndex.getValueMapper(dataType);
+    }
 }
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
index ec77a0a040..c0eb6ec751 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
@@ -37,4 +37,9 @@ public class BloomFilterFileIndexFactory implements 
FileIndexerFactory {
     public FileIndexer create(DataType type, Options options) {
         return new BloomFilterFileIndex(type, options);
     }
+
+    @Override
+    public void validate(DataType dataType) {
+        FastHash.getHashFunction(dataType);
+    }
 }
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
index c6d182ce01..2be218bcc3 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
@@ -37,4 +37,9 @@ public class BitSliceIndexBitmapFileIndexFactory implements 
FileIndexerFactory {
     public FileIndexer create(DataType dataType, Options options) {
         return new BitSliceIndexBitmapFileIndex(dataType);
     }
+
+    @Override
+    public void validate(DataType dataType) {
+        BitSliceIndexBitmapFileIndex.getValueMapper(dataType);
+    }
 }
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
index 3eb4122971..ac2e9d6b89 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
@@ -20,6 +20,7 @@ package org.apache.paimon.fileindex.rangebitmap;
 
 import org.apache.paimon.fileindex.FileIndexer;
 import org.apache.paimon.fileindex.FileIndexerFactory;
+import org.apache.paimon.fileindex.rangebitmap.dictionary.chunked.KeyFactory;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.types.DataType;
 
@@ -37,4 +38,9 @@ public class RangeBitmapFileIndexFactory implements 
FileIndexerFactory {
     public FileIndexer create(DataType dataType, Options options) {
         return new RangeBitmapFileIndex(dataType, options);
     }
+
+    @Override
+    public void validate(DataType dataType) {
+        KeyFactory.create(dataType);
+    }
 }
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java 
b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
index 85eba1614b..7733b6a080 100644
--- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
+++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
@@ -24,6 +24,8 @@ import org.apache.paimon.CoreOptions.MergeEngine;
 import org.apache.paimon.TableType;
 import org.apache.paimon.factories.FactoryUtil;
 import org.apache.paimon.fileindex.FileIndexOptions;
+import org.apache.paimon.fileindex.FileIndexerFactory;
+import org.apache.paimon.fileindex.FileIndexerFactoryUtils;
 import org.apache.paimon.format.FileFormat;
 import org.apache.paimon.mergetree.compact.aggregate.FieldAggregator;
 import 
org.apache.paimon.mergetree.compact.aggregate.factory.FieldAggregatorFactory;
@@ -643,6 +645,22 @@ public class SchemaValidation {
                         columnName,
                         keyType);
             }
+
+            for (String indexType : entry.getValue().keySet()) {
+                FileIndexerFactory factory = 
FileIndexerFactoryUtils.load(indexType);
+                DataType dataType =
+                        column.isNestedColumn()
+                                ? ((MapType) field.type()).getValueType()
+                                : field.type();
+                try {
+                    factory.validate(dataType);
+                } catch (UnsupportedOperationException e) {
+                    throw new IllegalArgumentException(
+                            String.format(
+                                    "Column '%s' with type '%s' is not 
supported by '%s' index. %s",
+                                    columnName, dataType.asSQLString(), 
indexType, e.getMessage()), e); // chain the cause so the factory's stack trace is preserved
+                }
+            }
         }
     }
 
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java 
b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
index 2b61696956..f40fec52fb 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
@@ -405,9 +405,9 @@ class SchemaValidationTest {
                         "file-index.range-bitmap.columns");
 
         for (String key : keys) {
-            // valid: all referenced columns exist
+            // valid: all referenced columns exist and types are supported
             Map<String, String> okOptions = new HashMap<>();
-            okOptions.put(key, "f0,f3");
+            okOptions.put(key, "f0");
             assertThatCode(() -> validateTableSchemaExec(okOptions))
                     .as("valid key=%s", key)
                     .doesNotThrowAnyException();
@@ -470,6 +470,87 @@ class SchemaValidationTest {
                 new TableSchema(1, fields, 10, emptyList(), 
singletonList("f1"), options, ""));
     }
 
+    @Test
+    public void testFileIndexUnsupportedDataType() {
+        List<DataField> fields =
+                Arrays.asList(
+                        new DataField(0, "f0", DataTypes.INT()),
+                        new DataField(1, "f1", DataTypes.INT()),
+                        new DataField(2, "arr", 
DataTypes.ARRAY(DataTypes.STRING())),
+                        new DataField(3, "f3", DataTypes.STRING()));
+
+        // bloom-filter on ARRAY should fail
+        Map<String, String> bloomOptions = new HashMap<>();
+        bloomOptions.put("file-index.bloom-filter.columns", "arr");
+        bloomOptions.put(BUCKET.key(), String.valueOf(-1));
+        assertThatThrownBy(
+                        () ->
+                                validateTableSchema(
+                                        new TableSchema(
+                                                1,
+                                                fields,
+                                                10,
+                                                emptyList(),
+                                                singletonList("f1"),
+                                                bloomOptions,
+                                                "")))
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("not supported by 'bloom-filter' index");
+
+        // bitmap on ARRAY should fail
+        Map<String, String> bitmapOptions = new HashMap<>();
+        bitmapOptions.put("file-index.bitmap.columns", "arr");
+        bitmapOptions.put(BUCKET.key(), String.valueOf(-1));
+        assertThatThrownBy(
+                        () ->
+                                validateTableSchema(
+                                        new TableSchema(
+                                                1,
+                                                fields,
+                                                10,
+                                                emptyList(),
+                                                singletonList("f1"),
+                                                bitmapOptions,
+                                                "")))
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("not supported by 'bitmap' index");
+
+        // bsi on STRING should fail
+        Map<String, String> bsiOptions = new HashMap<>();
+        bsiOptions.put("file-index.bsi.columns", "f3");
+        bsiOptions.put(BUCKET.key(), String.valueOf(-1));
+        assertThatThrownBy(
+                        () ->
+                                validateTableSchema(
+                                        new TableSchema(
+                                                1,
+                                                fields,
+                                                10,
+                                                emptyList(),
+                                                singletonList("f1"),
+                                                bsiOptions,
+                                                "")))
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("not supported by 'bsi' index");
+
+        // bloom-filter on INT should pass
+        Map<String, String> okOptions = new HashMap<>();
+        okOptions.put("file-index.bloom-filter.columns", "f0");
+        okOptions.put(BUCKET.key(), String.valueOf(-1));
+        assertThatCode(
+                        () ->
+                                validateTableSchema(
+                                        new TableSchema(
+                                                1,
+                                                fields,
+                                                10,
+                                                emptyList(),
+                                                singletonList("f1"),
+                                                okOptions,
+                                                "")))
+                .doesNotThrowAnyException();
+    }
+
     @Test
     public void testSnapshotSequenceOrderingHappyPath() {
         Map<String, String> options = new HashMap<>();

Reply via email to