This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 1e1cee60d0 [core] Validate file index data type compatibility at table
creation time (#8168)
1e1cee60d0 is described below
commit 1e1cee60d086c8aeaa12fe1b63ca2f2a17b975c3
Author: Zouxxyy <[email protected]>
AuthorDate: Mon Jun 8 19:12:02 2026 +0800
[core] Validate file index data type compatibility at table creation time
(#8168)
---
docs/docs/concepts/spec/fileindex.mdx | 3 +
.../paimon/fileindex/FileIndexerFactory.java | 6 ++
.../paimon/fileindex/FileIndexerFactoryUtils.java | 2 +-
.../fileindex/bitmap/BitmapFileIndexFactory.java | 5 ++
.../bloomfilter/BloomFilterFileIndexFactory.java | 5 ++
.../bsi/BitSliceIndexBitmapFileIndexFactory.java | 5 ++
.../rangebitmap/RangeBitmapFileIndexFactory.java | 6 ++
.../org/apache/paimon/schema/SchemaValidation.java | 18 +++++
.../apache/paimon/schema/SchemaValidationTest.java | 85 +++++++++++++++++++++-
9 files changed, 132 insertions(+), 3 deletions(-)
diff --git a/docs/docs/concepts/spec/fileindex.mdx
b/docs/docs/concepts/spec/fileindex.mdx
index e2f0899f7d..71f3a61a1b 100644
--- a/docs/docs/concepts/spec/fileindex.mdx
+++ b/docs/docs/concepts/spec/fileindex.mdx
@@ -98,6 +98,9 @@ Content of bloom filter index is simple:
This class use (64-bits) long hash. Store the num hash function (one integer)
and bit set bytes only. Hash bytes type
(like varchar, binary, etc.) using xx hash, hash numeric type by [specified
number
hash](http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm).
+BloomFilter only supports the following data types: TinyIntType, SmallIntType, IntType, BigIntType, FloatType, DoubleType,
+DateType, TimeType, TimestampType, LocalZonedTimestampType, CharType, VarCharType, BinaryType, VarBinaryType.
+
## Index: Bitmap
* `file-index.bitmap.columns`: specify the columns that need bitmap index.
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
index 85dcc97f46..19d9ef26f7 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
@@ -27,4 +27,10 @@ public interface FileIndexerFactory {
String identifier();
FileIndexer create(DataType type, Options options);
+
+ /**
+ * Validate whether the given data type is supported by this index. Throws {@link
+ * UnsupportedOperationException} if not supported.
+ */
+ default void validate(DataType dataType) {}
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
index 269ba77266..29f922d077 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
@@ -46,7 +46,7 @@ public class FileIndexerFactoryUtils {
}
}
- static FileIndexerFactory load(String type) {
+ public static FileIndexerFactory load(String type) {
FileIndexerFactory fileIndexerFactory = factories.get(type);
if (fileIndexerFactory == null) {
throw new RuntimeException("Can't find file index for type: " +
type);
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
index 5b2ea144ac..3fcb8ff4fa 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexFactory.java
@@ -37,4 +37,9 @@ public class BitmapFileIndexFactory implements
FileIndexerFactory {
public FileIndexer create(DataType dataType, Options options) {
return new BitmapFileIndex(dataType, options);
}
+
+ @Override
+ public void validate(DataType dataType) {
+ BitmapFileIndex.getValueMapper(dataType);
+ }
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
index ec77a0a040..c0eb6ec751 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
@@ -37,4 +37,9 @@ public class BloomFilterFileIndexFactory implements
FileIndexerFactory {
public FileIndexer create(DataType type, Options options) {
return new BloomFilterFileIndex(type, options);
}
+
+ @Override
+ public void validate(DataType dataType) {
+ FastHash.getHashFunction(dataType);
+ }
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
index c6d182ce01..2be218bcc3 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexFactory.java
@@ -37,4 +37,9 @@ public class BitSliceIndexBitmapFileIndexFactory implements
FileIndexerFactory {
public FileIndexer create(DataType dataType, Options options) {
return new BitSliceIndexBitmapFileIndex(dataType);
}
+
+ @Override
+ public void validate(DataType dataType) {
+ BitSliceIndexBitmapFileIndex.getValueMapper(dataType);
+ }
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
index 3eb4122971..ac2e9d6b89 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexFactory.java
@@ -20,6 +20,7 @@ package org.apache.paimon.fileindex.rangebitmap;
import org.apache.paimon.fileindex.FileIndexer;
import org.apache.paimon.fileindex.FileIndexerFactory;
+import org.apache.paimon.fileindex.rangebitmap.dictionary.chunked.KeyFactory;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.DataType;
@@ -37,4 +38,9 @@ public class RangeBitmapFileIndexFactory implements
FileIndexerFactory {
public FileIndexer create(DataType dataType, Options options) {
return new RangeBitmapFileIndex(dataType, options);
}
+
+ @Override
+ public void validate(DataType dataType) {
+ KeyFactory.create(dataType);
+ }
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
index 85eba1614b..7733b6a080 100644
--- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
+++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
@@ -24,6 +24,8 @@ import org.apache.paimon.CoreOptions.MergeEngine;
import org.apache.paimon.TableType;
import org.apache.paimon.factories.FactoryUtil;
import org.apache.paimon.fileindex.FileIndexOptions;
+import org.apache.paimon.fileindex.FileIndexerFactory;
+import org.apache.paimon.fileindex.FileIndexerFactoryUtils;
import org.apache.paimon.format.FileFormat;
import org.apache.paimon.mergetree.compact.aggregate.FieldAggregator;
import
org.apache.paimon.mergetree.compact.aggregate.factory.FieldAggregatorFactory;
@@ -643,6 +645,22 @@ public class SchemaValidation {
columnName,
keyType);
}
+
+ for (String indexType : entry.getValue().keySet()) {
+ FileIndexerFactory factory =
FileIndexerFactoryUtils.load(indexType);
+ DataType dataType =
+ column.isNestedColumn()
+ ? ((MapType) field.type()).getValueType()
+ : field.type();
+ try {
+ factory.validate(dataType);
+ } catch (UnsupportedOperationException e) {
+ throw new IllegalArgumentException(
+ String.format(
+ "Column '%s' with type '%s' is not
supported by '%s' index. %s",
+ columnName, dataType.asSQLString(),
indexType, e.getMessage()));
+ }
+ }
}
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
index 2b61696956..f40fec52fb 100644
---
a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
@@ -405,9 +405,9 @@ class SchemaValidationTest {
"file-index.range-bitmap.columns");
for (String key : keys) {
- // valid: all referenced columns exist
+ // valid: all referenced columns exist and types are supported
Map<String, String> okOptions = new HashMap<>();
- okOptions.put(key, "f0,f3");
+ okOptions.put(key, "f0");
assertThatCode(() -> validateTableSchemaExec(okOptions))
.as("valid key=%s", key)
.doesNotThrowAnyException();
@@ -470,6 +470,87 @@ class SchemaValidationTest {
new TableSchema(1, fields, 10, emptyList(),
singletonList("f1"), options, ""));
}
+ @Test
+ public void testFileIndexUnsupportedDataType() {
+ List<DataField> fields =
+ Arrays.asList(
+ new DataField(0, "f0", DataTypes.INT()),
+ new DataField(1, "f1", DataTypes.INT()),
+ new DataField(2, "arr",
DataTypes.ARRAY(DataTypes.STRING())),
+ new DataField(3, "f3", DataTypes.STRING()));
+
+ // bloom-filter on ARRAY should fail
+ Map<String, String> bloomOptions = new HashMap<>();
+ bloomOptions.put("file-index.bloom-filter.columns", "arr");
+ bloomOptions.put(BUCKET.key(), String.valueOf(-1));
+ assertThatThrownBy(
+ () ->
+ validateTableSchema(
+ new TableSchema(
+ 1,
+ fields,
+ 10,
+ emptyList(),
+ singletonList("f1"),
+ bloomOptions,
+ "")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("not supported by 'bloom-filter' index");
+
+ // bitmap on ARRAY should fail
+ Map<String, String> bitmapOptions = new HashMap<>();
+ bitmapOptions.put("file-index.bitmap.columns", "arr");
+ bitmapOptions.put(BUCKET.key(), String.valueOf(-1));
+ assertThatThrownBy(
+ () ->
+ validateTableSchema(
+ new TableSchema(
+ 1,
+ fields,
+ 10,
+ emptyList(),
+ singletonList("f1"),
+ bitmapOptions,
+ "")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("not supported by 'bitmap' index");
+
+ // bsi on STRING should fail
+ Map<String, String> bsiOptions = new HashMap<>();
+ bsiOptions.put("file-index.bsi.columns", "f3");
+ bsiOptions.put(BUCKET.key(), String.valueOf(-1));
+ assertThatThrownBy(
+ () ->
+ validateTableSchema(
+ new TableSchema(
+ 1,
+ fields,
+ 10,
+ emptyList(),
+ singletonList("f1"),
+ bsiOptions,
+ "")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("not supported by 'bsi' index");
+
+ // bloom-filter on INT should pass
+ Map<String, String> okOptions = new HashMap<>();
+ okOptions.put("file-index.bloom-filter.columns", "f0");
+ okOptions.put(BUCKET.key(), String.valueOf(-1));
+ assertThatCode(
+ () ->
+ validateTableSchema(
+ new TableSchema(
+ 1,
+ fields,
+ 10,
+ emptyList(),
+ singletonList("f1"),
+ okOptions,
+ "")))
+ .doesNotThrowAnyException();
+ }
+
@Test
public void testSnapshotSequenceOrderingHappyPath() {
Map<String, String> options = new HashMap<>();