This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 347ec25fd0 [core] Validate file index related options (#7846)
347ec25fd0 is described below
commit 347ec25fd05f961e62638dda16f14c4a509476c9
Author: tsreaper <[email protected]>
AuthorDate: Thu May 14 11:55:51 2026 +0800
[core] Validate file index related options (#7846)
Add schema-level validation for the file-index.<index-type>.columns
options (bloom-filter / bitmap / bsi / range-bitmap).
Currently, if a user configures a column name that does not exist in the
table schema (or uses the nested col[k] syntax on a non-map column, or on
a map with a non-string key), the table can still be created and writes
initially appear to succeed, but the write job fails much later — when
MergeTreeWriter.flushWriteBuffer triggers DataFileIndexWriter.<init>
during checkpoint, throwing IllegalArgumentException with one of:
"xxx does not exist in column fields", "xxx is not map type", or
"Only support map data type with key field of CHAR、VARCHAR、STRING.".
The same misconfiguration also blocks compaction.
This PR moves these three checks up-front into SchemaValidation so the
error is surfaced at create-table / alter-table time:
1. Every column listed in any file-index.<type>.columns must exist in
the schema.
2. If nested syntax col[k] is used, col must be of MAP type.
3. A MAP referenced via nested syntax must have a CHAR / VARCHAR key type
(this includes STRING; matches MapFileIndexMaintainer's runtime requirement).
---
.../org/apache/paimon/schema/SchemaValidation.java | 47 ++++++++++++++
.../apache/paimon/schema/SchemaValidationTest.java | 75 ++++++++++++++++++++++
.../procedure/RewriteFileIndexProcedureITCase.java | 2 +-
3 files changed, 123 insertions(+), 1 deletion(-)
diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
index b5285bb606..13ee86cdd0 100644
--- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
+++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java
@@ -23,6 +23,7 @@ import org.apache.paimon.CoreOptions.ChangelogProducer;
import org.apache.paimon.CoreOptions.MergeEngine;
import org.apache.paimon.TableType;
import org.apache.paimon.factories.FactoryUtil;
+import org.apache.paimon.fileindex.FileIndexOptions;
import org.apache.paimon.format.FileFormat;
import org.apache.paimon.mergetree.compact.aggregate.FieldAggregator;
import org.apache.paimon.mergetree.compact.aggregate.factory.FieldAggregatorFactory;
@@ -273,6 +274,8 @@ public class SchemaValidation {
validateMergeFunctionFactory(schema);
+ validateFileIndex(schema);
+
validateRowTracking(schema, options);
validateIncrementalClustering(schema, options);
@@ -531,6 +534,50 @@ public class SchemaValidation {
createMergeFunctionFactory(schema);
}
+ private static void validateFileIndex(TableSchema schema) {
+ CoreOptions options = new CoreOptions(schema.options());
+ FileIndexOptions fileIndexOptions = options.indexColumnsOptions();
+ if (fileIndexOptions.isEmpty()) {
+ return;
+ }
+
+ Map<String, DataField> fieldMap = new HashMap<>();
+ for (DataField field : schema.fields()) {
+ fieldMap.put(field.name(), field);
+ }
+
+ for (Map.Entry<FileIndexOptions.Column, Map<String, Options>> entry :
+ fileIndexOptions.entrySet()) {
+ FileIndexOptions.Column column = entry.getKey();
+ String columnName = column.getColumnName();
+ checkArgument(
+ fieldMap.containsKey(columnName),
+ "Column '%s' specified in 'file-index.<index-type>.columns' "
+ + "does not exist in table schema. Existing columns: %s.",
+ columnName,
+ schema.fieldNames());
+
+ DataField field = fieldMap.get(columnName);
+ if (column.isNestedColumn()) {
+ checkArgument(
+ field.type().getTypeRoot() == DataTypeRoot.MAP,
+ "Column '%s' is configured as nested column in "
+ + "'file-index.<index-type>.columns' but is not a map type. "
+ + "Only map type supports nested column.",
+ columnName);
+ DataType keyType = ((MapType) field.type()).getKeyType();
+ DataTypeRoot keyRoot = keyType.getTypeRoot();
+ checkArgument(
+ keyRoot == DataTypeRoot.CHAR || keyRoot == DataTypeRoot.VARCHAR,
+ "Column '%s' is configured as nested column in "
+ + "'file-index.<index-type>.columns', but its map key type is %s. "
+ + "Only CHAR/VARCHAR/STRING is supported.",
+ columnName,
+ keyType);
+ }
+ }
+ }
+
private static void validateForDeletionVectors(CoreOptions options) {
checkArgument(
options.changelogProducer() == ChangelogProducer.NONE
diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
index d2bad5248c..cec075724f 100644
--- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java
@@ -369,4 +369,79 @@ class SchemaValidationTest {
"")))
.hasMessageContaining("primary-key");
}
+
+ @Test
+ public void testFileIndexColumns() {
+ List<String> keys =
+ Arrays.asList(
+ "file-index.bloom-filter.columns",
+ "file-index.bitmap.columns",
+ "file-index.bsi.columns",
+ "file-index.range-bitmap.columns");
+
+ for (String key : keys) {
+ // valid: all referenced columns exist
+ Map<String, String> okOptions = new HashMap<>();
+ okOptions.put(key, "f0,f3");
+ assertThatCode(() -> validateTableSchemaExec(okOptions))
+ .as("valid key=%s", key)
+ .doesNotThrowAnyException();
+
+ // invalid: references a non-existent column
+ Map<String, String> badOptions = new HashMap<>();
+ badOptions.put(key, "f0,not_exist");
+ assertThatThrownBy(() -> validateTableSchemaExec(badOptions))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining(
+ "Column 'not_exist' specified in 'file-index.<index-type>.columns' does not exist in table schema.");
+ }
+ }
+
+ @Test
+ public void testFileIndexNestedColumn() {
+ List<String> keys =
+ Arrays.asList(
+ "file-index.bloom-filter.columns",
+ "file-index.bitmap.columns",
+ "file-index.bsi.columns",
+ "file-index.range-bitmap.columns");
+
+ for (String key : keys) {
+ // valid: nested syntax on a map column with string key
+ Map<String, String> okOptions = new HashMap<>();
+ okOptions.put(key, "m[k]");
+ assertThatCode(() -> validateTableSchemaWithMapField(okOptions))
+ .doesNotThrowAnyException();
+
+ // invalid: nested syntax on a non-map column
+ Map<String, String> nonMapOptions = new HashMap<>();
+ nonMapOptions.put(key, "f3[k]");
+ assertThatThrownBy(() -> validateTableSchemaWithMapField(nonMapOptions))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining(
+ "Column 'f3' is configured as nested column in 'file-index.<index-type>.columns' but is not a map type.");
+
+ // invalid: nested syntax on a map column with non-string key
+ Map<String, String> nonStringKeyOptions = new HashMap<>();
+ nonStringKeyOptions.put(key, "mi[k]");
+ assertThatThrownBy(() -> validateTableSchemaWithMapField(nonStringKeyOptions))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining(
+ "Column 'mi' is configured as nested column in 'file-index.<index-type>.columns', but its map key type is INT. Only CHAR/VARCHAR/STRING is supported.");
+ }
+ }
+
+ private void validateTableSchemaWithMapField(Map<String, String> options) {
+ List<DataField> fields =
+ Arrays.asList(
+ new DataField(0, "f0", DataTypes.INT()),
+ new DataField(1, "f1", DataTypes.INT()),
+ new DataField(2, "f2", DataTypes.INT()),
+ new DataField(3, "f3", DataTypes.STRING()),
+ new DataField(4, "m", DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
+ new DataField(5, "mi", DataTypes.MAP(DataTypes.INT(), DataTypes.INT())));
+ options.put(BUCKET.key(), String.valueOf(-1));
+ validateTableSchema(
+ new TableSchema(1, fields, 10, emptyList(), singletonList("f1"), options, ""));
+ }
}
diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
index 23102f2a3d..60aa33977c 100644
--- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
+++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/procedure/RewriteFileIndexProcedureITCase.java
@@ -138,7 +138,7 @@ public class RewriteFileIndexProcedureITCase extends CatalogITCaseBase {
Assertions.assertThat(count.get()).isEqualTo(6);
tEnv.getConfig().set(TableConfigOptions.TABLE_DML_SYNC, true);
- sql("ALTER TABLE T SET ('file-index.bloom-filter.columns'='order_id,v')");
+ sql("ALTER TABLE T SET ('file-index.bloom-filter.columns'='k,v')");
if (isNamedArgument) {
sql("CALL sys.rewrite_file_index(`table` => 'default.T', partitions => 'dt=20221208')");
} else {