This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2ec0eed823d [fix](array-index) Fix types that do not support indexing
also in array nesting should also not be supported. (#50162)
2ec0eed823d is described below
commit 2ec0eed823dcbb9a155e1a29ee46c8bf55e4bf07
Author: amory <[email protected]>
AuthorDate: Mon Apr 21 15:20:04 2025 +0800
[fix](array-index) Fix types that do not support indexing also in array
nesting should also not be supported. (#50162)
Fix the check for column types that do not support indexing: a type that is
unsupported on its own must also be rejected when it is the element type of a
(possibly nested) array.
---
.../java/org/apache/doris/analysis/IndexDef.java | 20 ++-
.../trees/plans/commands/info/IndexDefinition.java | 19 ++-
.../org/apache/doris/analysis/IndexDefTest.java | 66 +++++++++
.../trees/plans/commands/IndexDefinitionTest.java | 62 ++++++++
.../data/inverted_index_p0/test_array_index2.out | Bin 0 -> 277 bytes
.../inverted_index_p0/test_array_index2.groovy | 158 +++++++++++++++++++++
6 files changed, 319 insertions(+), 6 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index bb14137c51b..1e7ec048174 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -17,9 +17,11 @@
package org.apache.doris.analysis;
+import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
@@ -214,6 +216,19 @@ public class IndexDef {
return (this.indexType == IndexType.INVERTED);
}
+    // Whether colType can be indexed; ARRAY types are unwrapped so that the innermost item type decides.
+    public boolean isSupportIdxType(Type colType) {
+        Type leafType = colType;
+        while (leafType.isArrayType()) {
+            leafType = ((ArrayType) leafType).getItemType();
+        }
+        PrimitiveType leaf = leafType.getPrimitiveType();
+        boolean dateDecimalOrFixedPoint = leaf.isDateType() || leaf.isDecimalV2Type()
+                || leaf.isDecimalV3Type() || leaf.isFixedPointType();
+        return dateDecimalOrFixedPoint || leaf.isStringType() || leaf == PrimitiveType.BOOLEAN
+                || leaf.isVariantType() || leaf.isIPType();
+    }
+
public void checkColumn(Column column, KeysType keysType, boolean
enableUniqueKeyMergeOnWrite,
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat)
throws AnalysisException {
if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED
|| indexType == IndexType.BLOOMFILTER
@@ -221,9 +236,8 @@ public class IndexDef {
String indexColName = column.getName();
caseSensitivityColumns.add(indexColName);
PrimitiveType colType = column.getDataType();
- if (!(colType.isDateType() || colType.isDecimalV2Type() ||
colType.isDecimalV3Type()
- || colType.isFixedPointType() || colType.isStringType() ||
colType == PrimitiveType.BOOLEAN
- || colType.isVariantType() || colType.isIPType() ||
colType.isArrayType())) {
+ Type columnType = column.getType();
+ if (!isSupportIdxType(columnType)) {
throw new AnalysisException(colType + " is not supported in "
+ indexType.toString() + " index. "
+ "invalid index: " + indexName);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index 20e6032ab72..2878e0ee8e8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -25,6 +25,7 @@ import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.common.Config;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
@@ -108,6 +109,20 @@ public class IndexDefinition {
this.comment = null;
}
+    /**
+     * Tells whether an index may be built on {@code columnType}; arrays are judged by their innermost item type.
+     */
+    public boolean isSupportIdxType(DataType columnType) {
+        DataType leafType = columnType;
+        while (leafType.isArrayType()) {
+            leafType = ((ArrayType) leafType).getItemType();
+        }
+        boolean dateDecimalOrIntegral = leafType.isDateLikeType() || leafType.isDecimalLikeType()
+                || leafType.isIntegralType();
+        return dateDecimalOrIntegral || leafType.isStringLikeType() || leafType.isBooleanType()
+                || leafType.isVariantType() || leafType.isIPType();
+    }
+
/**
* checkColumn
*/
@@ -119,9 +134,7 @@ public class IndexDefinition {
String indexColName = column.getName();
caseSensitivityCols.add(indexColName);
DataType colType = column.getType();
- if (!(colType.isDateLikeType() || colType.isDecimalLikeType() ||
colType.isArrayType()
- || colType.isIntegralType() || colType.isStringLikeType()
- || colType.isBooleanType() || colType.isVariantType() ||
colType.isIPType())) {
+ if (!isSupportIdxType(colType)) {
// TODO add colType.isAggState()
throw new AnalysisException(colType + " is not supported in "
+ indexType.toString()
+ " index. " + "invalid index: " + name);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
b/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
index 3d528ce1b68..3ca20a670fe 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
@@ -17,9 +17,14 @@
package org.apache.doris.analysis;
+import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
+import org.apache.doris.catalog.MapType;
import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.catalog.StructField;
+import org.apache.doris.catalog.StructType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
@@ -28,6 +33,8 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import java.util.ArrayList;
+
public class IndexDefTest {
private IndexDef def;
@@ -80,4 +87,63 @@ public class IndexDefTest {
Assert.assertEquals("INDEX `index1` ON table1 (`col1`) USING INVERTED
COMMENT 'balabala'",
def.toSql("table1"));
}
+
+    @Test
+    public void testArrayTypeSupport() throws AnalysisException {
+        def = new IndexDef("array_index", false, Lists.newArrayList("col1"),
+                IndexDef.IndexType.INVERTED, null, "array test");
+
+        // Arrays whose element type is indexable: checkColumn is expected to return without throwing
+        Column varcharArrayCol = new Column("col1",
+                ArrayType.create(ScalarType.createVarchar(10), false));
+        def.checkColumn(varcharArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+
+        Column intArrayCol = new Column("col1",
+                ArrayType.create(ScalarType.createType(PrimitiveType.INT), false));
+        def.checkColumn(intArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+
+        Column dateArrayCol = new Column("col1",
+                ArrayType.create(ScalarType.createType(PrimitiveType.DATE), false));
+        def.checkColumn(dateArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+
+        // Two levels of nesting: Array<Array<String>>
+        Column nestedVarcharArrayCol = new Column("col1",
+                ArrayType.create(ArrayType.create(ScalarType.createVarchar(10), false), false));
+        def.checkColumn(nestedVarcharArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+
+        // Arrays whose element type is not indexable (first Array<Float>): each checkColumn call has to throw
+        Column floatArrayCol = new Column("col1",
+                ArrayType.create(ScalarType.createType(PrimitiveType.FLOAT), false));
+        try {
+            def.checkColumn(floatArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for unsupported array element type.");
+        } catch (AnalysisException expected) {
+            Assert.assertTrue(expected.getMessage().contains("is not supported in"));
+        }
+
+        // Array<Map<String, Int>>
+        MapType varcharToInt = new MapType(
+                ScalarType.createVarchar(10),
+                ScalarType.createType(PrimitiveType.INT));
+        Column mapArrayCol = new Column("col1", ArrayType.create(varcharToInt, false));
+        try {
+            def.checkColumn(mapArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for array of map type.");
+        } catch (AnalysisException expected) {
+            Assert.assertTrue(expected.getMessage().contains("is not supported in"));
+        }
+
+        // Array<Struct<name:String, age:Int>>
+        ArrayList<StructField> structFields = new ArrayList<>();
+        structFields.add(new StructField("name", ScalarType.createVarchar(10), null));
+        structFields.add(new StructField("age", ScalarType.createType(PrimitiveType.INT), null));
+        Column structArrayCol = new Column("col1",
+                ArrayType.create(new StructType(structFields), false));
+        try {
+            def.checkColumn(structArrayCol, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for array of struct type.");
+        } catch (AnalysisException expected) {
+            Assert.assertTrue(expected.getMessage().contains("is not supported in"));
+        }
+    }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
index 4b5636b6124..6dfe6316830 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
@@ -22,8 +22,13 @@ import org.apache.doris.catalog.KeysType;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition;
import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.FloatType;
import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.MapType;
import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
import org.apache.doris.nereids.types.VariantType;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
@@ -31,6 +36,7 @@ import com.google.common.collect.Lists;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
@@ -50,6 +56,62 @@ public class IndexDefinitionTest {
}
}
+    // Index validation on ARRAY columns: the innermost element type decides whether checkColumn accepts it.
+    // ARRAY<STRING>, ARRAY<INT> and ARRAY<ARRAY<STRING>> must pass; FLOAT, MAP and STRUCT elements must be
+    // rejected with an "is not supported in" error.
+    // NOTE: the @Test annotation is required; without it JUnit 5 never discovers and runs this method.
+    @Test
+    void testArrayTypeSupport() throws AnalysisException {
+        IndexDefinition def = new IndexDefinition("array_index", false, Lists.newArrayList("col1"),
+                "INVERTED", null, "array test");
+
+        // Test array of supported types
+        def.checkColumn(new ColumnDefinition("col1",
+                ArrayType.of(StringType.INSTANCE), false, AggregateType.NONE, true, null, "comment"),
+                KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+
+        def.checkColumn(new ColumnDefinition("col1",
+                ArrayType.of(IntegerType.INSTANCE), false, AggregateType.NONE, true, null, "comment"),
+                KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+
+        // Array<Array<String>>
+        def.checkColumn(new ColumnDefinition("col1",
+                ArrayType.of(ArrayType.of(StringType.INSTANCE)), false,
+                AggregateType.NONE, true, null, "comment"),
+                KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+
+        // Test array of unsupported types
+        // assertThrows fails the test (with the given message) when no AnalysisException is raised
+        // Array<Float>
+        AnalysisException floatError = Assertions.assertThrows(AnalysisException.class,
+                () -> def.checkColumn(new ColumnDefinition("col1",
+                        ArrayType.of(FloatType.INSTANCE), false, AggregateType.NONE, true, null, "comment"),
+                        KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1),
+                "No exception throws for unsupported array element type (Float).");
+        Assertions.assertTrue(floatError.getMessage().contains("is not supported in"));
+
+        // Array<Map<String, Int>>
+        AnalysisException mapError = Assertions.assertThrows(AnalysisException.class,
+                () -> def.checkColumn(new ColumnDefinition("col1",
+                        ArrayType.of(MapType.of(StringType.INSTANCE, IntegerType.INSTANCE)), false,
+                        AggregateType.NONE, true, null, "comment"),
+                        KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1),
+                "No exception throws for array of map type.");
+        Assertions.assertTrue(mapError.getMessage().contains("is not supported in"));
+
+        // Array<Struct<name:String, age:Int>>
+        ArrayList<StructField> fields = new ArrayList<>();
+        fields.add(new StructField("name", StringType.INSTANCE, true, null));
+        fields.add(new StructField("age", IntegerType.INSTANCE, true, null));
+        AnalysisException structError = Assertions.assertThrows(AnalysisException.class,
+                () -> def.checkColumn(new ColumnDefinition("col1",
+                        ArrayType.of(new StructType(fields)), false,
+                        AggregateType.NONE, true, null, "comment"),
+                        KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1),
+                "No exception throws for array of struct type.");
+        Assertions.assertTrue(structError.getMessage().contains("is not supported in"));
+    }
+
@Test
void testNgramBFIndex() throws AnalysisException {
Map<String, String> properties = new HashMap<>();
diff --git a/regression-test/data/inverted_index_p0/test_array_index2.out
b/regression-test/data/inverted_index_p0/test_array_index2.out
new file mode 100644
index 00000000000..03ffac07a76
Binary files /dev/null and
b/regression-test/data/inverted_index_p0/test_array_index2.out differ
diff --git a/regression-test/suites/inverted_index_p0/test_array_index2.groovy
b/regression-test/suites/inverted_index_p0/test_array_index2.groovy
new file mode 100644
index 00000000000..e10065be141
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_array_index2.groovy
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_index2") {
+    def tableName1 = "array_test_supported"
+    def tableName2 = "array_test_unsupported"
+
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+
+    // Polls the latest ALTER TABLE COLUMN job of table_name every delta_time ms; fails after about OpTimeout ms.
+    def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+        for(int t = delta_time; t <= OpTimeout; t += delta_time) {
+            alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+            alter_res = alter_res.toString()
+            if(alter_res.contains("FINISHED")) {
+                sleep(10000) // wait change table state to normal
+                logger.info(table_name + " latest alter job finished, detail: " + alter_res)
+                return // finished in time, so skip the timeout failure below
+            }
+            sleep(delta_time)
+        }
+        assertTrue(false, "wait_for_latest_op_on_table_finish timeout") // loop ended without seeing FINISHED
+    }
+
+    sql "DROP TABLE IF EXISTS ${tableName1}"
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+
+    // Create table with supported array types
+    sql """
+        CREATE TABLE ${tableName1} (
+            id int,
+            str_arr ARRAY<STRING>,
+            int_arr ARRAY<INT>,
+            date_arr ARRAY<DATE>
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    // Insert test data before creating indexes
+    sql """ INSERT INTO ${tableName1} VALUES
+        (1, ['hello', 'world'], [1, 2, 3], ['2023-01-01', '2023-01-02']),
+        (2, ['doris', 'apache'], [4, 5, 6], ['2023-02-01', '2023-02-02']),
+        (3, NULL, NULL, NULL),
+        (4, [], [], []),
+        (5, ['test', 'array'], [7, 8, 9], ['2023-03-01', '2023-03-02']),
+        (6, ['index', 'support'], [10, 11, 12], ['2023-04-01', '2023-04-02']);
+    """
+
+    // Create indexes on supported array types - should succeed
+    sql """ ALTER TABLE ${tableName1} ADD INDEX idx_str_arr (str_arr) USING INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName1, timeout)
+
+    sql """ ALTER TABLE ${tableName1} ADD INDEX idx_int_arr (int_arr) USING INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName1, timeout)
+
+    sql """ ALTER TABLE ${tableName1} ADD INDEX idx_date_arr (date_arr) USING INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName1, timeout)
+
+    // Create table with a nested array column and arrays of unsupported element types
+    sql """
+        CREATE TABLE ${tableName2} (
+            id int,
+            nested_arr ARRAY<ARRAY<STRING>>,
+            map_arr ARRAY<MAP<STRING,INT>>,
+            float_arr ARRAY<FLOAT>,
+            struct_arr ARRAY<STRUCT<
+                name:STRING,
+                age:INT,
+                score:FLOAT
+            >>
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    // Insert some data into the nested / unsupported array type table
+    sql """ INSERT INTO ${tableName2} VALUES
+        (1, [['a', 'b'], ['c', 'd']], [{'key1': 1, 'key2': 2}], [1.1, 2.2], array(named_struct('name', 'Alice', 'age', 20, 'score', 85.5))),
+        (2, [['e', 'f']], [{'key3': 3}], [3.3], array(named_struct('name', 'Bob', 'age', 25, 'score', 90.0)));
+    """
+
+    // Test creating index on nested array of a supported element type - should succeed
+    sql """ ALTER TABLE ${tableName2} ADD INDEX idx_nested_arr (nested_arr) USING INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName2, timeout)
+
+    // Test creating index on array of map - should fail
+    test {
+        sql """ ALTER TABLE ${tableName2} ADD INDEX idx_map_arr (map_arr) USING INVERTED; """
+        exception "is not supported in"
+    }
+
+    // Test creating index on array of float - should fail
+    test {
+        sql """ ALTER TABLE ${tableName2} ADD INDEX idx_float_arr (float_arr) USING INVERTED; """
+        exception "is not supported in"
+    }
+
+    // Test creating index on array of struct - should fail
+    test {
+        sql """ ALTER TABLE ${tableName2} ADD INDEX idx_struct_arr (struct_arr) USING INVERTED; """
+        exception "is not supported in"
+    }
+
+    // Test array_contains function
+    qt_sql """
+        SELECT id, str_arr, int_arr, date_arr
+        FROM ${tableName1}
+        WHERE array_contains(str_arr, 'world')
+        OR array_contains(int_arr, 8)
+        OR array_contains(date_arr, '2023-03-01')
+        ORDER BY id;
+    """
+
+    // Test array_contains with multiple conditions
+    qt_sql """
+        SELECT id
+        FROM ${tableName1}
+        WHERE array_contains(str_arr, 'apache')
+        AND array_contains(int_arr, 5)
+        AND array_contains(date_arr, '2023-02-02')
+        ORDER BY id;
+    """
+
+    // Test array_contains with NULL and empty arrays
+    qt_sql """
+        SELECT id, str_arr
+        FROM ${tableName1}
+        WHERE array_contains(str_arr, 'test')
+        OR str_arr IS NULL
+        ORDER BY id;
+    """
+
+    sql "DROP TABLE IF EXISTS ${tableName1}"
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]