This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new bb179b77f7 [Feature-WIP](inverted index) support array type for
inverted index reader (#16355)
bb179b77f7 is described below
commit bb179b77f75d2b0471eb7b3b75ad783d21596194
Author: YueW <[email protected]>
AuthorDate: Thu Feb 2 16:14:14 2023 +0800
[Feature-WIP](inverted index) support array type for inverted index reader
(#16355)
---
be/src/vec/exec/scan/vscan_node.cpp | 20 ++++++-
.../main/java/org/apache/doris/catalog/Type.java | 10 ++++
.../java/org/apache/doris/analysis/IndexDef.java | 4 ++
.../org/apache/doris/analysis/MatchPredicate.java | 69 +++++++++++----------
.../data/inverted_index_p0/test_array_index.out | 58 ++++++++++++++++++
.../inverted_index_p0/test_array_index.groovy | 70 ++++++++++++++++++++++
6 files changed, 197 insertions(+), 34 deletions(-)
diff --git a/be/src/vec/exec/scan/vscan_node.cpp
b/be/src/vec/exec/scan/vscan_node.cpp
index 198e7ab0c7..d0fc12f37a 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -49,6 +49,17 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
if (slot->type().is_string_type() && expr->type().is_string_type()) {
return true;
}
+ if (slot->type().is_array_type()) {
+ if (slot->type().children[0].type == expr->type().type) {
+ return true;
+ }
+ if (slot->type().children[0].is_date_type() &&
expr->type().is_date_type()) {
+ return true;
+ }
+ if (slot->type().children[0].is_string_type() &&
expr->type().is_string_type()) {
+ return true;
+ }
+ }
return false;
}
@@ -391,7 +402,14 @@ Status VScanNode::_normalize_conjuncts() {
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
- switch (slots[slot_idx]->type().type) {
+ auto type = slots[slot_idx]->type().type;
+ if (slots[slot_idx]->type().type == TYPE_ARRAY) {
+ type = slots[slot_idx]->type().children[0].type;
+ if (type == TYPE_ARRAY) {
+ continue;
+ }
+ }
+ switch (type) {
#define M(NAME)
\
case TYPE_##NAME: {
\
ColumnValueRange<TYPE_##NAME> range(slots[slot_idx]->col_name(),
\
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index e6c2e3a4cd..ef3ec7c834 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -109,6 +109,7 @@ public abstract class Type {
private static final Logger LOG = LogManager.getLogger(Type.class);
private static final ArrayList<ScalarType> integerTypes;
+ private static final ArrayList<ScalarType> stringTypes;
private static final ArrayList<ScalarType> numericTypes;
private static final ArrayList<ScalarType> numericDateTimeTypes;
private static final ArrayList<ScalarType> supportedTypes;
@@ -123,6 +124,11 @@ public abstract class Type {
integerTypes.add(BIGINT);
integerTypes.add(LARGEINT);
+ stringTypes = Lists.newArrayList();
+ stringTypes.add(CHAR);
+ stringTypes.add(VARCHAR);
+ stringTypes.add(STRING);
+
numericTypes = Lists.newArrayList();
numericTypes.addAll(integerTypes);
numericTypes.add(FLOAT);
@@ -207,6 +213,10 @@ public abstract class Type {
return integerTypes;
}
+ public static ArrayList<ScalarType> getStringTypes() {
+ return stringTypes;
+ }
+
public static ArrayList<ScalarType> getNumericTypes() {
return numericTypes;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index ed03dbd84e..d1c21b5d37 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -17,6 +17,7 @@
package org.apache.doris.analysis;
+import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.PrimitiveType;
@@ -176,6 +177,9 @@ public class IndexDef {
|| indexType == IndexType.NGRAM_BF) {
String indexColName = column.getName();
PrimitiveType colType = column.getDataType();
+ if (indexType == IndexType.INVERTED && colType.isArrayType()) {
+ colType = ((ArrayType)
column.getType()).getItemType().getPrimitiveType();
+ }
if (!(colType.isDateType() || colType.isDecimalV2Type() ||
colType.isDecimalV3Type()
|| colType.isFixedPointType() || colType.isStringType() ||
colType == PrimitiveType.BOOLEAN)) {
throw new AnalysisException(colType + " is not supported in "
+ indexType.toString() + " index. "
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
index ad6a6968a7..bec9ed403c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
@@ -107,38 +107,41 @@ public class MatchPredicate extends Predicate {
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
}
-
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
- Operator.MATCH_ANY.getName(),
- symbolNotUsed,
- Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
- Type.BOOLEAN));
-
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
- Operator.MATCH_ANY.getName(),
- symbolNotUsed,
- Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR),
Type.VARCHAR),
- Type.BOOLEAN));
-
-
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
- Operator.MATCH_ALL.getName(),
- symbolNotUsed,
- Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
- Type.BOOLEAN));
-
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
- Operator.MATCH_ALL.getName(),
- symbolNotUsed,
- Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR),
Type.VARCHAR),
- Type.BOOLEAN));
-
-
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
- Operator.MATCH_PHRASE.getName(),
- symbolNotUsed,
- Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
- Type.BOOLEAN));
-
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
- Operator.MATCH_PHRASE.getName(),
- symbolNotUsed,
- Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR),
Type.VARCHAR),
- Type.BOOLEAN));
+
+ for (Type t : Type.getStringTypes()) {
+
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+ Operator.MATCH_ANY.getName(),
+ symbolNotUsed,
+ Lists.<Type>newArrayList(t, t),
+ Type.BOOLEAN));
+
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+ Operator.MATCH_ANY.getName(),
+ symbolNotUsed,
+ Lists.<Type>newArrayList(new ArrayType(t), t),
+ Type.BOOLEAN));
+
+
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+ Operator.MATCH_ALL.getName(),
+ symbolNotUsed,
+ Lists.<Type>newArrayList(t, t),
+ Type.BOOLEAN));
+
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+ Operator.MATCH_ALL.getName(),
+ symbolNotUsed,
+ Lists.<Type>newArrayList(new ArrayType(t), t),
+ Type.BOOLEAN));
+
+
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+ Operator.MATCH_PHRASE.getName(),
+ symbolNotUsed,
+ Lists.<Type>newArrayList(t, t),
+ Type.BOOLEAN));
+
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+ Operator.MATCH_PHRASE.getName(),
+ symbolNotUsed,
+ Lists.<Type>newArrayList(new ArrayType(t), t),
+ Type.BOOLEAN));
+ }
}
private final Operator op;
@@ -219,7 +222,7 @@ public class MatchPredicate extends Predicate {
collectChildReturnTypes(),
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
if (fn == null) {
throw new AnalysisException(
- "no function found for " + op.toString() + " " + toSql());
+ "no function found for " + op.toString() + "," + toSql());
}
Expr e1 = getChild(0);
Expr e2 = getChild(1);
diff --git a/regression-test/data/inverted_index_p0/test_array_index.out
b/regression-test/data/inverted_index_p0/test_array_index.out
new file mode 100644
index 0000000000..8a858f5611
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_array_index.out
@@ -0,0 +1,58 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+2 [20, 30, 40] ['i', 'love', 'north korea']
+
+-- !sql --
+2 [20, 30, 40] ['i', 'love', 'north korea']
+
+-- !sql --
+2 [20, 30, 40] ['i', 'love', 'north korea']
+
+-- !sql --
+2 [20, 30, 40] ['i', 'love', 'north korea']
+3 [30, 40, 50] \N
+4 [40, 50, 60] \N
+
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+2 [20, 30, 40] ['i', 'love', 'north korea']
+3 [30, 40, 50] \N
+4 [40, 50, 60] \N
+
+-- !sql --
+3 [30, 40, 50] \N
+4 [40, 50, 60] \N
+
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+2 [20, 30, 40] ['i', 'love', 'north korea']
+3 [30, 40, 50] \N
+
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+2 [20, 30, 40] ['i', 'love', 'north korea']
+
+-- !sql --
+1 [10, 20, 30] ['i', 'love', 'china']
+2 [20, 30, 40] ['i', 'love', 'north korea']
+3 [30, 40, 50] \N
+
+-- !sql --
+2 [20, 30, 40] ['i', 'love', 'north korea']
+3 [30, 40, 50] \N
+4 [40, 50, 60] \N
+
+-- !sql --
+3 [30, 40, 50] \N
+4 [40, 50, 60] \N
+
+-- !sql --
+4 [40, 50, 60] \N
+
diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy
b/regression-test/suites/inverted_index_p0/test_array_index.groovy
new file mode 100644
index 0000000000..d240dbdeb7
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_array_index.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_array_index"){
+ // prepare test table
+
+
+ def timeout = 60000
+ def delta_time = 1000
+ def alter_res = "null"
+ def useTime = 0
+
+ def indexTblName = "array_test"
+
+ sql "DROP TABLE IF EXISTS ${indexTblName}"
+ // create 1 replica table
+ sql """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id`int(11)NULL,
+ `int_array` array<int(20)> NULL,
+ `c_array` array<varchar(20)> NULL,
+ INDEX c_array_idx(`c_array`) USING INVERTED
PROPERTIES("parser"="english") COMMENT 'c_array index',
+ INDEX int_array_idx(`int_array`) USING INVERTED COMMENT
'int_array index'
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1",
+ "persistent"="false"
+ );
+ """
+
+ // set enable_vectorized_engine=true
+ sql """ SET enable_vectorized_engine=true; """
+ def var_result = sql "show variables"
+ logger.info("show variales result: " + var_result )
+
+ sql "INSERT INTO $indexTblName VALUES (1, [10,20,30],
['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3,
[30,40,50], NULL);"
+ sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);"
+ qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china';"
+ qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love';"
+ qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north';"
+ qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea';"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50;"
+ qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60;"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]